From: ian Date: Wed, 25 Jan 2006 22:46:37 +0000 (+0000) Subject: mintstamp => lexminval; much work, it sort of compiles now but more needs writing X-Git-Tag: debian/1.1.1~78 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-tcl.git;a=commitdiff_plain;h=ca258c32ca9f43fa6ddb5642860f310c8b506f2c mintstamp => lexminval; much work, it sort of compiles now but more needs writing --- diff --git a/cdb/cdb.tct b/cdb/cdb.tct index 017b4e6..e33607b 100644 --- a/cdb/cdb.tct +++ b/cdb/cdb.tct @@ -58,23 +58,25 @@ Table cdbwr Cdbwr_SubCommand open 0 pathb string on_info obj - ?on_mintstamp obj - # on_mintstamp present and not empty list: provides a - # script which returns the current mintstamp. This - # means entries start with a 16-hex-digit signed - # integer known as a tstamp; occasionally, - # on_mintstamp will be called and then entries whose - # tstamp is < mintstamp will be deleted automatically. + ?on_lexminval obj + # on_lexminval present and not empty list: provides a + # script which returns the current lexminval. In + # this case, occasionally, + # on_lexminval will be called and then entries whose + # value is lexically strictly less than lexminval + # will be deleted automatically. The comparison + # is bytewise on the UTF-8 representations. => iddata(&cdbtcl_rwdatabases) open-okjunk RWSCF_OKJUNK pathb string on_info obj - ?on_mintstamp obj + ?on_lexminval obj => iddata(&cdbtcl_rwdatabases) # on_info ...: # on_info open-clean # on_info open-dirty-start - # on_info open-dirty-junk + # on_info open-dirty-junk \ + # # on_info open-dirty-done # on_info compact-start # on_info compact-done diff --git a/cdb/writeable.c b/cdb/writeable.c index f44e881..4230e62 100644 --- a/cdb/writeable.c +++ b/cdb/writeable.c @@ -19,6 +19,8 @@ static void maybe_close(int fd) { rc= cht_posixerr(ip, errno, "failed to " m); goto x_rc; \ }while(0) +/*==================== Subsystems and subtypes ====================*/ + /*---------- Pathbuf ----------*/ typedef struct Pathbuf { @@ -93,11 +95,11 @@ static void ht_maybeupdate(HashTable *ht, const char *key, } static int ht_forall(HashTable *ht, - int (*fn)(const char *key, const HashValue *val, - Tcl_HashEntry *he, struct ht_forall_ctx *ctx), + int (*fn)(const char *key, HashValue *val, + struct ht_forall_ctx *ctx), struct ht_forall_ctx *ctx) { /* Returns first positive value returned by any call to fn, or 0. */ - Tcl_HashSearch sp;n + Tcl_HashSearch sp; Tcl_HashEntry *he; const char *key; HashValue *val; @@ -124,14 +126,17 @@ static void ht_destroy(HashTable *ht) { for (he= Tcl_FirstHashEntry(&ht->t, &sp); he; he= Tcl_NextHashEntry(&sp)) { + /* ht_forall skips empty (deleted) entries so is no good for this */ TFREE(Tcl_GetHashValue(he)); } Tcl_DeleteHashTable(&ht->t); } +/*==================== Existential ====================*/ + /*---------- Rw data structure ----------*/ -typedef struct { +typedef struct Rw { int ix, autocompact; int cdb_fd, lock_fd; struct cdb cdb; /* valid iff cdb_fd >= 0 */ @@ -140,7 +145,7 @@ typedef struct { HashTable logincore; Pathbuf pbsome, pbother; off_t mainsz; - ScriptToInvoke on_info, on_mintstamp; + ScriptToInvoke on_info, on_lexminval; } Rw; static int rw_close(Tcl_Interp *ip, Rw *rw) { @@ -200,6 +205,8 @@ static int acquire_lock(Tcl_Interp *ip, Pathbuf *pb, int *lockfd_r) { else return cht_posixerr(ip, errno, "unexpected error from fcntl while" " acquiring lock"); } + + return TCL_OK; } /*---------- Log reading ----------*/ @@ -227,13 +234,17 @@ static int readlognum(FILE *f, int delim, int *num_r) { } static int readstorelogrecord(FILE *f, HashTable *ht, + int (*omitfn)(const HashValue*, + struct ht_forall_ctx *ctx), + struct ht_forall_ctx *ctx, void (*updatefn)(HashTable*, const char*, HashValue*)) { /* returns: - * 0 for OK - * -1 eof - * -2 corrupt or error - * -3 got newline indicating end + * 0 for OK + * -1 eof + * -2 corrupt or error + * -3 got newline indicating end + * >0 value from omitfn */ int keylen, vallen; char *key; @@ -262,9 +273,12 @@ static int readstorelogrecord(FILE *f, HashTable *ht, r= fread(htv_fillptr(val), 1,vallen, f); if (r!=vallen) goto x2_free_keyval; - updatefn(ht, key, val); + rc= omitfn ? omitfn(val, ctx) : TCL_OK; + if (rc) { assert(rc>0); TFREE(val); } + else updatefn(ht, key, val); + TFREE(key); - return TCL_OK; + return rc; x2_free_keyval: TFREE(val); @@ -309,7 +323,17 @@ int cht_do_cdbwr_create_empty(ClientData cd, Tcl_Interp *ip, static int infocbv3(Tcl_Interp *ip, Rw *rw, const char *arg1, const char *arg2fmt, const char *arg3, va_list al) { - abort(); + Tcl_Obj *aa[3]; + int na; + char buf[200]; + vsnprintf(buf, sizeof(buf), arg2fmt, al); + + na= 0; + aa[na++]= cht_ret_string(ip, arg1); + aa[na++]= cht_ret_string(ip, buf); + if (arg3) aa[na++]= cht_ret_string(ip, arg3); + + return cht_scriptinv_invoke_fg(&rw->on_info, na, aa); } static int infocb3(Tcl_Interp *ip, Rw *rw, const char *arg1, @@ -335,7 +359,7 @@ static int infocb(Tcl_Interp *ip, Rw *rw, const char *arg1, /*---------- Opening ----------*/ int cht_do_cdbwr_open(ClientData cd, Tcl_Interp *ip, const char *pathb, - Tcl_Obj *on_info, Tcl_Obj *on_mintstamp, + Tcl_Obj *on_info, Tcl_Obj *on_lexminval, void **result) { const Cdbwr_SubCommand *subcmd= cd; int r, rc, mainfd=-1; @@ -345,19 +369,18 @@ int cht_do_cdbwr_open(ClientData cd, Tcl_Interp *ip, const char *pathb, rw= TALLOC(sizeof(*rw)); ht_setup(&rw->logincore); - scriptinv_init(&rw->on_info); - scriptinv_init(&rw->on_mintstamp); + cht_scriptinv_init(&rw->on_info); + cht_scriptinv_init(&rw->on_lexminval); rw->cdb_fd= rw->lock_fd= -1; rw->logfile= 0; - rw->maxage= maxage; pathbuf_init(&rw->pbsome, pathb); pathbuf_init(&rw->pbother, pathb); rw->autocompact= 1; - if (on_mintstamp) { - rc= scriptinv_set(&rw->on_mintstamp, ip, on_mintstamp, 0); + if (on_lexminval) { + rc= cht_scriptinv_set(&rw->on_lexminval, ip, on_lexminval, 0); if (rc) goto x_rc; } else { - rw->on_mintstamp.llength= 0; + rw->on_lexminval.llength= 0; } mainfd= open(pathbuf_sfx(&rw->pbsome,".main"), O_RDONLY); @@ -401,7 +424,7 @@ int cht_do_cdbwr_open(ClientData cd, Tcl_Interp *ip, const char *pathb, for (;;) { logrecstart= ftello(rw->logfile); if (logrecstart < 0) PE("ftello .log during (dirty) open"); - r= readstorelogrecord(rw->logfile, &rw->logincore, ht_update); + r= readstorelogrecord(rw->logfile, &rw->logincore, 0,0, ht_update); if (ferror(rw->logfile)) { rc= cht_posixerr(ip, errno, "error reading .log during (dirty) open"); goto x_rc; @@ -451,9 +474,61 @@ int cht_do_cdbwr_open(ClientData cd, Tcl_Interp *ip, const char *pathb, return rc; } -/*---------- Compacting ----------*/ +/*==================== COMPACTING ====================*/ + +struct ht_forall_ctx { + struct cdb_make cdbm; + FILE *mainfile; + int lexminvall; + long *reccount; + const char *lexminval; +}; + +/*---------- helper functions ----------*/ -static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { +static int expiredp(const HashValue *val, struct ht_forall_ctx *a) { + int r, l; + if (!val->len) return 0; + l= val->len < a->lexminvall ? val->len : a->lexminvall; + r= memcmp(val->data, a->lexminval, l); + if (r>0) return 0; + if (r<0) return 1; + return val->len < a->lexminvall; +} + +static int delete_ifexpired(const char *key, HashValue *val, + struct ht_forall_ctx *a) { + if (!expiredp(val, a)) return 0; + val->len= 0; + /* we don't actually need to realloc it to free the memory because + * this will shortly all be deleted as part of the compaction */ + return 0; +} + +static int addto_cdb(const char *key, HashValue *val, + struct ht_forall_ctx *a) { + return cdb_make_add(&a->cdbm, key, strlen(key), val->data, val->len); +} + +static int addto_main(const char *key, HashValue *val, + struct ht_forall_ctx *a) { + int r; + + (*a->reccount)++; + + r= fprintf(a->mainfile, "+%d,%d:%s->", strlen(key), val->len, key); + if (r<0) return -1; + + r= fwrite(val->data, 1, val->len, a->mainfile); + if (r != val->len) return -1; + + return 0; +} + +/*---------- compact main entrypoint ----------*/ + +static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize, + long *reccount_r) { /* creates new .cdb and .main * closes logfile * leaves .log with old data @@ -462,18 +537,16 @@ static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { */ int r, rc; int cdbfd, cdbmaking; - off_t errpos; + off_t errpos, newmainsz; char buf[100]; - - struct ht_forall_ctx { - struct cdb_make cdbm; - FILE *mainfile; - int count; - } a; + Tcl_Obj *res; + struct ht_forall_ctx a; a.mainfile= 0; cdbfd= -1; cdbmaking= 0; + *reccount_r= 0; + a.reccount= reccount_r; r= fclose(rw->logfile); if (r) { rc= cht_posixerr(ip, errno, "probable data loss! failed to fclose" @@ -484,11 +557,21 @@ static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { logsize, (unsigned long)rw->mainsz); if (rc) goto x_rc; - if (rw->on_mintstamp.llength) { - rc= cht_scriptinv_invoke_fg(&rw->on_mintstamp, 0,0); + if (rw->on_lexminval.llength) { + rc= cht_scriptinv_invoke_fg(&rw->on_lexminval, 0,0); if (rc) goto x_rc; - Tcl_GetObjResult(ip) fixme do something here + res= Tcl_GetObjResult(ip); assert(res); + a.lexminval= Tcl_GetStringFromObj(res, &a.lexminvall); + assert(a.lexminval); + + /* we rely not calling Tcl_Eval during the actual compaction; + * if we did Tcl_Eval then the interp result would be trashed. + */ + rc= ht_forall(&rw->logincore, delete_ifexpired, &a); + + } else { + a.lexminval= ""; } /* merge unsuperseded records from main into hash table */ @@ -497,7 +580,9 @@ static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { if (!a.mainfile) PE("failed to open .main for reading during compact"); for (;;) { - r= readstorelogrecord(a.mainfile, &rw->logincore, ht_maybeupdate); + r= readstorelogrecord(a.mainfile, &rw->logincore, + expiredp, &a, + ht_maybeupdate); if (ferror(a.mainfile)) { rc= cht_posixerr(ip, errno, "error reading" " .main during compact"); goto x_rc; } @@ -530,10 +615,10 @@ static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { if (r) PE("cdb_make_start during compact"); cdbmaking= 1; - r= ht_forall(&rw->logincore, addto_cdb, &addctx); + r= ht_forall(&rw->logincore, addto_cdb, &a); if (r) PE("cdb_make_add during compact"); - r= cdb_make_finish(&a.cdbm, cdbfd); + r= cdb_make_finish(&a.cdbm); if(r) PE("cdb_make_finish during compact"); cdbmaking= 0; @@ -549,14 +634,16 @@ static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { a.mainfile= fopen(pathbuf_sfx(&rw->pbsome,".tmp"), "w"); if (!a.mainfile) PE("create .tmp for new main during compact"); - a.count= 0; - r= ht_forall(&rw->logincore, addto_main, a.mainfile); + r= ht_forall(&rw->logincore, addto_main, &a); if (r) { rc= cht_posixerr(ip, r, "error writing to new .main" " during compact"); goto x_rc; } r= fflush(a.mainfile); if (r) PE("fflush new main during compact"); r= fdatasync(fileno(a.mainfile)); if (r) PE("fdatasync new main during compact"); + + newmainsz= ftello(a.mainfile); + if (newmainsz<0) PE("ftello new main during compact"); r= fclose(a.mainfile); if (r) PE("fclose new main during compact"); a.mainfile= 0; @@ -564,53 +651,69 @@ static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsize) { r= rename(rw->pbsome.buf, pathbuf_sfx(&rw->pbother,".main")); if (r) PE("install new .main during compact"); + rw->mainsz= newmainsz; + /* done! */ - rc= infocb(ip, rw, "compact-end", "log=%luby main=%luby", - logsize, (unsigned long)rw->mainsz); + rc= infocb(ip, rw, "compact-end", "log=%luby main=%luby nrecs=%l", + logsize, (unsigned long)rw->mainsz, *a.reccount); if (rc) goto x_rc; rc= TCL_OK; x_rc: - if (mainfile) fclose(mainfile); - if (cdbmaking) cdb_make_finish(&a.cdbm, cdbfd); + if (a.mainfile) fclose(a.mainfile); + if (cdbmaking) cdb_make_finish(&a.cdbm); maybe_close(cdbfd); remove(pathbuf_sfx(&rw->pbsome,".tmp")); /* for tidyness */ } - -static void compact_forclose(Tcl_Interp *ip, Rw *rw) { + +/*---------- Closing ----------*/ + +static int compact_forclose(Tcl_Interp *ip, Rw *rw, long *reccount_r) { off_t logsz; - int rc; + int r, rc; logsz= ftello(rw->logfile); if (logsz < 0) PE("ftello logfile (during tidy close)"); - rc= compact_core(ip, rw, logsz); if (rc) goto x_rc; + rc= compact_core(ip, rw, logsz, reccount_r); if (rc) goto x_rc; r= remove(pathbuf_sfx(&rw->pbsome,".log")); if (r) PE("remove .log (during tidy close)"); + + return TCL_OK; + +x_rc: return rc; } int cht_do_cdbwr_close(ClientData cd, Tcl_Interp *ip, void *rw_v) { Rw *rw= rw_v; - int rc, compact_rc, infocb_rc; - - if (rw->autocompact) compact_rc= compact_forclose(ip, rw); - else compact_rc= TCL_OK; + int rc, rc_close; + long reccount= -1; + off_t logsz; - rc= rw_close(ip,rw); - infocb_rc= infocb_close(rw); + if (rw->autocompact) rc= compact_forclose(ip, rw, &reccount); + else rc= TCL_OK; + + if (!rc) { + if (!rw->logfile) { + logsz= ftello(rw->logfile); + if (logsz < 0) + rc= cht_posixerr(ip, errno, "ftell logfile during close info"); + else + rc= infocb(ip, rw, "close", "main=%luby log=%luby", + rw->mainsz, logsz); + } else if (reccount>=0) { + rc= infocb(ip, rw, "close", "main=%luby nrecs=%l", rw->mainsz, reccount); + } else { + rc= infocb(ip, rw, "close", "main=%luby", rw->mainsz); + } + } + rc_close= rw_close(ip,rw); + if (rc_close) rc= rc_close; cht_tabledataid_disposing(ip, rw_v, &cdbtcl_rwdatabases); - if (!rc) rc= compact_rc; - if (!rc) rc= infocb_rc; return rc; } - -int cht_do_cdbwr_lookup(ClientData cd, Tcl_Interp *ip, void *db, Tcl_Obj *key, Tcl_Obj **result); -int cht_do_cdbwr_lookup_hb(ClientData cd, Tcl_Interp *ip, void *db, HBytes_Value key, HBytes_Value *result); -int cht_do_cdbwr_update(ClientData cd, Tcl_Interp *ip, void *db, Tcl_Obj *key, Tcl_Obj *value); -int cht_do_cdbwr_update_hb(ClientData cd, Tcl_Interp *ip, void *db, HBytes_Value key, HBytes_Value value); -int cht_do_cdbwr_update_quick(ClientData cd, Tcl_Interp *ip, void *db, Tcl_Obj *key, Tcl_Obj *value); -int cht_do_cdbwr_update_quick_hb(ClientData cd, Tcl_Interp *ip, void *db, HBytes_Value key, HBytes_Value value); +/*==================== Other entrypoints ====================*/