From: ian Date: Mon, 16 Jan 2006 00:25:39 +0000 (+0000) Subject: much work on hash tables etc. X-Git-Tag: debian/1.1.1~85 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=chiark-tcl.git;a=commitdiff_plain;h=6c2630597a21671a6b44954cb829b98dd84a22ee much work on hash tables etc. --- diff --git a/cdb/cdb.tct b/cdb/cdb.tct index fca6ecb..0999af9 100644 --- a/cdb/cdb.tct +++ b/cdb/cdb.tct @@ -22,7 +22,7 @@ Table cdb Cdb_SubCommand db iddata(&cdbtcl_databases) Table cdbwr Cdbwr_SubCommand - create-empty -1 + create-empty 0 pathb string # files: # .main @@ -35,29 +35,51 @@ Table cdbwr Cdbwr_SubCommand # which is locked with fcntl by open # .main is a cdb native text file # and always exists - # if .tmp exists it is irrelevant - # if .cdb exists it is a cdb database - # containing exactly the same as .main + # .cdb is a cdb database containing data + # equivalent to and at least as recent as .main + # (maybe not identical, because .cdb may + # have been updated with data from .log but + # .main not yet); if .log does not exist then + # they are identical) + # .cdb may not exist; in which case it is to + # be treated as if it existed and was empty + # but this is maximally early (so main must + # exist and be empty since .main is never + # newer than .cdb) # if .log exists, it is a cdb native # text file _without the trailing newline_; # its contents override values from .main or .cdb - open -1 + # if .main.tmp or .cdb.tmp exists it is irrelevant + # zero length values mean record is deleted (in .log only; + # forbidden elsewhere) + # while db is open: + # .lock is locked + # .log and open hash table contain same info + open 0 + pathb string + on_info obj + => iddata(&cdbtcl_rwdatabases) + open-okjunk RWSCF_OKJUNK pathb string on_info obj => iddata(&cdbtcl_rwdatabases) # on_info ...: # on_info open-clean - # on_info open-dirty + # on_info open-dirty-start + # on_info open-dirty-junk + # on_info open-dirty-done # on_info compact-start - # on_info compact-end + # on_info compact-done # on_info close - lookup 1 + lookup 0 db iddata(&cdbtcl_rwdatabases) key obj + ?def obj => obj - lookup-hb 1 + lookup-hb 0 db iddata(&cdbtcl_rwdatabases) key hb + ?def hb => hb update 0 db iddata(&cdbtcl_rwdatabases) @@ -67,18 +89,16 @@ Table cdbwr Cdbwr_SubCommand db iddata(&cdbtcl_rwdatabases) key hb value hb - update-quick 1 + compact-force 0 db iddata(&cdbtcl_rwdatabases) - key obj - value obj - update-quick-hb 1 + compact-check 0 db iddata(&cdbtcl_rwdatabases) - key hb - value hb - close 0 + compact-onupdate 0 # this is the default db iddata(&cdbtcl_rwdatabases) - close-quick 1 + compact-explicit 0 + db iddata(&cdbtcl_rwdatabases) + close 0 db iddata(&cdbtcl_rwdatabases) EntryExtra Cdbwr_SubCommand - int quick; + unsigned flags; diff --git a/cdb/chiark_tcl_cdb.h b/cdb/chiark_tcl_cdb.h index db2d3d1..ff5424b 100644 --- a/cdb/chiark_tcl_cdb.h +++ b/cdb/chiark_tcl_cdb.h @@ -4,9 +4,17 @@ #ifndef CHIARK_TCL_CDB_H #define CHIARK_TCL_CDB_H +#include +#include +#include + +#include + #include "hbytes.h" #include "cdb+tcmdif.h" +#define RWSCF_OKJUNK 002 + extern const IdDataSpec cdbtcl_databases, cdbtcl_rwdatabases; #endif /*CHIARK_TCL_CDB_H*/ diff --git a/cdb/writeable.c b/cdb/writeable.c index 15afdf5..15ffed2 100644 --- a/cdb/writeable.c +++ b/cdb/writeable.c @@ -1,7 +1,23 @@ /**/ +#include + #include "chiark_tcl_cdb.h" + + +struct ht_forall_ctx; + + + +static void maybe_close(int fd) { + if (fd>=0) close(fd); +} + +#define PE do{ \ + rc= cht_posixerr(ip, errno, "failed to " PE); goto x_rc; \ + }while(0) + /*---------- Pathbuf ----------*/ typedef struct { @@ -27,39 +43,516 @@ static void pathbuf_free(Pathbuf *pb) { pb->buf= 0; } +/*---------- Our hash table ----------*/ + +typedef struct HashTable { + Tcl_HashTable t; + Byte confound[16]; +} HashTable; + +typedef struct HashDatum { + int len; + Byte data[1]; +} HashDatum; + +static unsigned int hkt_hash(Tcl_HashTable *tht, void *key_v) { + HashTable *ht= (void*)tht; + const HashDatum *key= key_v; + + struct md4_ctx mdc; + Byte mdr[MD4_DIGEST_SIZE]; + + md4_init(&mdc); + md4_update(&mdc, sizeof(ht->confound), ht->confound); + md4_update(&mdc, key->len, key->data); + md4_digest(&mdc, sizeof(mdr), mdr); + assert(sizeof(int) <= MD4_DIGEST_SIZE); + return *(int*)md4; +} + +static const struct Tcl_HashKeyType ht_keytype= { + TCL_HASH_KEY_TYPE_VERSION, 0, + htk_hash, htk_compare, 0, 0 +} + +static HashDatum *htd_prep(int len) { + HashDatum *hd; + hd= TALLOC(offsetof(HashDatum, data) + len); + hd->len= len; +} +static Byte *htd_fillptr(HashDatum *hd) { + return hd->data; +} +static void htd_fill(HashDatum *hd, const Byte *data) { + memcpy(hd->data, data, hd->len); +} + +static int ht_setup(HashTable *ht) { + rc= cht_get_urandom(ip, ht->confound, sizeof(ht->confound)); + if (rc) return rc; + + Tcl_InitCustomHashTable(&ht->t, TCL_CUSTOM_PTR_KEYS, &ht_keytype); + return TCL_OK; +} +static void ht_update(HashTable *ht, HashDatum *key_eat, HashDatum *val_eat) { + Tcl_HashEntry *he; + int new; + + he= Tcl_CreateHashEntry(&ht->t, key_eat, &new); + /* eats the key! (since the data structure owns the memory) */ + if (!new) TFREE(Tcl_GetHashValue(he)); + Tcl_SetHashValue(he, val_eat); +} + +static int ht_forall(HashTable *ht, + int (*fn)(const HashDatum *key, const HashDatum *val, + struct ht_forall_ctx *ctx), + struct ht_forall_ctx *ctx) { + /* Returns first nonzero value returned by any call to fn, or 0. */ + Tcl_HashSearch sp; + Tcl_HashEntry *he; + HashDatum *key, *val; + + for (he= Tcl_FirstHashEntry(&ht->t, &sp); + he; + he= Tcl_NextHashEntry(&ht->t, &sp)) { + val= Tcl_GetHashValue(he); + if (!val->len) continue; + + key= Tcl_GetHashKey(&ht->t, he); + + r= fn(key, val, ctx); + if (r) return r; + } + return 0; +} + /*---------- Rw data structure ----------*/ typedef struct { - Pathbuf pbsome, pbtmp; + int ix, autocompact; + int cdb_fd, lock_fd; + struct cdb cdb; /* valid iff cdb_fd >= 0 */ + off_t cdb_bytes; /* valid iff cdb_fd >= 0 */ + FILE *logfile; + HashTable logincore; + Pathbuf pbsome, pbother; + off_t mainsz; } Rw; static void destroy_cdbrw_idtabcb(Tcl_Interp *ip, void *val) { abort(); } +int rw_close(Interp *ip, Rw *rw) { + int rc, r; + + rc= TCL_OK; + ht_destroy(rw); + maybe_close(rw->cdb_fd); + maybe_close(rw->lock_fd); + + if (rw->logfile) { + r= fclose(rw->logfile); + if (r && ip) { rc= cht_posixerr(ip, errno, "data loss! failed to" + " fclose logfile during untidy close"); } + } + + pathbuf_free(&rw->pbsome); pathbuf_free(&rw->pbother); + TFREE(rw); + return rc; +}; +void destroy_cdbrw_idtabcb(Interp *ip, Rw *rw) { rw_close(0,rw); } + const IdDataSpec cdbtcl_rwdatabases= { "cdb-rwdb", "cdb-openrwdatabases-table", destroy_cdbrw_idtabcb }; +/*---------- File handling ----------*/ + +int acquire_lock(Tcl_Interp *ip, PathBuf *pb, int *lockfd_r) { + /* *lockfd_r must be -1 on entry. If may be set to >=0 even + * on error, and must be closed by the caller. */ + mode_t umask, lockmode; + struct flock fl; + + umask= umask(~(mode_t)0); + umask(umask); + + lockmode= 0666 & ~((umask & 0444)>>1); + /* Remove r where umask would remove w; + * eg umask intending 0664 here gives 0660 */ + + *lockfd_r= open(pathbuf_sfx(".lock"), O_RDONLY|O_CREAT, lockmode); + if (*lockfd_r < 0) + return cht_posixerr(ip, errno, "could not open/create lockfile"); + + fl.l_type= F_WRLCK; + fl.l_whence= SEEK_SET; + fl.l_start= 0; + fl.l_end= 0; + fl.l_pid= getpid(); + + r= fcntl(*lockfd_r, F_SETLK, &fl); + if (r == -1) { + if (errno == EACCES || errno == EAGAIN) + return cht_staticerr(ip, "lock held by another process", "CDB LOCKED"); + else return cht_posixerr(ip, errno, "unexpected error from fcntl while" + " acquiring lock"); + } +} + +/*---------- Log reading ----------*/ + +static int readlognum(FILE *f, int delim, int *num_r) { + int c; + char numbuf[20], *p, *ep; + unsigned long ul; + + p= numbuf; + for (;;) { + c= getc(f); if (c==EOF) return -2; + if (c == delim) break; + if (!isdigit((unsigned char)c)) return -2; + *p++= c; + if (p == numbuf+sizeof(numbuf)) return -2; + } + if (p == numbuf) return -2; + *p= 0; + + errno=0; ul= strtoul(numbuf, &ep, 10); + if (*ep || errno || ul >= INT_MAX/2) return -2; + *num_r= ul; + return 0; +} + +static int readstorelogrecord(FILE *f, HashTable *ht, + void (*updatefn)(HashTable*, HashDatum*, + HashDatum*)) { + /* returns: + * 0 for OK + * -1 eof + * -2 corrupt or error + * -3 got newline indicating end + */ + int keylen, vallen; + HashDatum *key, *val; + + c= getc(f); + if (c==EOF) { if (feof(f)) return -1; return -2; } + if (c=='\n') return -3; + if (c!='+') return -2; + + rc= readlognum(f, ',', &keylen); if (rc) return rc; + rc= readlognum(f, ':', &vallen); if (rc) return rc; + + key= htd_prep(keylen); + val= htd_prep(vallen); + + r= fread(htd_fillptr(key), 1,keylen, f); + if (r!=keylen) goto x2_free_keyval; + + c= getc(f); if (c!='-') goto x2_free_keyval; + c= getc(f); if (c!='>') goto x2_free_keyval; + + r= fread(htd_fillptr(val), 1,vallen, f); + if (r!=vallen) goto x2_free_keyval; + + updatefn(ht, key, val); + return TCL_OK; +x2_free_keyval; + TFREE(val); + TFREE(key); + return -2; +} -/*---------- Misc functionality ----------*/ +/*---------- Creating ----------*/ int cht_do_cdbwr_create_empty(ClientData cd, Tcl_Interp *ip, const char *pathb) { + static const char *const toremoves[]= { + ".main", ".cdb", ".log", ".tmp", 0 + } + Pathbuf pb; - int lock_fd=-1, fd=-1; + int lock_fd=-1, fd=-1, rc; + const char *const *toremove; pathbuf_init(&pb, pathb); rc= acquire_lock(ip, &pb, &lock_fd); if (rc) goto x_rc; - fd= open(pathbuf_sfx(".lock"), O_RDONLY + fd= open(pathbuf_sfx(".main"), O_RDWR|O_CREAT|O_EXCL, 0666); + if (fd <= 0) PE("create new database file"); + + for (toremoves=toremove; *toremove; toremove++) { + r= remove(*toremove); + if (r && errno != ENOENT) + PE("delete possible spurious file during creation"); + } + + rc= TCL_OK; + + x_rc: + maybe_close(fd); + maybe_close(lock_fd); + pathbuf_free(&pb); + return rc; +} + +/*---------- Opening ----------*/ + +int cht_do_cdbwr_open(ClientData cd, Tcl_Interp *ip, + const char *pathb, Tcl_Obj *on_info, void **result) { + const Cdbwr_SubCommand *subcmd= cd; + int rc, mainfd=-1; + Rw *rw; + struct stat stab; + fpos_t logrecstart; + + rw= TALLOC(sizeof(*rw)); + rc= ht_setup(&rw->logincore); if (rc) { TFREE(rw); return rc; } + rw->cdb_fd= rw->lock_fd= -1; rw->logfile= 0; + pathbuf_init(&rw->pbsome, &pathb); + pathbuf_init(&rw->pbother, &pathb); + rw->autocompact= 1; + + mainfd= open(pathbuf_sfx(&rw->pbsome,".main"), O_RDONLY); + if (mainfd<0) PE("open exist3ing database file .main"); + rc= acquire_lock(ip, &rw->pbsome, &rw->lock_fd); if (rc) goto x_rc; + + r= stat(mainfd, &stab); if (r) PE("fstat .main"); + rw->mainsz= stab.st_size; + + rw->cdb_fd= open(pathbuf_sfx(&rw->pbsome,".cdb"), O_RDONLY); + if (fd>=0) { + r= cdb_init(&rw->cdb, rw->cdb_fd); + if (r) { + rc= cht_posixerr(ip, errno, "failed to initialise cdb reader"); + close(rw->cdb_fd); rw->cdb_fd= -1; goto x_rc; + } + } else if (errno == ENOENT) { + if (rw->mainsz) { + rc= cht_staticerr(ip, ".cdb does not exist but .main is nonempty -" + " .cdb must have been accidentally deleted!", + "CDB CDBMISSING"); + goto x_rc; + } + /* fine */ + } else { + PE("open .cdb"); + } + + rw->logfile= fopen(pathbuf_sfx(&rw->pbsome,".log"), "r+"); + if (!rw->logfile) { + if (errno != ENOENT) PE("failed to open .log during open"); + rw->logfile= fopen(rw->pbsome.buf, "w"); + if (!rw->logfile) PE("create .log during (clean) open"); + } else { /* rw->logfile */ + r= fstat(fileno(rw->logfile), &stab); + if (r==-1) PE("fstat .log during open"); + rc= infocb(rw, "open-dirty-start", "log=%luby", + (unsigned long)stab.st_size); + if (rc) goto x_rc; + + for (;;) { + r= fgetpos(rw->logfile, &logrecstart); + if (r) PE("fgetpos .log during (dirty) open"); + r= readstorelogrecord(rw->logfile, &rw->logincore, ht_update); + if (ferror(rw->logfile)) { + rc= cht_posixerr(ip, errno, "error reading .log during (dirty) open"); + goto x_rc; + } + if (r==-1) { + break; + } else if (r==-2 || r==-3) { + char buf[100]; + r= fgetpos(rw->logfile, &logjunkpos); + if (r) PE("fgetpos .log during report of junk in dirty open"); + + snprintf(buf,sizeof(buf), "CDB SYNTAX LOG %lu %lu", + (unsigned long)junkpos, (unsigned long)logrecstart); + + if (!(subcmd->flags & RWSCF_OKJUNK)) { + Tcl_SetObjErrorCode(ip, Tcl_NewStringObj(buf,-1)); + snprintf(buf,sizeof(buf),"%lu",(unsigned long)junkpos); + Tcl_ResetResult(ip); + Tcl_AppendResult(ip, "syntax error (junk) in .log during" + " (dirty) open, at file position ", buf, (char*)0); + rc= TCL_ERROR; + goto x_rc; + } + rc= infocb(rw, "open-dirty-junk", "errorfpos=%luby {%s}", + (unsigned long)logjunkpos, buf); + if (rc) goto x_rc; + + r= fsetpos(rw->logfile, logrecstart); + if (r) PE("failed to fsetpos .log before junk during dirty open"); + + r= ftruncate(fileno(rw->logfile), logrecstart); + if (r) PE("ftruncate .log to chop junk during dirty open"); + } else { + assert(!r); + } + } + } + /* now log is positioned for appending and everything is read */ + + *result= rw; + maybe_close(mainfd); + return TCL_OK; + + x_rc: + rw_close(0,rw); + maybe_close(mainfd); + return rc; +} + +/*---------- Compacting ----------*/ + +static int compact_core(Tcl_Interp *ip, Rw *rw, unsigned long logsz) { + /* creates new .cdb and .main + * closes logfile + * leaves .log with old data + * leaves cdb fd open onto old db + * leaves logincore full of crap + */ + int rc; + int cdbfd, cdbmaking; + struct ht_forall_ctx { + struct cdb_make cdbm; + FILE *mainfile; + int count; + } addctx; + + a.mainfile= 0; + cdbfd= -1; + cdbmaking= 0; + + r= fclose(rw->logfile); + if (r) { rc= cht_posixerr(ip, errno, "data loss! failed to fclose" + " logfile during compact"); goto x_rc; } + rw->logfile= 0; + + rc= infocb(ip, rw, "compact-start", "log=%luby main=%luby", + logsize, (unsigned long)rw->mainsz); + if (rc) goto x_rc; + + /* merge unsuperseded records from main into hash table */ + + a.mainfile= fopen(pathbuf_sfx(&rw_pbsome,".main"), "r"); + if (!a.mainfile) PE("failed to open .main for reading during compact"); + + for (;;) { + r= readstorelogrecord(a.mainfile, &rw->logincore, ht_maybeupdate); + if (ferror(a.mainfile)) { rc= cht_posixerr(ip, errno, "error reading" + " .main during compact"); goto x_rc; + } + if (r==-3) { + break; + } else if (r==-1 || r==-2) { + r= fgetpos(a.mainfile, &errpos); + if (r) PE("fgetpos .main during report of syntax error"); + snprintf(buf,sizeof(buf), "CDB SYNTAX MAIN %lu", (uunsigned long)errpos); + Tcl_SetObjErrorCode(ip, Tcl_NewStringObj(buf,-1)); + snprintf(buf,sizeof(buf), "%lu", (uunsigned long)errpos); + Tcl_ResetResult(ip); + Tcl_AppendResult(ip, "syntax error in .main during" + " compact, at file position ", buf, (char*)0); + rc= TCL_ERROR; + goto x_rc; + } else { + assert(!rc); + } + } + fclose(a.mainfile); + a.mainfile= 0; + + /* create new cdb */ + + cdbfd= open(pathbuf_sfx(&rw->pbsome,".tmp"), O_WRONLY|O_CREAT|O_TRUNC, 0666); + if (cdbfd<0) PE("create .tmp for new cdb during compact"); + + r= cdb_make_start(&a.cdbm, cdbfd); + if (r) PE("cdb_make_start during compact"); + cdbmaking= 1; + + r= ht_forall(&rw->logincore, addto_cdb, &addctx); + if (r) PE("cdb_make_add during compact"); + + r= cdb_make_finish(&a.cdbm, cdbfd); + if(r) PE("cdb_make_finish during compact"); + cdbmaking= 0; + + r= fdatasync(cdbfd); if (r) PE("fdatasync new cdb during compact"); + r= close(cdbfd); if (r) PE("close new cdb during compact"); + cdbfd= -1; + + r= rename(rw->pbsome.buf, pathbuf_sfx(&rw->pbother,".cdb")); + if (r) PE("install new .cdb during compact"); + + /* create new main */ + + a.mainfile= fopen(pathbuf_sfx(&rw->pbsome,".tmp"), "w"); + if (!a.mainfile) PE("create .tmp for new main during compact"); + + a.count= 0; + r= ht_forall(&rw->logincore, addto_main, a.mainfile); + if (r) { rc= cht_posixerr(ip, r, "error writing to new .main" + " during compact"); goto x_rc; } + + r= fflush(a.mainfile); if (r) PE("fflush new main during compact"); + r= fdatasync(fileno(a.mainfile)); + if (r) PE("fdatasync new main during compact"); + r= fclose(a.mainfile); if (r) PE("fclose new main during compact"); + a.mainfile= 0; + r= rename(rw->pbsome.buf, pathbuf_sfx(&rw->pbother,".main")); + if (r) PE("install new .main during compact"); -int cht_do_cdbwr_open(ClientData cd, Tcl_Interp *ip, const char *pathb, Tcl_Obj *on_info, void **result); + /* done! */ + + rc= infocb(ip, rw, "compact-end", "log=%luby main=%luby", + logsize, (unsigned long)rw->mainsz); + if (rc) goto x_rc; + + rc= TCL_OK; +x_rc: + if (mainfile) fclose(mainfile); + if (cdbmaking) cdb_make_finish(&a.cdbm, cdbfd); + maybe_close(cdbfd); + remove(pathbuf_sfx(&rw->pbsome,".tmp")); /* for tidyness */ +} + +static void compact_forclose(Tcl_Interp *ip, Rw *rw) { + long logsz; + int rc; + logsz= ftell(rw->logfile); + if (logsz < 0) PE("ftell logfile (during tidy close)"); -int cht_do_cdbwr_close(ClientData cd, Tcl_Interp *ip, void *db); -int cht_do_cdbwr_close_quick(ClientData cd, Tcl_Interp *ip, void *db); + rc= compact_core(ip, rw, logsz); if (rc) goto x_rc; + + r= remove(pathbuf_sfx(&rw->pbsome,".log")); + if (r) PE("remove .log (during tidy close)"); +} + +int cht_do_cdbwr_close(ClientData cd, Tcl_Interp *ip, void *rw_v) { + Rw *rw= rw_v; + int rc, compact_rc, infocb_rc; + + if (rw->autocompact) compact_rc= compact_forclose(ip, rw); + else compact_rc= TCL_OK; + + rc= rw_close(ip,rw); + infocb_rc= infocb_close(rw); + + cht_tabledataid_disposing(ip, rw_v, &cdbtcl_rwdatabases); + if (!rc) rc= compact_rc; + if (!rc) rc= infocb_rc; + return rc; +} + + int cht_do_cdbwr_lookup(ClientData cd, Tcl_Interp *ip, void *db, Tcl_Obj *key, Tcl_Obj **result); int cht_do_cdbwr_lookup_hb(ClientData cd, Tcl_Interp *ip, void *db, HBytes_Value key, HBytes_Value *result); int cht_do_cdbwr_update(ClientData cd, Tcl_Interp *ip, void *db, Tcl_Obj *key, Tcl_Obj *value);