From ea266fc218d8d56681765a38533d91530f0c14fc Mon Sep 17 00:00:00 2001 From: Dan Sheppard Date: Sun, 4 May 2025 00:47:59 +0100 Subject: [PATCH] Don't flip superblock on no sync write. --- .vscode/settings.json | 9 ++- doc/TODO | 5 +- doc/sync.txt | 11 +++ doc/syncing-superblock.txt | 28 +++++++ src/coquet.c | 80 +++++++++++++++++--- src/coquet.h | 22 +++++- src/page.c | 6 +- src/superblock.c | 147 ++++++++++++++++++++++++++++++++----- src/testvfs.c | 10 ++- src/vfs.h | 1 + 10 files changed, 280 insertions(+), 39 deletions(-) create mode 100644 doc/sync.txt create mode 100644 doc/syncing-superblock.txt diff --git a/.vscode/settings.json b/.vscode/settings.json index 73c8923..aad64d6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,5 +12,10 @@ "superblock.h": "c", "page.h": "c" }, - "C_Cpp.dimInactiveRegions": false -} \ No newline at end of file + "editor.acceptSuggestionOnEnter": "on", + "C_Cpp.dimInactiveRegions": false, + "[plaintext]": { + "editor.wordWrap": "bounded", + "editor.wordWrapColumn": 80 + } +} diff --git a/doc/TODO b/doc/TODO index 6c87af5..d7ba699 100644 --- a/doc/TODO +++ b/doc/TODO @@ -2,4 +2,7 @@ * Replication * Strict locking option * lock timeout - +* superblock corrupt jamming +* switch to SHA256 (good h/w support for speed) +* shift superblock somewhere safer +* optional suffix work \ No newline at end of file diff --git a/doc/sync.txt b/doc/sync.txt new file mode 100644 index 0000000..2bfa8d5 --- /dev/null +++ b/doc/sync.txt @@ -0,0 +1,11 @@ +Syncing data with fsync/fdatasync has become somewhat controversial recently, and now requires justifications to combat kneejerk responses. + +The default coquet backend is a single file sitting on disk. For these purposes the coquet sync mechanisms can be regarded as synonymous with fdatasync. In distributed storage, however, it should be some other (hopefully faster) mechanism to provide the functionality. 
+ +Syncing is performed to tell an application that the likelihood of loss of data is reduced from one higher level set of risks to a lower level, and to do so while the situation and consequences are still easily remediable. Of course, backups and replications are also always required, and files can be corrupted due to bugs at any time. + +Similarly, data-centres can catch fire, companies can become bankrupt, legislatures can issue warrants, laws be passed, hackers intercept codebases, and meteorites can hit the earth. Durability is not an absolute issue; it is about managing risks. + +The first level of this risk management is moving data to disk (whatever that might mean for given hardware). This is beneficial because, until "on-disk", many of the higher-level risks have a probability proportional to the time they are present, and many of the subsequent levels of mitigations rely first on data being on disk. + +Per-page caches later reduce the risk of using corrupt data. However, they do this at the expense of the issue not being recoverable in-band. diff --git a/doc/syncing-superblock.txt b/doc/syncing-superblock.txt new file mode 100644 index 0000000..d9a2081 --- /dev/null +++ b/doc/syncing-superblock.txt @@ -0,0 +1,28 @@ +We must always have at least one valid superblock guaranteed synced to disk. This presents a challenge when we want to update the superblock without doing too many syncs, as there are only two slots. + +An additional issue is presented on first opening: we cannot know the sync status of the superblock, and so can't write anywhere. + +If, on our first superblock write, no sync has happened for any other reason in the connection, we must sync before the write. This is indicated by COQUET_FM_FLAG_SUPERSYNC being set. When considering the algorithm, COQUET_FM_FLAG_SUPERSYNC can be seen as a lazily evaluated sync of a hypothetical previous sync. + +We write to the *same* superblock as the most recent write unless COQUET_FM_FLAG_SUPERFLIP is set. 
+ +Flag use: + +On file open: +(a) set COQUET_FM_FLAG_SUPERSYNC + +On superblock write: +(a) if COQUET_FM_FLAG_SUPERSYNC: +(a.1) sync +(a.2) clear COQUET_FM_FLAG_SUPERSYNC +(a.3) set COQUET_FM_FLAG_SUPERFLIP + +(b.1) if COQUET_FM_FLAG_SUPERFLIP: write to not-most-recent superblock +(b.2) if not COQUET_FM_FLAG_SUPERFLIP: write to most-recent superblock + +(c.1) if syncing, sync and set COQUET_FM_FLAG_SUPERFLIP +(c.2) if not syncing, clear COQUET_FM_FLAG_SUPERFLIP + +On sync (anywhere): +(a) clear COQUET_FM_FLAG_SUPERSYNC +(b) set COQUET_FM_FLAG_SUPERFLIP diff --git a/src/coquet.c b/src/coquet.c index 47b382f..2aacbe1 100644 --- a/src/coquet.c +++ b/src/coquet.c @@ -169,7 +169,7 @@ static int find_last_main_block(coquet_t *cq, struct cq_filemeta *fm) { static int filemeta_setup_common(coquet_t *cq, struct cq_filemeta *fm) { int r; - /* load superblock */ + /* load superblock, TODO needed? */ r = cq_super_load(cq,fm->which_file,&(fm->super),1); // TODO "create mode" if(r != COQUET_RET_OK) { return r; @@ -187,8 +187,6 @@ static int filemeta_setup_common(coquet_t *cq, struct cq_filemeta *fm) { static int filemeta_setup_write(coquet_t *cq, struct cq_filemeta *fm) { int r; - fm->flags = 0; - r = filemeta_setup_common(cq,fm); if(r != COQUET_RET_OK) { return r; @@ -201,8 +199,6 @@ static int filemeta_setup_write(coquet_t *cq, struct cq_filemeta *fm) { static int filemeta_setup_read(coquet_t *cq, struct cq_filemeta *fm) { int r; - fm->flags = 0; - r = filemeta_setup_common(cq,fm); if(r != COQUET_RET_OK) { return r; @@ -212,6 +208,12 @@ static int filemeta_setup_read(coquet_t *cq, struct cq_filemeta *fm) { return COQUET_RET_OK; } +static int filemeta_finish_common(coquet_t *cq, struct cq_filemeta *fm) { + fm->flags &=~ (COQUET_FM_FLAG_READ|COQUET_FM_FLAG_WRITE); + + return COQUET_RET_OK; +} + bool cq_writing(coquet_t *cq, int which_file) { return ((cq->flags & COQUET_CQ_FILE_VALID(which_file)) && (cq->filemeta[which_file].flags & COQUET_FM_FLAG_WRITE)); @@ -223,6 +225,39 @@ 
bool cq_reading(coquet_t *cq, int which_file) { (COQUET_FM_FLAG_WRITE|COQUET_FM_FLAG_READ))); } +int cq_open(coquet_t *cq, int which_file, int mode) { + int r; + struct cq_filemeta *fm; + + fm = &(cq->filemeta[which_file]); + + /* open */ + r = (cq->vfs_funcs.open)(cq->vfs_data, + COQUET_FILE_MAIN,COQUET_CMODE_EITHER); + if(r != COQUET_RET_OK) { + return r; + } + + /* setup */ + cq->flags |= COQUET_CQ_FILE_VALID(which_file); + fm->flags = COQUET_FM_FLAG_SUPERSYNC; /* sync before sb write */ + + return COQUET_RET_OK; +} + +int cq_close(coquet_t *cq, int which_file) { + int r; + + r = (cq->vfs_funcs.close)(cq->vfs_data,which_file); + if(r != COQUET_RET_OK) { + return r; + } + + cq->flags &=~ COQUET_CQ_FILE_VALID(which_file); + + return COQUET_RET_OK; +} + int coquet_write_start(coquet_t *cq, int which_file, bool wait) { int r; @@ -244,7 +279,6 @@ int coquet_write_start(coquet_t *cq, int which_file, bool wait) { COQUET_LOCK_WRITE,COQUET_LMODE_UN,wait); return r; } - cq->flags |= COQUET_CQ_FILE_VALID(which_file); return COQUET_RET_OK; } @@ -263,8 +297,11 @@ int coquet_write_end(coquet_t *cq, int which_file) { return r; } - /* mark stale */ - cq->flags &=~ COQUET_CQ_FILE_VALID(which_file); + /* reset flags */ + r = filemeta_finish_common(cq,&(cq->filemeta[which_file])); + if(r != COQUET_RET_OK) { + return r; + } return COQUET_RET_OK; } @@ -281,18 +318,39 @@ int coquet_read_start(coquet_t *cq, int which_file, bool wait) { if(r != COQUET_RET_OK) { return r; } - cq->flags |= COQUET_CQ_FILE_VALID(which_file); return COQUET_RET_OK; } int coquet_read_end(coquet_t *cq, int which_file) { + int r; + if(!cq_reading(cq,which_file) || cq_writing(cq,which_file)) { return COQUET_RET_TRSMERROR; } - /* mark stale */ - cq->flags &=~ COQUET_CQ_FILE_VALID(which_file); + /* reset flags */ + r = filemeta_finish_common(cq,&(cq->filemeta[which_file])); + if(r != COQUET_RET_OK) { + return r; + } + + return COQUET_RET_OK; +} + +int cq_datasync(coquet_t *cq, int which_file) { + struct 
cq_filemeta *fm; + int ret; + + fm = &(cq->filemeta[which_file]); + + ret = (cq->vfs_funcs.sync)(cq->vfs_data,COQUET_FILE_MAIN,1); + if(ret != COQUET_RET_OK) { + return ret; + } + + fm->flags &=~ COQUET_FM_FLAG_SUPERSYNC; + fm->flags |= COQUET_FM_FLAG_SUPERFLIP; return COQUET_RET_OK; } diff --git a/src/coquet.h b/src/coquet.h index 9bb7f82..892ea34 100644 --- a/src/coquet.h +++ b/src/coquet.h @@ -11,13 +11,20 @@ #define COQUET_CQ_FILE_VALID(file) (COQUET_CQ_FLAG_MAINVALID<<(file)) -#define COQUET_FM_FLAG_WRITE 0x01 -#define COQUET_FM_FLAG_READ 0x02 +#define COQUET_FM_FLAG_WRITE 0x00000001 +#define COQUET_FM_FLAG_READ 0x00000002 +/* superblock was synced since last write, good, flip on next write */ +#define COQUET_FM_FLAG_SUPERFLIP 0x00000004 +/* file must be synced before first sb write, no known sync */ +#define COQUET_FM_FLAG_SUPERSYNC 0x00000008 struct cq_filemeta { + /* valid whenever meta is valid*/ int which_file; + uint64_t flags; + + /* valid only when writing or reading (flags & 1 or flags & 2) */ struct cq_super super; - uint8_t flags; uint64_t last_main_block; }; @@ -54,9 +61,16 @@ int coquet_write_end(coquet_t *cq, int which_file); int coquet_read_start(coquet_t *cq, int which_file, bool wait); int coquet_read_end(coquet_t *cq, int which_file); - /* test if we can read/write */ bool cq_reading(coquet_t *cq, int which_file); bool cq_writing(coquet_t *cq, int which_file); +/* perform vfs data sync, with state updates */ +int cq_datasync(coquet_t *cq, int which_file); + +/* open/close a file, setting up as necessary */ +int cq_open(coquet_t *cq, int which_file, int mode); +int cq_close(coquet_t *cq, int which_file); + + #endif diff --git a/src/page.c b/src/page.c index f20d9e9..05e08dc 100644 --- a/src/page.c +++ b/src/page.c @@ -106,7 +106,7 @@ void test_page() { test_bail(&cq,r); /* first page */ - r = (cq.vfs_funcs.open)(cq.vfs_data,COQUET_FILE_MAIN,COQUET_CMODE_CREATE); + r = cq_open(&cq,COQUET_FILE_MAIN,COQUET_CMODE_CREATE); test_bail(&cq,r); 
cq_super_load(&cq,COQUET_FILE_MAIN,&super,1); @@ -122,10 +122,10 @@ void test_page() { r = coquet_write_end(&cq,COQUET_FILE_MAIN); test_bail(&cq,r); - r = (cq.vfs_funcs.close)(cq.vfs_data,COQUET_FILE_MAIN); + cq_close(&cq,COQUET_FILE_MAIN); test_bail(&cq,r); - r = (cq.vfs_funcs.open)(cq.vfs_data,COQUET_FILE_MAIN,COQUET_CMODE_OPEN); + r = cq_open(&cq,COQUET_FILE_MAIN,COQUET_CMODE_OPEN); test_bail(&cq,r); r = coquet_write_start(&cq,COQUET_FILE_MAIN,1); diff --git a/src/superblock.c b/src/superblock.c index c3dcafa..bdce9de 100644 --- a/src/superblock.c +++ b/src/superblock.c @@ -103,26 +103,44 @@ static int super_init(coquet_t *cq, struct cq_super *super) { return COQUET_RET_OK; } +/* TODO super due to split, anticipating */ +static int super_load_half(coquet_t *cq, int which_file, + struct cq_super *super, int *ok, int use_b) { + uint8_t super_bytes[SUPER_BYTES]; + int r; + + r = (cq->vfs_funcs.read) + (cq->vfs_data,which_file,super_bytes,0,SUPER_BYTES); + if(r != COQUET_RET_OK) { + return r; + } + *ok = !!extract_half(super_bytes+(use_b?HALF_BYTES:0),super); + return COQUET_RET_OK; +} + /* Extract a superblock into the given structure and check it. We don't * have a valid global_iv, we just have to trust the one in the block, * reducing the HMAC to a simple hash in terms of guarantees. Requires the * main file to be open. 
*/ int cq_super_load(coquet_t *cq, int which_file, struct cq_super *super, bool create) { - uint8_t super_bytes[SUPER_BYTES]; struct cq_super super_a, super_b; - int r,r2; + int r,r_a,r_b; bool use_b; - r = (cq->vfs_funcs.read) - (cq->vfs_data,which_file,super_bytes,0,SUPER_BYTES); + /* superblock A */ + r = super_load_half(cq,which_file,&super_a,&r_a,0); if(r != COQUET_RET_OK) { return r; } - r = !!extract_half(super_bytes,&super_a); - r2 = !!extract_half(super_bytes+HALF_BYTES,&super_b); - switch(r*2+r2) { + /* superblock B */ + r = super_load_half(cq,which_file,&super_b,&r_b,1); + if(r != COQUET_RET_OK) { + return r; + } + + switch(r_a*2+r_b) { case 3: /* both valid */ use_b = (super_b.sb_serial > super_a.sb_serial); break; @@ -195,8 +213,24 @@ static int super_write(coquet_t *cq, struct cq_super *super, } int cq_super_save(coquet_t *cq, int which_file, struct cq_super *super, bool wait, bool sync) { - int r, ret; + int r, ret, use_b; struct cq_super old; + struct cq_filemeta *fm; + + fm = &(cq->filemeta[which_file]); + + /* sync on first open, so at least one (actually both) superblocks + * are synced before we trample over one of them. + * (see doc/syncing-superblock.txt) + */ + if(fm->flags & COQUET_FM_FLAG_SUPERSYNC) { + ret = (cq->vfs_funcs.sync)(cq->vfs_data,COQUET_FILE_MAIN,1); + if(ret != COQUET_RET_OK) { + return ret; + } + fm->flags &=~ COQUET_FM_FLAG_SUPERSYNC; + fm->flags |= COQUET_FM_FLAG_SUPERFLIP; + } /* lock */ r = lock_super(cq,which_file,COQUET_LMODE_EXCL,wait); @@ -206,17 +240,26 @@ int cq_super_save(coquet_t *cq, int which_file, struct cq_super *super, bool wai /* load */ ret = cq_super_load(cq,which_file,&old,0); + + /* which half? 
*/ + use_b = old.from_b; + if(fm->flags & COQUET_FM_FLAG_SUPERFLIP) { + use_b = !use_b; + } + + /* save */ if(ret == COQUET_RET_OK) { - ret = super_write(cq,super,!old.from_b,old.sb_serial+1); + ret = super_write(cq,super,use_b,old.sb_serial+1); } else if(ret == COQUET_RET_CORRUPT) { - // XXX log - /* save */ + // XXX log TODO check this arm for correctness ret = super_write(cq,super,0,1); } /* sync */ + fm->flags &=~ COQUET_FM_FLAG_SUPERFLIP; if(sync) { ret = (cq->vfs_funcs.sync)(cq->vfs_data,COQUET_FILE_MAIN,1); + fm->flags |= COQUET_FM_FLAG_SUPERFLIP; } /* unlock */ @@ -250,15 +293,19 @@ char * cq_super_get_desc(struct cq_super *super) { static void test_open(coquet_t *cq) { int r; - r = (cq->vfs_funcs.open)(cq->vfs_data, - COQUET_FILE_MAIN,COQUET_CMODE_EITHER); + r = cq_open(cq,COQUET_FILE_MAIN,COQUET_CMODE_EITHER); + test_bail(cq,r); + r = coquet_write_start(cq,COQUET_FILE_MAIN,1); test_bail(cq,r); } static void test_close(coquet_t *cq) { int r; - r = (cq->vfs_funcs.close)(cq->vfs_data,COQUET_FILE_MAIN); + r = coquet_write_end(cq,COQUET_FILE_MAIN); + test_bail(cq,r); + + r = cq_close(cq,COQUET_FILE_MAIN); test_bail(cq,r); } @@ -268,8 +315,6 @@ void test_superblock_main() { struct cq_super super; char *desc; - printf("testing superblock\n"); - r = coquet_init(&cq,"tmp/test"); test_bail(&cq,r); testvfs_fakerandom(cq.vfs_data,0xA5); @@ -431,12 +476,80 @@ void test_superblock_corruption() { test_bail(&cq,r); } +static int test_nursery(coquet_t *cq, int use_b) { + int r,ok; + struct cq_super super; + + r = super_load_half(cq,COQUET_FILE_MAIN,&super,&ok,use_b); + test_bail(cq,r); + if(!ok) + return -1; + return super.current.nursery_size; +} + +static void test_superblock_synclogic() { + coquet_t cq; + struct cq_super super; + int r; + + r = coquet_init(&cq,"tmp/test"); + test_bail(&cq,r); + + test_unlink("tmp/test.coquet"); + test_open(&cq); + + r = cq_super_load(&cq,COQUET_FILE_MAIN,&super,1); + test_bail(&cq,r); + + /* should do a sync before first save (and one 
after, here) */ + super.current.nursery_size = 11; /* save 11 to A */ + cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,1); + test_bail(&cq,r); + test_eq_int(testvfs_synccount(cq.vfs_data),2,"sync A"); + super.current.nursery_size = 12; /* save 12 to B */ + cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,1); + test_bail(&cq,r); + test_eq_int(testvfs_synccount(cq.vfs_data),3,"sync B"); + /* check 11 went to A, 12 to B */ + test_eq_int(11,test_nursery(&cq,0),"sync C"); + test_eq_int(12,test_nursery(&cq,1),"sync D"); + /* we did a sync so 13 should go to A, but DON'T SYNC AFTER */ + super.current.nursery_size = 13; /* save 13 to A */ + cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,0); + test_eq_int(13,test_nursery(&cq,0),"sync E"); + test_eq_int(12,test_nursery(&cq,1),"sync F"); + /* next sync also to A, now sync */ + super.current.nursery_size = 14; /* save 14 to A */ + cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,1); + test_eq_int(14,test_nursery(&cq,0),"sync G"); + test_eq_int(12,test_nursery(&cq,1),"sync H"); + /* next sync to B, no sync */ + super.current.nursery_size = 15; /* save 15 to B */ + cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,0); + test_eq_int(14,test_nursery(&cq,0),"sync I"); + test_eq_int(15,test_nursery(&cq,1),"sync J"); + /* external sync */ + cq_datasync(&cq,COQUET_FILE_MAIN); + /* next sync to A */ + super.current.nursery_size = 16; /* save 16 to A */ + cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,0); + test_eq_int(16,test_nursery(&cq,0),"sync K"); + test_eq_int(15,test_nursery(&cq,1),"sync L"); + + test_close(&cq); + + r = coquet_finish(&cq); + test_bail(&cq,r); +} + void test_superblock() { + printf("testing superblock\n"); test_superblock_main(); test_superblock_corruption(); + test_superblock_synclogic(); } -/* To test: +/* To test: TODO creation a/b choice diff --git a/src/testvfs.c b/src/testvfs.c index 552536e..009daa3 100644 --- a/src/testvfs.c +++ b/src/testvfs.c @@ -6,7 +6,7 @@ #include "coquet.h" struct test_data { - int fake_random, virtual; + 
int fake_random, virtual, sync_count; uint8_t * vfiles[COQUET_FILE_NUM]; uint64_t vfile_len[COQUET_FILE_NUM]; vfs_t vfs_funcs; @@ -22,6 +22,7 @@ void * testvfs_make(vfs_t *vfs_funcs, void *vfs_data) { return NULL; td->fake_random = -1; td->virtual = 0; + td->sync_count = 0; td->vfs_funcs = *vfs_funcs; td->vfs_data = vfs_data; for(i=0;isync_count++; if(td->virtual) return COQUET_RET_OK; return (td->vfs_funcs.sync)(td->vfs_data, which_file, data_only); @@ -188,6 +190,12 @@ void testvfs_virtual(void * vfs_data, bool yn) { td->virtual = yn; } +int testvfs_synccount(void *vfs_data) { + struct test_data * td = (struct test_data *)vfs_data; + + return td->sync_count; +} + vfs_t vfs_test = { .start = test_start, .get_error_text = test_get_error_text, diff --git a/src/vfs.h b/src/vfs.h index d8f949c..2451364 100644 --- a/src/vfs.h +++ b/src/vfs.h @@ -97,6 +97,7 @@ extern vfs_t vfs_test; void * testvfs_make(vfs_t *vfs_funcs, void *vfs_data); void testvfs_fakerandom(void * vfs_data, int setting); void testvfs_virtual(void * vfs_data, bool yn); +int testvfs_synccount(void *vfs_data); #endif /* COQUET_TEST */ -- 2.30.2