"superblock.h": "c",
"page.h": "c"
},
- "C_Cpp.dimInactiveRegions": false
-}
\ No newline at end of file
+ "editor.acceptSuggestionOnEnter": "on",
+ "C_Cpp.dimInactiveRegions": false,
+ "[plaintext]": {
+ "editor.wordWrap": "bounded",
+ "editor.wordWrapColumn": 80
+ }
+}
* Replication
* Strict locking option
* lock timeout
-
+* superblock corrupt jamming
+* switch to SHA256 (good h/w support for speed)
+* shift superblock somewhere safer
+* optional suffix work
\ No newline at end of file
--- /dev/null
+Syncing data with fsync/fdatasync has become somewhat controversial recently, and now requires justification to counter knee-jerk responses.
+
+The default coquet backend is a single file sitting on disk. For these purposes the coquet sync mechanisms can be regarded as synonymous with fdatasync. In distributed storage, however, it should be some other (hopefully faster) mechanism to provide the functionality.
+
+Syncing is performed to tell an application that the likelihood of loss of data is reduced from one higher level set of risks to a lower level, and to do so while the situation and consequences are still easily remediable. Of course, backups and replications are also always required, and files can be corrupted due to bugs at any time.
+
+Similarly, data-centres can catch fire, companies can become bankrupt, legislatures can issue warrants, laws can be passed, hackers can intercept codebases, and meteorites can hit the earth. Durability is not an absolute issue; it is about managing risks.
+
+The first level of this risk management is moving data to disk (whatever that might mean for given hardware). This is beneficial because, until "on-disk", many of the higher-level risks have a probability proportional to the time they are present, and many of the subsequent levels of mitigations rely first on data being on disk.
+
+Per-page caches later reduce the risk of using corrupt data. However, they do this at the expense of the issue not being recoverable in-band.
--- /dev/null
+We must always have at least one valid superblock guaranteed synced to disk. This presents a challenge when we want to update the superblock without doing too many syncs, as there are only two slots.
+
+An additional issue is presented on first opening: we cannot know the sync status of the superblock, and so can't write anywhere.
+
+If, on our first superblock write, no sync has happened for any other reason in the connection, we must sync before the write. This is indicated by COQUET_FM_FLAG_SUPERSYNC being set. When considering the algorithm, COQUET_FM_FLAG_SUPERSYNC can be seen as a lazily evaluated sync of a hypothetical previous sync.
+
+We write to the *same* superblock as the most recent write unless COQUET_FM_FLAG_SUPERFLIP is set.
+
+Flag use:
+
+On file open:
+(a) set COQUET_FM_FLAG_SUPERSYNC
+
+On superblock write:
+(a) if COQUET_FM_FLAG_SUPERSYNC:
+(a.1) sync
+(a.2) clear COQUET_FM_FLAG_SUPERSYNC
+(a.3) set COQUET_FM_FLAG_SUPERFLIP
+
+(b.1) if COQUET_FM_FLAG_SUPERFLIP: write to not-most-recent superblock
+(b.2) if not COQUET_FM_FLAG_SUPERFLIP: write to most-recent superblock
+
+(c.1) if syncing, sync and set COQUET_FM_FLAG_SUPERFLIP
+(c.2) if not syncing, clear COQUET_FM_FLAG_SUPERFLIP
+
+On sync (anywhere):
+(a) clear COQUET_FM_FLAG_SUPERSYNC
+(b) set COQUET_FM_FLAG_SUPERFLIP
static int filemeta_setup_common(coquet_t *cq, struct cq_filemeta *fm) {
int r;
- /* load superblock */
+ /* load superblock, TODO needed? */
r = cq_super_load(cq,fm->which_file,&(fm->super),1); // TODO "create mode"
if(r != COQUET_RET_OK) {
return r;
static int filemeta_setup_write(coquet_t *cq, struct cq_filemeta *fm) {
int r;
- fm->flags = 0;
-
r = filemeta_setup_common(cq,fm);
if(r != COQUET_RET_OK) {
return r;
static int filemeta_setup_read(coquet_t *cq, struct cq_filemeta *fm) {
int r;
- fm->flags = 0;
-
r = filemeta_setup_common(cq,fm);
if(r != COQUET_RET_OK) {
return r;
return COQUET_RET_OK;
}
+/* Leave read/write mode for a file: clear the READ and WRITE bits in the
+ * file's metadata flags and keep every other flag bit as it was.
+ * cq is not used. Always returns COQUET_RET_OK.
+ */
+static int filemeta_finish_common(coquet_t *cq, struct cq_filemeta *fm) {
+    const uint64_t mode_bits = COQUET_FM_FLAG_READ|COQUET_FM_FLAG_WRITE;
+
+    fm->flags &= ~mode_bits;
+    return COQUET_RET_OK;
+}
+
bool cq_writing(coquet_t *cq, int which_file) {
return ((cq->flags & COQUET_CQ_FILE_VALID(which_file)) &&
(cq->filemeta[which_file].flags & COQUET_FM_FLAG_WRITE));
(COQUET_FM_FLAG_WRITE|COQUET_FM_FLAG_READ)));
}
+/* Open which_file through the VFS in the requested creation mode and
+ * initialise its per-file metadata flags.
+ *
+ * cq:         connection whose VFS is used.
+ * which_file: index of the file to open (e.g. COQUET_FILE_MAIN).
+ * mode:       COQUET_CMODE_* value handed on to the VFS open call.
+ *
+ * Returns COQUET_RET_OK on success. On failure the VFS error code is
+ * returned and no state in cq is modified.
+ */
+int cq_open(coquet_t *cq, int which_file, int mode) {
+    int r;
+    struct cq_filemeta *fm;
+
+    fm = &(cq->filemeta[which_file]);
+
+    /* open the file the caller asked for, in the mode they asked for */
+    r = (cq->vfs_funcs.open)(cq->vfs_data,which_file,mode);
+    if(r != COQUET_RET_OK) {
+        return r;
+    }
+
+    /* setup */
+    cq->flags |= COQUET_CQ_FILE_VALID(which_file);
+    /* The sync state of a freshly opened file is unknown, so require a
+     * sync before the first superblock write. This assignment also
+     * clears every other flag, including READ and WRITE.
+     */
+    fm->flags = COQUET_FM_FLAG_SUPERSYNC;
+
+    return COQUET_RET_OK;
+}
+
+/* Close which_file through the VFS. On success the file's VALID bit is
+ * cleared in cq->flags; on failure the VFS error code is returned and
+ * cq->flags is left untouched.
+ */
+int cq_close(coquet_t *cq, int which_file) {
+    int rc = (cq->vfs_funcs.close)(cq->vfs_data,which_file);
+
+    if(rc != COQUET_RET_OK) {
+        return rc;
+    }
+    cq->flags &=~ COQUET_CQ_FILE_VALID(which_file);
+    return COQUET_RET_OK;
+}
+
+
int coquet_write_start(coquet_t *cq, int which_file, bool wait) {
int r;
COQUET_LOCK_WRITE,COQUET_LMODE_UN,wait);
return r;
}
- cq->flags |= COQUET_CQ_FILE_VALID(which_file);
return COQUET_RET_OK;
}
return r;
}
- /* mark stale */
- cq->flags &=~ COQUET_CQ_FILE_VALID(which_file);
+ /* reset flags */
+ r = filemeta_finish_common(cq,&(cq->filemeta[which_file]));
+ if(r != COQUET_RET_OK) {
+ return r;
+ }
return COQUET_RET_OK;
}
if(r != COQUET_RET_OK) {
return r;
}
- cq->flags |= COQUET_CQ_FILE_VALID(which_file);
return COQUET_RET_OK;
}
int coquet_read_end(coquet_t *cq, int which_file) {
+ int r;
+
if(!cq_reading(cq,which_file) || cq_writing(cq,which_file)) {
return COQUET_RET_TRSMERROR;
}
- /* mark stale */
- cq->flags &=~ COQUET_CQ_FILE_VALID(which_file);
+ /* reset flags */
+ r = filemeta_finish_common(cq,&(cq->filemeta[which_file]));
+ if(r != COQUET_RET_OK) {
+ return r;
+ }
+
+ return COQUET_RET_OK;
+}
+
+/* Perform a data-only VFS sync of which_file and record the fact in that
+ * file's metadata, so the superblock writer knows a sync has happened:
+ * SUPERSYNC is cleared (no sync is owed before the next superblock
+ * write) and SUPERFLIP is set (the next superblock write goes to the
+ * other slot).
+ *
+ * Returns COQUET_RET_OK on success. On failure the VFS error code is
+ * returned and the flags are left unchanged.
+ */
+int cq_datasync(coquet_t *cq, int which_file) {
+    struct cq_filemeta *fm;
+    int ret;
+
+    fm = &(cq->filemeta[which_file]);
+
+    /* sync the same file whose flags we are about to update;
+     * final argument 1 requests a data-only sync
+     */
+    ret = (cq->vfs_funcs.sync)(cq->vfs_data,which_file,1);
+    if(ret != COQUET_RET_OK) {
+        return ret;
+    }
+
+    fm->flags &=~ COQUET_FM_FLAG_SUPERSYNC;
+    fm->flags |= COQUET_FM_FLAG_SUPERFLIP;
return COQUET_RET_OK;
}
#define COQUET_CQ_FILE_VALID(file) (COQUET_CQ_FLAG_MAINVALID<<(file))
-#define COQUET_FM_FLAG_WRITE 0x01
-#define COQUET_FM_FLAG_READ 0x02
+#define COQUET_FM_FLAG_WRITE 0x00000001
+#define COQUET_FM_FLAG_READ 0x00000002
+/* superblock was synced since last write, good, flip on next write */
+#define COQUET_FM_FLAG_SUPERFLIP 0x00000004
+/* file must be synced before first sb write, no known sync */
+#define COQUET_FM_FLAG_SUPERSYNC 0x00000008
struct cq_filemeta {
+ /* valid whenever meta is valid*/
int which_file;
+ uint64_t flags;
+
+ /* valid only when writing or reading (flags & 1 or flags & 2) */
struct cq_super super;
- uint8_t flags;
uint64_t last_main_block;
};
int coquet_read_start(coquet_t *cq, int which_file, bool wait);
int coquet_read_end(coquet_t *cq, int which_file);
-
/* test if we can read/write */
bool cq_reading(coquet_t *cq, int which_file);
bool cq_writing(coquet_t *cq, int which_file);
+/* perform vfs data sync, with state updates */
+int cq_datasync(coquet_t *cq, int which_file);
+
+/* open/close a file, setting up as necessary */
+int cq_open(coquet_t *cq, int which_file, int mode);
+int cq_close(coquet_t *cq, int which_file);
+
+
#endif
test_bail(&cq,r);
/* first page */
- r = (cq.vfs_funcs.open)(cq.vfs_data,COQUET_FILE_MAIN,COQUET_CMODE_CREATE);
+ r = cq_open(&cq,COQUET_FILE_MAIN,COQUET_CMODE_CREATE);
test_bail(&cq,r);
cq_super_load(&cq,COQUET_FILE_MAIN,&super,1);
r = coquet_write_end(&cq,COQUET_FILE_MAIN);
test_bail(&cq,r);
- r = (cq.vfs_funcs.close)(cq.vfs_data,COQUET_FILE_MAIN);
+ cq_close(&cq,COQUET_FILE_MAIN);
test_bail(&cq,r);
- r = (cq.vfs_funcs.open)(cq.vfs_data,COQUET_FILE_MAIN,COQUET_CMODE_OPEN);
+ r = cq_open(&cq,COQUET_FILE_MAIN,COQUET_CMODE_OPEN);
test_bail(&cq,r);
r = coquet_write_start(&cq,COQUET_FILE_MAIN,1);
return COQUET_RET_OK;
}
+/* TODO super due to split, anticipating
+ *
+ * Read SUPER_BYTES from offset 0 of which_file and extract one half of
+ * it into *super: the half at the start of the buffer when use_b is
+ * zero, the half starting HALF_BYTES in otherwise. *ok receives 1 when
+ * extract_half() reports a non-zero result and 0 when it does not.
+ *
+ * Returns COQUET_RET_OK once the read has succeeded (whatever *ok says).
+ * If the read fails its error code is returned and neither *ok nor
+ * *super is written.
+ */
+static int super_load_half(coquet_t *cq, int which_file,
+                           struct cq_super *super, int *ok, int use_b) {
+    uint8_t raw[SUPER_BYTES];
+    uint8_t *half = raw;
+    int rc;
+
+    rc = (cq->vfs_funcs.read)(cq->vfs_data,which_file,raw,0,SUPER_BYTES);
+    if(rc != COQUET_RET_OK) {
+        return rc;
+    }
+
+    if(use_b) {
+        half += HALF_BYTES;
+    }
+    *ok = extract_half(half,super) ? 1 : 0;
+    return COQUET_RET_OK;
+}
+
/* Extract a superblock into the given structure and check it. We don't
* have a valid global_iv, we just have to trust the one in the block,
* reducing the HMAC to a simple hash in terms of guarantees. Requires the
* main file to be open.
*/
int cq_super_load(coquet_t *cq, int which_file, struct cq_super *super, bool create) {
- uint8_t super_bytes[SUPER_BYTES];
struct cq_super super_a, super_b;
- int r,r2;
+ int r,r_a,r_b;
bool use_b;
- r = (cq->vfs_funcs.read)
- (cq->vfs_data,which_file,super_bytes,0,SUPER_BYTES);
+ /* superblock A */
+ r = super_load_half(cq,which_file,&super_a,&r_a,0);
if(r != COQUET_RET_OK) {
return r;
}
- r = !!extract_half(super_bytes,&super_a);
- r2 = !!extract_half(super_bytes+HALF_BYTES,&super_b);
- switch(r*2+r2) {
+ /* superblock B */
+ r = super_load_half(cq,which_file,&super_b,&r_b,1);
+ if(r != COQUET_RET_OK) {
+ return r;
+ }
+
+ switch(r_a*2+r_b) {
case 3: /* both valid */
use_b = (super_b.sb_serial > super_a.sb_serial);
break;
}
int cq_super_save(coquet_t *cq, int which_file, struct cq_super *super, bool wait, bool sync) {
- int r, ret;
+ int r, ret, use_b;
struct cq_super old;
+ struct cq_filemeta *fm;
+
+ fm = &(cq->filemeta[which_file]);
+
+ /* sync on first open, so at least one (actually both) superblocks
+ * are synced before we trample over one of them.
+ * (see doc/syncing-superblock.txt)
+ */
+ if(fm->flags & COQUET_FM_FLAG_SUPERSYNC) {
+ ret = (cq->vfs_funcs.sync)(cq->vfs_data,COQUET_FILE_MAIN,1);
+ if(ret != COQUET_RET_OK) {
+ return ret;
+ }
+ fm->flags &=~ COQUET_FM_FLAG_SUPERSYNC;
+ fm->flags |= COQUET_FM_FLAG_SUPERFLIP;
+ }
/* lock */
r = lock_super(cq,which_file,COQUET_LMODE_EXCL,wait);
/* load */
ret = cq_super_load(cq,which_file,&old,0);
+
+ /* which half? */
+ use_b = old.from_b;
+ if(fm->flags & COQUET_FM_FLAG_SUPERFLIP) {
+ use_b = !use_b;
+ }
+
+ /* save */
if(ret == COQUET_RET_OK) {
- ret = super_write(cq,super,!old.from_b,old.sb_serial+1);
+ ret = super_write(cq,super,use_b,old.sb_serial+1);
} else if(ret == COQUET_RET_CORRUPT) {
- // XXX log
- /* save */
+ // XXX log TODO check this arm for correctness
ret = super_write(cq,super,0,1);
}
/* sync */
+ fm->flags &=~ COQUET_FM_FLAG_SUPERFLIP;
if(sync) {
ret = (cq->vfs_funcs.sync)(cq->vfs_data,COQUET_FILE_MAIN,1);
+ fm->flags |= COQUET_FM_FLAG_SUPERFLIP;
}
/* unlock */
static void test_open(coquet_t *cq) {
int r;
- r = (cq->vfs_funcs.open)(cq->vfs_data,
- COQUET_FILE_MAIN,COQUET_CMODE_EITHER);
+ r = cq_open(cq,COQUET_FILE_MAIN,COQUET_CMODE_EITHER);
+ test_bail(cq,r);
+ r = coquet_write_start(cq,COQUET_FILE_MAIN,1);
test_bail(cq,r);
}
static void test_close(coquet_t *cq) {
int r;
- r = (cq->vfs_funcs.close)(cq->vfs_data,COQUET_FILE_MAIN);
+ r = coquet_write_end(cq,COQUET_FILE_MAIN);
+ test_bail(cq,r);
+
+ r = cq_close(cq,COQUET_FILE_MAIN);
test_bail(cq,r);
}
struct cq_super super;
char *desc;
- printf("testing superblock\n");
-
r = coquet_init(&cq,"tmp/test");
test_bail(&cq,r);
testvfs_fakerandom(cq.vfs_data,0xA5);
test_bail(&cq,r);
}
+/* Test helper: load one superblock half (A when use_b is zero, B
+ * otherwise) from the main file and return its current.nursery_size,
+ * or -1 when that half did not extract successfully. A read failure is
+ * handed to test_bail().
+ */
+static int test_nursery(coquet_t *cq, int use_b) {
+    struct cq_super half;
+    int valid, rc;
+
+    rc = super_load_half(cq,COQUET_FILE_MAIN,&half,&valid,use_b);
+    test_bail(cq,rc);
+    if(valid) {
+        return half.current.nursery_size;
+    }
+    return -1;
+}
+
+/* Exercise the superblock slot-selection and sync-flag logic: a run of
+ * saves, with and without a trailing sync, plus one external
+ * cq_datasync(). After each step the nursery_size stored in halves A
+ * and B is checked to confirm which slot the save landed in. Every
+ * library call's return code goes to test_bail() so that a failing save
+ * or sync is reported rather than hidden behind a stale result.
+ */
+static void test_superblock_synclogic(void) {
+    coquet_t cq;
+    struct cq_super super;
+    int r;
+
+    r = coquet_init(&cq,"tmp/test");
+    test_bail(&cq,r);
+
+    test_unlink("tmp/test.coquet");
+    test_open(&cq);
+
+    r = cq_super_load(&cq,COQUET_FILE_MAIN,&super,1);
+    test_bail(&cq,r);
+
+    /* should do a sync before first save (and one after, here) */
+    super.current.nursery_size = 11; /* save 11 to A */
+    r = cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,1);
+    test_bail(&cq,r);
+    test_eq_int(testvfs_synccount(cq.vfs_data),2,"sync A");
+    super.current.nursery_size = 12; /* save 12 to B */
+    r = cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,1);
+    test_bail(&cq,r);
+    test_eq_int(testvfs_synccount(cq.vfs_data),3,"sync B");
+    /* check 11 went to A, 12 to B */
+    test_eq_int(11,test_nursery(&cq,0),"sync C");
+    test_eq_int(12,test_nursery(&cq,1),"sync D");
+    /* we did a sync so 13 should go to A, but DON'T SYNC AFTER */
+    super.current.nursery_size = 13; /* save 13 to A */
+    r = cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,0);
+    test_bail(&cq,r);
+    test_eq_int(13,test_nursery(&cq,0),"sync E");
+    test_eq_int(12,test_nursery(&cq,1),"sync F");
+    /* no sync since the last save, so next save also to A; now sync */
+    super.current.nursery_size = 14; /* save 14 to A */
+    r = cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,1);
+    test_bail(&cq,r);
+    test_eq_int(14,test_nursery(&cq,0),"sync G");
+    test_eq_int(12,test_nursery(&cq,1),"sync H");
+    /* synced after the last save, so next save to B; no sync after */
+    super.current.nursery_size = 15; /* save 15 to B */
+    r = cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,0);
+    test_bail(&cq,r);
+    test_eq_int(14,test_nursery(&cq,0),"sync I");
+    test_eq_int(15,test_nursery(&cq,1),"sync J");
+    /* external sync */
+    r = cq_datasync(&cq,COQUET_FILE_MAIN);
+    test_bail(&cq,r);
+    /* the external sync counts, so next save to A */
+    super.current.nursery_size = 16; /* save 16 to A */
+    r = cq_super_save(&cq,COQUET_FILE_MAIN,&super,1,0);
+    test_bail(&cq,r);
+    test_eq_int(16,test_nursery(&cq,0),"sync K");
+    test_eq_int(15,test_nursery(&cq,1),"sync L");
+
+    test_close(&cq);
+
+    r = coquet_finish(&cq);
+    test_bail(&cq,r);
+}
+
void test_superblock() {
+ printf("testing superblock\n");
test_superblock_main();
test_superblock_corruption();
+ test_superblock_synclogic();
}
-/* To test:
+/* To test: TODO
creation
a/b choice
#include "coquet.h"
struct test_data {
- int fake_random, virtual;
+ int fake_random, virtual, sync_count;
uint8_t * vfiles[COQUET_FILE_NUM];
uint64_t vfile_len[COQUET_FILE_NUM];
vfs_t vfs_funcs;
return NULL;
td->fake_random = -1;
td->virtual = 0;
+ td->sync_count = 0;
td->vfs_funcs = *vfs_funcs;
td->vfs_data = vfs_data;
for(i=0;i<COQUET_FILE_NUM;i++) {
static int test_sync(void * vfs_data, int which_file, bool data_only) {
struct test_data * td = (struct test_data *)vfs_data;
+ td->sync_count++;
if(td->virtual)
return COQUET_RET_OK;
return (td->vfs_funcs.sync)(td->vfs_data, which_file, data_only);
td->virtual = yn;
}
+/* Report how many times the test VFS sync hook has been invoked on this
+ * vfs_data since it was created (the counter starts at 0).
+ */
+int testvfs_synccount(void *vfs_data) {
+    return ((struct test_data *)vfs_data)->sync_count;
+}
+
vfs_t vfs_test = {
.start = test_start,
.get_error_text = test_get_error_text,
void * testvfs_make(vfs_t *vfs_funcs, void *vfs_data);
void testvfs_fakerandom(void * vfs_data, int setting);
void testvfs_virtual(void * vfs_data, bool yn);
+int testvfs_synccount(void *vfs_data);
#endif /* COQUET_TEST */