lib/SentenceBreakProperty.txt
lib/WordBreakTest.txt
lib/DerivedNormalizationProps.txt
+tests/*.log
.B random-play
If unset or \fByes\fR then random play is enabled. Otherwise it is disabled.
Use \fBdisable\fR rather than setting it directly.
+.PP
+Global preferences starting '_' are read-only (in the sense that you cannot
+modify them; the server may modify them as part of its normal operation). They
+are:
+.TP
+.B _dbversion
+The database version string. This is used by DisOrder to detect when it must
+modify the database after an upgrade.
.SH "LIBAO DRIVER"
.SS "Raw Protocol Players"
Raw protocol players are expected to use the \fBdisorder\fR libao driver.
The protocol is liable to change without notice. You are recommended to check
the implementation before believing this document.
.SH "GENERAL SYNTAX"
-Everything is encoded using UTF-8.
+Everything is encoded using UTF-8. See
+.B "CHARACTER ENCODING"
+below for more detail on character encoding issues.
.PP
-Commands and responses consist of a line followed (depending on the
-command or response) by a message.
+Commands and responses consist of a line perhaps followed (depending on the
+command or response) by a body.
.PP
The line syntax is the same as described in \fBdisorder_config\fR(5) except
that comments are prohibited.
is as defined in
.B "TRACK INFORMATION"
above.
+.SH "CHARACTER ENCODING"
+All data sent by both server and client is encoded using UTF-8. Moreover it
+must be valid UTF-8, i.e. non-minimal sequences are not permitted, nor are
+surrogates, nor are code points outside the Unicode code space.
+.PP
+There are no particular normalization requirements on either side of the
+protocol. The server currently converts internally to NFC, the client must
+normalize the responses returned if it needs some normalized form for further
+processing.
+.PP
+The various characters which divide up lines may not be followed by combining
+characters. For instance all of the following are prohibited:
+.TP
+.B o
+LINE FEED followed by a combining character. For example the sequence
+LINE FEED, COMBINING GRAVE ACCENT is never permitted.
+.TP
+.B o
+APOSTROPHE or QUOTATION MARK followed by a combining character when used to
+delimit fields. For instance a line starting APOSTROPHE, COMBINING CEDILLA
+is prohibited.
+.IP
+Note that such sequences are not prohibited when the quote character cannot be
+interpreted as a field delimiter. For instance APOSTROPHE, REVERSE SOLIDUS,
+APOSTROPHE, COMBINING CEDILLA, APOSTROPHE would be permitted.
+.TP
+.B o
+REVERSE SOLIDUS (BACKSLASH) followed by a combining character in a quoted
+string when it is the first character of an escape sequence. For instance a
+line starting APOSTROPHE, REVERSE SOLIDUS, COMBINING TILDE is prohibited.
+.IP
+As above such sequences are not prohibited when the character is not being used
+to start an escape sequence. For instance APOSTROPHE, REVERSE SOLIDUS,
+REVERSE SOLIDS, COMBINING TILDER, APOSTROPHE is permitted.
+.TP
+.B o
+Any of the field-splitting whitespace characters followed by a combining
+character when not part of a quoted field. For instance a line starting COLON,
+SPACE, COMBINING CANDRABINDU is prohibited.
+.IP
+As above non-delimiter uses are fine.
+.TP
+.B o
+The FULL STOP characters used to quote or delimit a body.
+.PP
+Furthermore none of these characters are permitted to appear in the context of
+a canonical decomposition (i.e. they must still be present when converted to
+NFC). In practice however this is not an issue in Unicode 5.0.
+.PP
+These rules are consistent with the observation that the split() function is
+essentially a naive ASCII parser. The implication is not that these sequences
+never actually appear in the protocol, merely that the server is not required
+to honor them in any useful way nor be consistent between versions: in current
+versions the result will be lines and fields that start with combining
+characters and are not necessarily split where you expect, but future versions
+may remove them, reject them or ignore some or all of the delimiters that have
+following combining characters, and no notice will be given of any change.
.SH "SEE ALSO"
\fBdisorder\fR(1),
\fBtime\fR(2),
void attribute((unused)) *u) {
}
+/* TODO: handle combining characters attached to delimiters in some
+ * sane way (might include reporting an error) */
+
char **split(const char *p,
int *np,
unsigned flags,
return v.vec;
}
+/* TODO handle initial combining characters sanely */
+
const char *quoteutf8(const char *s) {
size_t len = 3 + strlen(s);
const char *t;
#include "trackdb.h"
#include "trackdb-int.h"
#include "trackname.h"
+#include "unicode.h"
static DB_TXN *global_tid;
error(0, "cannot convert track path to UTF-8: %s", path);
continue;
}
+ /* We use NFC track names */
+ if(!(track = utf8_compose_canon(track, strlen(track), 0))) {
+ error(0, "cannot convert track path to NFC: %s", path);
+ continue;
+ }
D(("track %s", track));
/* only tracks with a known player are admitted */
for(n = 0; (n < config->player.n
#include "eventlog.h"
#include "defs.h"
#include "cache.h"
+#include "unicode.h"
#ifndef NONCE_SIZE
# define NONCE_SIZE 16
static int c_set_global(struct conn *c,
char **vec,
int attribute((unused)) nvec) {
+ if(vec[0][0] == '_') {
+ sink_writes(ev_writer_sink(c->w), "550 cannot set internal global preferences\n");
+ return 1;
+ }
trackdb_set_global(vec[0], vec[1], c->who);
sink_printf(ev_writer_sink(c->w), "250 OK\n");
return 1;
int nvec, n;
D(("server command %s", line));
+ /* We force everything into NFC as early as possible */
+ if(!(line = utf8_compose_canon(line, strlen(line), 0))) {
+ sink_writes(ev_writer_sink(c->w), "500 cannot normalize command\n");
+ return 1;
+ }
if(!(vec = split(line, &nvec, SPLIT_QUOTES, command_error, c))) {
sink_writes(ev_writer_sink(c->w), "500 cannot parse command\n");
return 1;
return data;
}
+int trackdb_set_global_tid(const char *name,
+ const char *value,
+ DB_TXN *tid);
+int trackdb_get_global_tid(const char *name,
+ DB_TXN *tid,
+ const char **rp);
+
#endif /* TRACKDB_INT_H */
/*
const struct kvp *p,
int *used_db);
static int trackdb_alltags_tid(DB_TXN *tid, char ***taglistp);
-static int trackdb_get_global_tid(const char *name,
- DB_TXN *tid,
- const char **rp);
static char **trackdb_new_tid(int *ntracksp,
int maxtracks,
DB_TXN *tid);
static const char *home; /* home had better not change */
DB_ENV *trackdb_env; /* db environment */
-DB *trackdb_tracksdb; /* the db itself */
-DB *trackdb_prefsdb; /* preferences */
-DB *trackdb_searchdb; /* the search database */
+
+/** @brief The tracks database
+ * - Keys are UTF-8(NFC(unicode(path name)))
+ * - Values are encoded key-value pairs
+ * - Data is reconstructable data about tracks that currently exist
+ */
+DB *trackdb_tracksdb;
+
+/** @brief The preferences database
+ *
+ * - Keys are UTF-8(NFC(unicode(path name)))
+ * - Values are encoded key-value pairs
+ * - Data is user data about tracks (that might not exist any more)
+ * and cannot be reconstructed
+ */
+DB *trackdb_prefsdb;
+
+/** @brief The search database
+ *
+ * - Keys are UTF-8(NFKC(casefold(search term)))
+ * - Values are UTF-8(NFC(unicode(path name)))
+ * - There can be more than one value per key
+ * - Presence of key,value means that path matches the search terms
+ * - Only tracks fond in @ref tracks_tracksdb are represented here
+ * - This database can be reconstructed, it contains no user data
+ */
+DB *trackdb_searchdb;
+
+/** @brief The tags database
+ *
+ * - Keys are UTF-8(NFKC(casefold(tag)))
+ * - Values are UTF-8(NFC(unicode(path name)))
+ * - There can be more than one value per key
+ * - Presence of key,value means that path matches the tag
+ * - This is always in sync with the tags preference
+ * - This database can be reconstructed, it contains no user data
+ */
DB *trackdb_tagsdb; /* the tags database */
+
+/** @brief The global preferences database
+ * - Keys are UTF-8(NFC(preference))
+ * - Values are global preference values
+ * - Data is user data and cannot be reconstructed
+ */
DB *trackdb_globaldb; /* global preferences */
+
+/** @brief The noticed database
+ * - Keys are 64-bit big-endian timestamps
+ * - Values are UTF-8(NFC(unicode(path name)))
+ * - There can be more than one value per key
+ * - Presence of key,value means that path was added at the given time
+ * - Data cannot be reconstructed (but isn't THAT important)
+ */
DB *trackdb_noticeddb; /* when track noticed */
static pid_t db_deadlock_pid = -1; /* deadlock manager PID */
static pid_t rescan_pid = -1; /* rescanner PID */
if((err = db->set_bt_compare(db, compare)))
fatal(0, "db->set_bt_compare %s: %s", path, db_strerror(err));
if((err = db->open(db, 0, path, 0, dbtype,
- openflags | DB_AUTO_COMMIT, mode)))
- fatal(0, "db->open %s: %s", path, db_strerror(err));
+ openflags | DB_AUTO_COMMIT, mode))) {
+ if((openflags & DB_CREATE) || errno != ENOENT)
+ fatal(0, "db->open %s: %s", path, db_strerror(err));
+ db->close(db, 0);
+ db = 0;
+ }
return db;
}
/* open track databases */
void trackdb_open(void) {
+ int newdb, err;
+
/* sanity checks */
assert(opened == 0);
++opened;
+ /* check the database version first */
+ trackdb_globaldb = open_db("global.db", 0, DB_HASH, 0, 0666);
+ if(trackdb_globaldb) {
+ /* This is an existing database */
+ const char *oldversion;
+
+ oldversion = trackdb_get_global("_dbversion");
+ if(!oldversion)
+ oldversion = "1.x";
+ if(strcmp(oldversion, DBVERSION)) {
+ /* This database needs upgrading. This isn't implemented yet so we just
+ * fail. */
+ fatal(0, "database needs upgrading from %s to %s", oldversion, DBVERSION);
+ }
+ newdb = 0;
+ /* Close the database again, we'll open it property below */
+ if((err = trackdb_globaldb->close(trackdb_globaldb, 0)))
+ fatal(0, "error closing global.db: %s", db_strerror(err));
+ trackdb_globaldb = 0;
+ } else {
+ /* This is a brand new database */
+ newdb = 1;
+ }
/* open the databases */
trackdb_tracksdb = open_db("tracks.db",
DB_RECNUM, DB_BTREE, DB_CREATE, 0666);
trackdb_globaldb = open_db("global.db", 0, DB_HASH, DB_CREATE, 0666);
trackdb_noticeddb = open_db("noticed.db",
DB_DUPSORT, DB_BTREE, DB_CREATE, 0666);
+ /* Stash the database version */
+ if(newdb)
+ trackdb_set_global("_dbversion", DBVERSION, 0);
D(("opened databases"));
}
}
/** @brief notice a possibly new track
+ * @param track NFC UTF-8 track name
+ * @param path Raw path name
+ * @param tid Transaction ID
* @return @c DB_NOTFOUND if new, 0 if already known, @c DB_LOCK_DEADLOCK also
*/
int trackdb_notice_tid(const char *track,
const char *value,
const char *who) {
DB_TXN *tid;
- DBT k, d;
int err;
int state;
- memset(&k, 0, sizeof k);
- memset(&d, 0, sizeof d);
- k.data = (void *)name;
- k.size = strlen(name);
- if(value) {
- d.data = (void *)value;
- d.size = strlen(value);
- }
for(;;) {
tid = trackdb_begin_transaction();
- if(value)
- err = trackdb_globaldb->put(trackdb_globaldb, tid, &k, &d, 0);
- else
- err = trackdb_globaldb->del(trackdb_globaldb, tid, &k, 0);
- if(!err || err == DB_NOTFOUND) break;
- if(err != DB_LOCK_DEADLOCK)
- fatal(0, "error updating database: %s", db_strerror(err));
+ if(!(err = trackdb_set_global_tid(name, value, tid)))
+ break;
trackdb_abort_transaction(tid);
}
trackdb_commit_transaction(tid);
reqtracks = 0;
}
+int trackdb_set_global_tid(const char *name,
+ const char *value,
+ DB_TXN *tid) {
+ DBT k, d;
+ int err;
+
+ memset(&k, 0, sizeof k);
+ memset(&d, 0, sizeof d);
+ k.data = (void *)name;
+ k.size = strlen(name);
+ if(value) {
+ d.data = (void *)value;
+ d.size = strlen(value);
+ }
+ if(value)
+ err = trackdb_globaldb->put(trackdb_globaldb, tid, &k, &d, 0);
+ else
+ err = trackdb_globaldb->del(trackdb_globaldb, tid, &k, 0);
+ if(err == DB_LOCK_DEADLOCK) return err;
+ if(err)
+ fatal(0, "error updating database: %s", db_strerror(err));
+ return 0;
+}
+
const char *trackdb_get_global(const char *name) {
DB_TXN *tid;
int err;
return r;
}
-static int trackdb_get_global_tid(const char *name,
- DB_TXN *tid,
- const char **rp) {
+int trackdb_get_global_tid(const char *name,
+ DB_TXN *tid,
+ const char **rp) {
DBT k, d;
int err;
case DB_LOCK_DEADLOCK:
return err;
default:
- fatal(0, "error updating database: %s", db_strerror(err));
+ fatal(0, "error reading database: %s", db_strerror(err));
}
}
struct ev_source;
+/* Database version string */
+#define DBVERSION "2.0"
+
extern const struct cache_type cache_files_type;
extern unsigned long cache_files_hits, cache_files_misses;
/* Cache entry type and tracking for regexp-based lookups */
copyfile("%s/sounds/slap.ogg" % topsrcdir, trackpath)
def stdtracks():
- maketrack("Joe Bloggs/First Album/01:First track.ogg")
+ # We create some tracks with non-ASCII characters in the name and
+ # we (currently) force UTF-8.
+ #
+ # On a traditional UNIX filesystem, that treats filenames as byte strings
+ # with special significant for '/', this should just work, though the
+ # names will look wrong to ls(1) in a non UTF-8 locale.
+ #
+ # On Apple HFS+ filenames normalized to a decomposed form that isn't quite
+ # NFD, so our attempts to have both normalized and denormalized filenames
+ # is frustrated. Provided we test on traditional filesytsems too this
+ # shouldn't be a problem.
+ # (See http://developer.apple.com/qa/qa2001/qa1173.html)
+
+ # C3 8C = 00CC LATIN CAPITAL LETTER I WITH GRAVE
+ # (in NFC)
+ maketrack("Joe Bloggs/First Album/01:F\xC3\x8Crst track.ogg")
+
maketrack("Joe Bloggs/First Album/02:Second track.ogg")
- maketrack("Joe Bloggs/First Album/03:Third track.ogg")
+
+ # CC 81 = 0301 COMBINING ACUTE ACCENT
+ # (giving an NFD i-acute)
+ maketrack("Joe Bloggs/First Album/03:ThI\xCC\x81rd track.ogg")
+ # ...hopefuly giving C3 8D = 00CD LATIN CAPITAL LETTER I WITH ACUTE
maketrack("Joe Bloggs/First Album/04:Fourth track.ogg")
maketrack("Joe Bloggs/First Album/05:Fifth track.ogg")
maketrack("Joe Bloggs/First Album/05:Fifth track.ogg")
maketrack("Joe Bloggs/Second Album/04:Fourth track.ogg")
maketrack("Joe Bloggs/Second Album/05:Fifth track.ogg")
maketrack("Joe Bloggs/Second Album/05:Fifth track.ogg")
- maketrack("Joe Bloggs/First Album/01:First track.ogg")
- maketrack("Joe Bloggs/First Album/02:Second track.ogg")
- maketrack("Joe Bloggs/First Album/03:Third track.ogg")
- maketrack("Joe Bloggs/First Album/04:Fourth track.ogg")
- maketrack("Joe Bloggs/First Album/05:Fifth track.ogg")
+ maketrack("Joe Bloggs/Third Album/01:First track.ogg")
+ maketrack("Joe Bloggs/Third Album/02:Second track.ogg")
+ maketrack("Joe Bloggs/Third Album/03:Third track.ogg")
+ maketrack("Joe Bloggs/Third Album/04:Fourth track.ogg")
+ maketrack("Joe Bloggs/Third Album/05:Fifth track.ogg")
maketrack("Fred Smith/Boring/01:Dull.ogg")
maketrack("Fred Smith/Boring/02:Tedious.ogg")
maketrack("Fred Smith/Boring/03:Drum Solo.ogg")
def notracks():
pass
-def start(test):
- """start(TEST)
-
+def common_setup():
+ remove_dir(testroot)
+ os.mkdir(testroot)
+ open("%s/config" % testroot, "w").write(
+ """player *.ogg shell 'echo "$TRACK" >> %s/played.log'
+ home %s
+ collection fs UTF-8 %s/tracks
+ scratch %s/scratch.ogg
+ gap 0
+ stopword 01 02 03 04 05 06 07 08 09 10
+ stopword 1 2 3 4 5 6 7 8 9
+ stopword 11 12 13 14 15 16 17 18 19 20
+ stopword 21 22 23 24 25 26 27 28 29 30
+ stopword the a an and to too in on of we i am as im for is
+ username fred
+ password fredpass
+ allow fred fredpass
+ plugins ../plugins
+ player *.mp3 execraw disorder-decode
+ player *.ogg execraw disorder-decode
+ player *.wav execraw disorder-decode
+ player *.flac execraw disorder-decode
+ tracklength *.mp3 disorder-tracklength
+ tracklength *.ogg disorder-tracklength
+ tracklength *.wav disorder-tracklength
+ tracklength *.flac disorder-tracklength
+ """ % (testroot, testroot, testroot, testroot))
+ copyfile("%s/sounds/scratch.ogg" % topsrcdir,
+ "%s/scratch.ogg" % testroot)
+
+def start_daemon(test):
+ """start_daemon(TEST)
Start the daemon for test called TEST."""
global daemon
assert daemon == None
if test == None:
errs = sys.stderr
else:
- errs = open("%s/%s.log" % (testroot, test), "w")
+ errs = open("%s.log" % test, "w")
server = None
print " starting daemon"
daemon = subprocess.Popen(["disorderd",
disorder._configfile = "%s/config" % testroot
disorder._userconf = False
-def stop():
- """stop()
+def stop_daemon():
+ """stop_daemon()
Stop the daemon if it has not stopped already"""
global daemon
tests += 1
if setup == None:
setup = stdtracks
+ common_setup()
setup()
- start(name)
+ start_daemon(name)
try:
try:
test()
failures += 1
print e
finally:
- stop()
+ stop_daemon()
if report:
if failures:
print " FAILED"
daemon = None
testroot = "%s/testroot" % os.getcwd()
topsrcdir = os.path.abspath(os.getenv("topsrcdir"))
-remove_dir(testroot)
-os.mkdir(testroot)
-open("%s/config" % testroot, "w").write(
-"""player *.ogg shell 'echo "$TRACK" >> %s/played.log'
-home %s
-collection fs ASCII %s/tracks
-scratch %s/scratch.ogg
-gap 0
-stopword 01 02 03 04 05 06 07 08 09 10
-stopword 1 2 3 4 5 6 7 8 9
-stopword 11 12 13 14 15 16 17 18 19 20
-stopword 21 22 23 24 25 26 27 28 29 30
-stopword the a an and to too in on of we i am as im for is
-username fred
-password fredpass
-allow fred fredpass
-plugins ../plugins
-player *.mp3 execraw disorder-decode
-player *.ogg execraw disorder-decode
-player *.wav execraw disorder-decode
-player *.flac execraw disorder-decode
-tracklength *.mp3 disorder-tracklength
-tracklength *.ogg disorder-tracklength
-tracklength *.wav disorder-tracklength
-tracklength *.flac disorder-tracklength
-""" % (testroot, testroot, testroot, testroot))
-copyfile("%s/sounds/scratch.ogg" % topsrcdir,
- "%s/scratch.ogg" % testroot)
def test():
"""Just start the server and then stop it a few seconds later"""
- time.sleep(5)
+ time.sleep(2)
if __name__ == '__main__':
dtest.run(test)