From 86be0c3098d8e27807bc618126b8babfe7f93928 Mon Sep 17 00:00:00 2001
Message-Id: <86be0c3098d8e27807bc618126b8babfe7f93928.1716352610.git.mdw@distorted.org.uk>
From: Mark Wooding
Date: Sat, 29 Dec 2007 16:48:25 +0000
Subject: [PATCH] build default stopword list into server
Organization: Straylight/Edgeware

From: rjk@greenend.org.uk <>

---
 CHANGES                    |  3 ++
 debian/etc.disorder.config |  7 ----
 doc/disorder_config.5.in   |  3 ++
 examples/config.sample.in  | 10 ------
 lib/configuration.c        | 67 ++++++++++++++++++++++++++++++++++++++
 tests/search.py            |  2 ++
 6 files changed, 75 insertions(+), 17 deletions(-)

diff --git a/CHANGES b/CHANGES
index d558d9d..6b1fc20 100644
--- a/CHANGES
+++ b/CHANGES
@@ -6,6 +6,9 @@ See ChangeLog.d/* for detailed revision history.
 
 Users are now stored in the database rather than a configuration file.
 
+The server now has a built-in list of stopwords, so only additions need
+be mentioned in the configuration file.
+
 ** Web Interface
 
 The web interface now uses cookies to remember user identity, and allows
diff --git a/debian/etc.disorder.config b/debian/etc.disorder.config
index f5fd10c..55e42d1 100644
--- a/debian/etc.disorder.config
+++ b/debian/etc.disorder.config
@@ -44,12 +44,5 @@ mixer /dev/mixer
 # The channel to control
 channel pcm
 
-# Stopwords (i.e. ignored words) for the track search facility.
-stopword 01 02 03 04 05 06 07 08 09 10
-stopword 1 2 3 4 5 6 7 8 9
-stopword 11 12 13 14 15 16 17 18 19 20
-stopword 21 22 23 24 25 26 27 28 29 30
-stopword the a an and to too in on of we i am as im for is
-
 # include debconf configuration
 include /etc/disorder/conf.debconf
diff --git a/doc/disorder_config.5.in b/doc/disorder_config.5.in
index 3cb36af..8bc1963 100644
--- a/doc/disorder_config.5.in
+++ b/doc/disorder_config.5.in
@@ -571,6 +571,9 @@ over track names.
 .IP
 If \fBstopword\fR is used without arguments then the list of stopwords is
 cleared.
+.IP
+There is a default set of stopwords built in, but this option can be used to
+augment or replace that list.
 .TP
 .B tracklength \fIPATTERN\fR \fIMODULE\fR
 Specifies the module used to calculate the length of files matching
diff --git a/examples/config.sample.in b/examples/config.sample.in
index 1bf4088..6f214be 100644
--- a/examples/config.sample.in
+++ b/examples/config.sample.in
@@ -44,13 +44,3 @@ user jukebox
 mixer /dev/mixer
 # The channel to control
 channel pcm
-
-# URL of the web interface
-url http://jukebox.anjou.terraraq.org.uk/
-
-# Stopwords (i.e. ignored words) for the track search facility.
-stopword 01 02 03 04 05 06 07 08 09 10
-stopword 1 2 3 4 5 6 7 8 9
-stopword 11 12 13 14 15 16 17 18 19 20
-stopword 21 22 23 24 25 26 27 28 29 30
-stopword the a an and to too in on of we i am as im for is
diff --git a/lib/configuration.c b/lib/configuration.c
index 7ed9276..bb30c2f 100644
--- a/lib/configuration.c
+++ b/lib/configuration.c
@@ -1044,12 +1044,77 @@ static int config_include(struct config *c, const char *path) {
   return ret;
 }
 
+static const char *const default_stopwords[] = {
+  "stopword",
+
+  "01",
+  "02",
+  "03",
+  "04",
+  "05",
+  "06",
+  "07",
+  "08",
+  "09",
+  "1",
+  "10",
+  "11",
+  "12",
+  "13",
+  "14",
+  "15",
+  "16",
+  "17",
+  "18",
+  "19",
+  "2",
+  "20",
+  "21",
+  "22",
+  "23",
+  "24",
+  "25",
+  "26",
+  "27",
+  "28",
+  "29",
+  "3",
+  "30",
+  "4",
+  "5",
+  "6",
+  "7",
+  "8",
+  "9",
+  "a",
+  "am",
+  "an",
+  "and",
+  "as",
+  "for",
+  "i",
+  "im",
+  "in",
+  "is",
+  "of",
+  "on",
+  "the",
+  "to",
+  "too",
+  "we",
+};
+#define NDEFAULT_STOPWORDS (sizeof default_stopwords / sizeof *default_stopwords)
+
 /** @brief Make a new default configuration */
 static struct config *config_default(void) {
   struct config *c = xmalloc(sizeof *c);
   const char *logname;
   struct passwd *pw;
+  struct config_state cs;
 
+  cs.path = "";
+  cs.line = 0;
+  cs.config = c;
   /* Strings had better be xstrdup'd as they will get freed at some point. */
   c->gap = 2;
   c->history = 60;
@@ -1082,6 +1147,8 @@ static struct config *config_default(void) {
   c->dbversion = 2;
   c->cookie_login_lifetime = 86400;
   c->cookie_key_lifetime = 86400 * 7;
+  if(config_set(&cs, (int)NDEFAULT_STOPWORDS, (char **)default_stopwords))
+    exit(1);
   return c;
 }
 
diff --git a/tests/search.py b/tests/search.py
index 3be6ec9..dab3939 100755
--- a/tests/search.py
+++ b/tests/search.py
@@ -89,6 +89,8 @@ def test():
     check_search_results([u"fi\u0300rst"], first)
     check_search_results([u"THI\u0301RD"], third)
     check_search_results([u"thI\u0301rd"], third)
+    # stopwords shouldn't show up
+    check_search_results(["01"], [])
 
     if failures > 0:
         sys.exit(1)
-- 
[mdw]