chiark
/
gitweb
/
~mdw
/
disorder
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
more tag normalization work
[disorder]
/
server
/
trackdb.c
diff --git
a/server/trackdb.c
b/server/trackdb.c
index 5ffa19e4844bde5007a0b677362c1ab24ccf1201..d1878c01fa4f0ac3b98330f255b71cf3176271ab 100644
(file)
--- a/
server/trackdb.c
+++ b/
server/trackdb.c
@@
-1883,6
+1883,7
@@
static const char *checktag(const char *s) {
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
const char **w, *best = 0, *tag;
char **twords, **tags;
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
const char **w, *best = 0, *tag;
char **twords, **tags;
+ char *istag;
int i, j, n, err, what;
DBC *cursor = 0;
DBT k, d;
int i, j, n, err, what;
DBC *cursor = 0;
DBT k, d;
@@
-1896,6
+1897,7
@@
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
*ntracks = 0; /* for early returns */
/* normalize all the words */
w = xmalloc(nwordlist * sizeof (char *));
*ntracks = 0; /* for early returns */
/* normalize all the words */
w = xmalloc(nwordlist * sizeof (char *));
+ istag = xmalloc_noptr(nwordlist);
for(n = 0; n < nwordlist; ++n) {
uint32_t *w32;
size_t nw32;
for(n = 0; n < nwordlist; ++n) {
uint32_t *w32;
size_t nw32;
@@
-1904,7
+1906,8
@@
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
if(checktag(w[n])) {
++ntags; /* count up tags */
/* Normalize the tag */
if(checktag(w[n])) {
++ntags; /* count up tags */
/* Normalize the tag */
- w[n] = normalize_tag(w[n], strlen(w[n]));
+ w[n] = normalize_tag(w[n] + 4, strlen(w[n] + 4));
+ istag[n] = 1;
} else {
/* Normalize the search term by removing combining characters */
if(!(w32 = utf8_to_utf32(w[n], strlen(w[n]), &nw32)))
} else {
/* Normalize the search term by removing combining characters */
if(!(w32 = utf8_to_utf32(w[n], strlen(w[n]), &nw32)))
@@
-1912,11
+1915,12
@@
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
nw32 = remove_combining_chars(w32, nw32);
if(!(w[n] = utf32_to_utf8(w32, nw32, 0)))
return 0;
nw32 = remove_combining_chars(w32, nw32);
if(!(w[n] = utf32_to_utf8(w32, nw32, 0)))
return 0;
+ istag[n] = 0;
}
}
/* find the longest non-stopword */
for(n = 0; n < nwordlist; ++n)
}
}
/* find the longest non-stopword */
for(n = 0; n < nwordlist; ++n)
- if(!
stopword(w[n]) && !checktag
(w[n]))
+ if(!
istag[n] && !stopword
(w[n]))
if(!best || strlen(w[n]) > strlen(best))
best = w[n];
/* TODO: we should at least in principle be able to identify the word or tag
if(!best || strlen(w[n]) > strlen(best))
best = w[n];
/* TODO: we should at least in principle be able to identify the word or tag
@@
-1925,7
+1929,7
@@
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
if(ntags && !best) {
/* Only tags are listed. We limit to the first and narrow down with the
* rest. */
if(ntags && !best) {
/* Only tags are listed. We limit to the first and narrow down with the
* rest. */
- best =
checktag(w[0])
;
+ best =
istag[0] ? w[0] : 0
;
db = trackdb_tagsdb;
dbname = "tags";
} else if(best) {
db = trackdb_tagsdb;
dbname = "tags";
} else if(best) {
@@
-1974,7
+1978,8
@@
char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) {
twords = track_to_words(v.vec[n], p);
tags = parsetags(kvp_get(p, "tags"));
for(i = 0; i < nwordlist; ++i) {
twords = track_to_words(v.vec[n], p);
tags = parsetags(kvp_get(p, "tags"));
for(i = 0; i < nwordlist; ++i) {
- if((tag = checktag(w[i]))) {
+ if(istag[i]) {
+ tag = w[i];
/* Track must have this tag */
for(j = 0; tags[j]; ++j)
if(!strcmp(tag, tags[j])) break; /* tag found */
/* Track must have this tag */
for(j = 0; tags[j]; ++j)
if(!strcmp(tag, tags[j])) break; /* tag found */