From: Mark Wooding Date: Wed, 29 Nov 2017 11:28:18 +0000 (+0000) Subject: Concentrate knowledge about the `pcre' API in one place. X-Git-Tag: 5.2~46 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/commitdiff_plain/a2e9d1472af7f3693cd7843eb3ff4d8b23423fc8 Concentrate knowledge about the `pcre' API in one place. Introduce a new `regexp' API in the library which knows about `pcre' and actually invokes `pcre_...' functions. Remove this knowledge from everywhere else. The new API is, of course, suspiciously similar to `pcre' in many ways, but there are some differences. * Sizes, lengths, and offsets are all `size_t' now. In particular, this means that the `ovector' type has changed. * Errors are reported differently: rather than returning a pointer to a static string, `regexp_compile' writes the error string into a caller-provided buffer. Observant readers might notice that these changes make the interface more similar to the `pcre2' API. They'd be right, and this is not a coincidence. But for now, there's no functional change. 
--- diff --git a/clients/disorder.c b/clients/disorder.c index a96a059..dc86a65 100644 --- a/clients/disorder.c +++ b/clients/disorder.c @@ -36,7 +36,6 @@ #if HAVE_UNISTD_H # include #endif -#include #include #if HAVE_GCRYPT_H # include @@ -880,8 +879,7 @@ int main(int argc, char **argv) { mem_init(); network_init(); /* garbage-collect PCRE's memory */ - pcre_malloc = xmalloc; - pcre_free = xfree; + regexp_setup(); if(!setlocale(LC_CTYPE, "")) disorder_fatal(errno, "error calling setlocale"); if(!setlocale(LC_TIME, "")) disorder_fatal(errno, "error calling setlocale"); while((n = getopt_long(argc, argv, "+hVc:dHlNu:p:", options, 0)) >= 0) { diff --git a/disobedience/disobedience.c b/disobedience/disobedience.c index 1a54e3e..3b5461f 100644 --- a/disobedience/disobedience.c +++ b/disobedience/disobedience.c @@ -20,11 +20,11 @@ */ #include "disobedience.h" +#include "regexp.h" #include "version.h" #include #include -#include #include /* Apologies for the numerous de-consting casts, but GLib et al do not seem to @@ -547,8 +547,7 @@ int main(int argc, char **argv) { mem_init(); /* garbage-collect PCRE's memory */ - pcre_malloc = xmalloc; - pcre_free = xfree; + regexp_setup(); if(!setlocale(LC_CTYPE, "")) disorder_fatal(errno, "error calling setlocale"); gtkok = gtk_init_check(&argc, &argv); while((n = getopt_long(argc, argv, "hVc:dtHC", options, 0)) >= 0) { diff --git a/lib/Makefile.am b/lib/Makefile.am index 22a9097..279f073 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -67,6 +67,7 @@ libdisorder_a_SOURCES=charset.c charsetf.c charset.h \ asprintf.c fprintf.c snprintf.c \ queue.c queue.h \ random.c random.h \ + regexp.c regexp.h \ regsub.c regsub.h \ resample.c resample.h \ rights.c queue-rights.c rights.h \ diff --git a/lib/configuration.c b/lib/configuration.c index 83423c7..6025964 100644 --- a/lib/configuration.c +++ b/lib/configuration.c @@ -36,7 +36,7 @@ #if HAVE_LANGINFO_H # include #endif -#include + #if HAVE_SHLOBJ_H # include #endif @@ -53,6 +53,7 @@ 
#include "charset.h" #include "defs.h" #include "printf.h" +#include "regexp.h" #include "regsub.h" #include "signame.h" #include "authhash.h" @@ -402,9 +403,10 @@ static int set_namepart(const struct config_state *cs, int nvec, char **vec) { struct namepartlist *npl = ADDRESS(cs->config, struct namepartlist); unsigned reflags; - const char *errstr; - int erroffset, n; - pcre *re; + regexp *re; + char errstr[RXCERR_LEN]; + size_t erroffset; + int n; if(nvec < 3) { disorder_error(0, "%s:%d: namepart needs at least 3 arguments", @@ -417,11 +419,10 @@ static int set_namepart(const struct config_state *cs, return -1; } reflags = nvec >= 5 ? regsub_flags(vec[4]) : 0; - if(!(re = pcre_compile(vec[1], - PCRE_UTF8 - |regsub_compile_options(reflags), - &errstr, &erroffset, 0))) { - disorder_error(0, "%s:%d: compiling regexp /%s/: %s (offset %d)", + if(!(re = regexp_compile(vec[1], regsub_compile_options(reflags), + errstr, sizeof(errstr), &erroffset))) + { + disorder_error(0, "%s:%d: compiling regexp /%s/: %s (offset %zu)", cs->path, cs->line, vec[1], errstr, erroffset); return -1; } @@ -449,10 +450,10 @@ static int set_transform(const struct config_state *cs, const struct conf *whoami, int nvec, char **vec) { struct transformlist *tl = ADDRESS(cs->config, struct transformlist); - pcre *re; + regexp *re; + char errstr[RXCERR_LEN]; unsigned reflags; - const char *errstr; - int erroffset; + size_t erroffset; if(nvec < 3) { disorder_error(0, "%s:%d: transform needs at least 3 arguments", @@ -465,11 +466,10 @@ static int set_transform(const struct config_state *cs, return -1; } reflags = (nvec >= 5 ? 
regsub_flags(vec[4]) : 0); - if(!(re = pcre_compile(vec[1], - PCRE_UTF8 - |regsub_compile_options(reflags), - &errstr, &erroffset, 0))) { - disorder_error(0, "%s:%d: compiling regexp /%s/: %s (offset %d)", + if(!(re = regexp_compile(vec[1], regsub_compile_options(reflags), + errstr, sizeof(errstr), &erroffset))) + { + disorder_error(0, "%s:%d: compiling regexp /%s/: %s (offset %zu)", cs->path, cs->line, vec[1], errstr, erroffset); return -1; } @@ -572,7 +572,7 @@ static void free_namepartlist(struct config *c, for(n = 0; n < npl->n; ++n) { np = &npl->s[n]; xfree(np->part); - pcre_free(np->re); /* ...whatever pcre_free is set to. */ + regexp_free(np->re); xfree(np->res); xfree(np->replace); xfree(np->context); @@ -589,7 +589,7 @@ static void free_transformlist(struct config *c, for(n = 0; n < tl->n; ++n) { t = &tl->t[n]; xfree(t->type); - pcre_free(t->re); /* ...whatever pcre_free is set to. */ + regexp_free(t->re); xfree(t->replace); xfree(t->context); } diff --git a/lib/configuration.h b/lib/configuration.h index f5b2061..148d08f 100644 --- a/lib/configuration.h +++ b/lib/configuration.h @@ -22,9 +22,8 @@ #ifndef CONFIGURATION_H #define CONFIGURATION_H -#include - #include "speaker-protocol.h" +#include "regexp.h" #include "rights.h" #include "addr.h" @@ -71,7 +70,7 @@ struct collectionlist { /** @brief A track name part */ struct namepart { char *part; /* part */ - pcre *re; /* compiled regexp */ + regexp *re; /* compiled regexp */ char *res; /* regexp as a string */ char *replace; /* replacement string */ char *context; /* context glob */ @@ -89,7 +88,7 @@ struct transform { char *type; /* track or dir */ char *context; /* sort or choose */ char *replace; /* substitution string */ - pcre *re; /* compiled re */ + regexp *re; /* compiled re */ unsigned flags; /* regexp flags */ }; diff --git a/lib/regexp.c b/lib/regexp.c new file mode 100644 index 0000000..cb5ecc6 --- /dev/null +++ b/lib/regexp.c @@ -0,0 +1,72 @@ +/* + * This file is part of DisOrder + * Copyright 
(C) 2017 Mark Wooding + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/** @file lib/regexp.c + * @brief Regular expressions + */ +#include "common.h" + +#include "regexp.h" +#include "mem.h" + +void regexp_setup(void) +{ + pcre_malloc = xmalloc; + pcre_free = xfree; +} + +regexp *regexp_compile(const char *pat, unsigned f, + char *errbuf, size_t errlen, size_t *erroff_out) +{ + char *p; + const char *e; + int erroff; + regexp *re; + size_t i; + + re = pcre_compile(pat, f, &e, &erroff, 0); + if(!re) { + *erroff_out = erroff; + for(p = errbuf, i = errlen - 1; i && *e; i--) *p++ = *e++; + *p = 0; + } + return re; +} + +int regexp_match(const regexp *re, const char *s, size_t n, unsigned f, + size_t *ov, size_t on) +{ + int rc; + int *myov; + size_t i; + + myov = xmalloc(on*sizeof(*myov)); + rc = pcre_exec(re, 0, s, n, 0, f, myov, on); + for(i = 0; i < on; i++) ov[i] = myov[i]; + xfree(myov); + return rc; +} + +void regexp_free(regexp *re) + { pcre_free(re); } + +/* +Local Variables: +c-basic-offset:2 +comment-column:40 +End: +*/ diff --git a/lib/regexp.h b/lib/regexp.h new file mode 100644 index 0000000..ea9bfaf --- /dev/null +++ b/lib/regexp.h @@ -0,0 +1,51 @@ +/* + * This file is part of DisOrder + * Copyright (C) 2017 Mark Wooding + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the 
Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +/** @file lib/regexp.h + * @brief Regular expressions + */ +#ifndef REGEXP_H +#define REGEXP_H + +#if defined(HAVE_PCRE_H) +# include + typedef pcre regexp; +# define RXF_CASELESS PCRE_CASELESS +# define RXERR_NOMATCH PCRE_ERROR_NOMATCH +#else +# error "no supported regular expression library found" +#endif + +void regexp_setup(void); + +#define RXCERR_LEN 128 +regexp *regexp_compile(const char *pat, unsigned f, + char *errbuf, size_t errlen, size_t *erroff_out); + +int regexp_match(const regexp *re, const char *s, size_t n, unsigned f, + size_t *ov, size_t on); + +void regexp_free(regexp *re); + +#endif /* REGEXP_H */ + +/* +Local Variables: +c-basic-offset:2 +comment-column:40 +End: +*/ diff --git a/lib/regsub.c b/lib/regsub.c index 1b7cad4..d83a2ae 100644 --- a/lib/regsub.c +++ b/lib/regsub.c @@ -20,8 +20,7 @@ */ #include "common.h" -#include - +#include "regexp.h" #include "regsub.h" #include "mem.h" #include "vector.h" @@ -30,9 +29,9 @@ #define PREMATCH (-1) /* fictitious pre-match substring */ #define POSTMATCH (-2) /* fictitious post-match substring */ -static inline int substring_start(const char attribute((unused)) *subject, - const int *ovector, - int n) { +static inline size_t substring_start(const char attribute((unused)) *subject, + const size_t *ovector, + size_t n) { switch(n) { case PREMATCH: return 0; case POSTMATCH: return ovector[1]; @@ -40,9 +39,9 @@ static inline int substring_start(const char attribute((unused)) *subject, } } -static inline int substring_end(const char 
*subject, - const int *ovector, - int n) { +static inline size_t substring_end(const char *subject, + const size_t *ovector, + size_t n) { switch(n) { case PREMATCH: return ovector[0]; case POSTMATCH: return strlen(subject); @@ -52,8 +51,8 @@ static inline int substring_end(const char *subject, static void transform_append(struct dynstr *d, const char *subject, - const int *ovector, - int n) { + const size_t *ovector, + size_t n) { int start = substring_start(subject, ovector, n); int end = substring_end(subject, ovector, n); @@ -64,9 +63,9 @@ static void transform_append(struct dynstr *d, static void replace_core(struct dynstr *d, const char *subject, const char *replace, - int rc, - const int *ovector) { - int substr; + size_t rc, + const size_t *ovector) { + size_t substr; while(*replace) { if(*replace == '$') @@ -113,20 +112,21 @@ int regsub_compile_options(unsigned flags) { int options = 0; if(flags & REGSUB_CASE_INDEPENDENT) - options |= PCRE_CASELESS; + options |= RXF_CASELESS; return options; } -const char *regsub(const pcre *re, const char *subject, const char *replace, - unsigned flags) { - int rc, ovector[99], matches; +const char *regsub(const regexp *re, const char *subject, + const char *replace, unsigned flags) { + int rc, matches; + size_t ovector[99]; struct dynstr d; dynstr_init(&d); matches = 0; /* find the next match */ - while((rc = pcre_exec(re, 0, subject, strlen(subject), 0, - 0, ovector, sizeof ovector / sizeof (int))) > 0) { + while((rc = regexp_match(re, subject, strlen(subject), 0, + ovector, sizeof ovector / sizeof (ovector[0]))) > 0) { /* text just before the match */ if(!(flags & REGSUB_REPLACE)) transform_append(&d, subject, ovector, PREMATCH); @@ -142,8 +142,8 @@ const char *regsub(const pcre *re, const char *subject, const char *replace, if(!(flags & REGSUB_GLOBAL)) break; } - if(rc <= 0 && rc != PCRE_ERROR_NOMATCH) { - disorder_error(0, "pcre_exec returned %d, subject '%s'", rc, subject); + if(rc <= 0 && rc != RXERR_NOMATCH) { + 
disorder_error(0, "regexp_match returned %d, subject '%s'", rc, subject); return 0; } if((flags & REGSUB_MUST_MATCH) && matches == 0) diff --git a/lib/regsub.h b/lib/regsub.h index 1a5aa43..1c9afc7 100644 --- a/lib/regsub.h +++ b/lib/regsub.h @@ -21,7 +21,7 @@ #ifndef REGSUB_H #define REGSUB_H -#include +#include "regexp.h" #define REGSUB_GLOBAL 0x0001 /* global replace */ #define REGSUB_MUST_MATCH 0x0002 /* return 0 if no match */ @@ -34,8 +34,8 @@ unsigned regsub_flags(const char *flags); int regsub_compile_options(unsigned flags); /* convert compile-time options */ -const char *regsub(const pcre *re, const char *subject, const char *replace, - unsigned flags); +const char *regsub(const regexp *re, const char *subject, + const char *replace, unsigned flags); #endif /* REGSUB_H */ diff --git a/lib/trackdb.c b/lib/trackdb.c index 3d3bf93..9e667e2 100644 --- a/lib/trackdb.c +++ b/lib/trackdb.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include @@ -39,6 +38,7 @@ #include "event.h" #include "mem.h" +#include "regexp.h" #include "kvp.h" #include "log.h" #include "vector.h" @@ -2239,18 +2239,19 @@ fail: * If @p re is NULL then always matches. 
*/ static int track_matches(size_t dl, const char *track, size_t tl, - const pcre *re) { - int ovec[3], rc; + const regexp *re) { + size_t ovec[3]; + int rc; if(!re) return 1; track += dl + 1; tl -= (dl + 1); - switch(rc = pcre_exec(re, 0, track, tl, 0, 0, ovec, 3)) { - case PCRE_ERROR_NOMATCH: return 0; + switch(rc = regexp_match(re, track, tl, 0, ovec, 3)) { + case RXERR_NOMATCH: return 0; default: if(rc < 0) { - disorder_error(0, "pcre_exec returned %d, subject '%s'", rc, track); + disorder_error(0, "regexp_match returned %d, subject '%s'", rc, track); return 0; } return 1; @@ -2266,7 +2267,7 @@ static int track_matches(size_t dl, const char *track, size_t tl, * @return 0 or DB_LOCK_DEADLOCK */ static int do_list(struct vector *v, const char *dir, - enum trackdb_listable what, const pcre *re, DB_TXN *tid) { + enum trackdb_listable what, const regexp *re, DB_TXN *tid) { DBC *cursor; DBT k, d; size_t dl; @@ -2364,7 +2365,7 @@ deadlocked: * @return List of tracks */ char **trackdb_list(const char *dir, int *np, enum trackdb_listable what, - const pcre *re) { + const regexp *re) { DB_TXN *tid; int n; struct vector v; diff --git a/lib/trackdb.h b/lib/trackdb.h index 1a74bb7..de2c677 100644 --- a/lib/trackdb.h +++ b/lib/trackdb.h @@ -21,9 +21,8 @@ #ifndef TRACKDB_H #define TRACKDB_H -#include - #include "event.h" +#include "regexp.h" #include "rights.h" extern const struct cache_type cache_files_type; @@ -125,7 +124,7 @@ enum trackdb_listable { }; char **trackdb_list(const char *dir, int *np, enum trackdb_listable what, - const pcre *rec); + const regexp *rec); /* Return the directories and/or files below DIR. If DIR is a null pointer * then concatenate the listing of all collections. 
* diff --git a/libtests/t-regsub.c b/libtests/t-regsub.c index a1e2220..70c156e 100644 --- a/libtests/t-regsub.c +++ b/libtests/t-regsub.c @@ -18,9 +18,9 @@ #include "test.h" static void test_regsub(void) { - pcre *re; - const char *errstr; - int erroffset; + regexp *re; + char errstr[RXCERR_LEN]; + size_t erroffset; check_integer(regsub_flags(""), 0); check_integer(regsub_flags("g"), REGSUB_GLOBAL); @@ -28,11 +28,11 @@ static void test_regsub(void) { check_integer(regsub_flags("gi"), REGSUB_GLOBAL|REGSUB_CASE_INDEPENDENT); check_integer(regsub_flags("iiggxx"), REGSUB_GLOBAL|REGSUB_CASE_INDEPENDENT); check_integer(regsub_compile_options(0), 0); - check_integer(regsub_compile_options(REGSUB_CASE_INDEPENDENT), PCRE_CASELESS); - check_integer(regsub_compile_options(REGSUB_GLOBAL|REGSUB_CASE_INDEPENDENT), PCRE_CASELESS); + check_integer(regsub_compile_options(REGSUB_CASE_INDEPENDENT), RXF_CASELESS); + check_integer(regsub_compile_options(REGSUB_GLOBAL|REGSUB_CASE_INDEPENDENT), RXF_CASELESS); check_integer(regsub_compile_options(REGSUB_GLOBAL), 0); - re = pcre_compile("foo", PCRE_UTF8, &errstr, &erroffset, 0); + re = regexp_compile("foo", 0, errstr, sizeof(errstr), &erroffset); assert(re != 0); check_string(regsub(re, "wibble-foo-foo-bar", "spong", 0), "wibble-spong-foo-bar"); @@ -42,7 +42,7 @@ static void test_regsub(void) { "wibble-x-x-bar"); insist(regsub(re, "wibble-x-x-bar", "spong", REGSUB_MUST_MATCH) == 0); - re = pcre_compile("a+", PCRE_UTF8, &errstr, &erroffset, 0); + re = regexp_compile("a+", 0, errstr, sizeof(errstr), &erroffset); assert(re != 0); check_string(regsub(re, "baaaaa", "spong", 0), "bspong"); @@ -53,7 +53,8 @@ static void test_regsub(void) { check_string(regsub(re, "baaaaa", "foo-$&-bar$x", 0), "bfoo-aaaaa-bar$x"); - re = pcre_compile("(a+)(b+)", PCRE_UTF8|PCRE_CASELESS, &errstr, &erroffset, 0); + re = regexp_compile("(a+)(b+)", RXF_CASELESS, + errstr, sizeof(errstr), &erroffset); assert(re != 0); check_string(regsub(re, "foo-aaaabbb-bar", 
"spong", 0), "foo-spong-bar"); diff --git a/libtests/test.h b/libtests/test.h index 983633b..cfc4ebb 100644 --- a/libtests/test.h +++ b/libtests/test.h @@ -33,7 +33,6 @@ #include #include #include -#include #include #include "mem.h" @@ -61,6 +60,7 @@ #include "addr.h" #include "base64.h" #include "url.h" +#include "regexp.h" #include "regsub.h" extern long long tests, errors; diff --git a/server/disorderd.c b/server/disorderd.c index e3e7d86..06651a1 100644 --- a/server/disorderd.c +++ b/server/disorderd.c @@ -208,8 +208,7 @@ int main(int argc, char **argv) { if(!setlocale(LC_CTYPE, "")) disorder_fatal(errno, "error calling setlocale"); /* garbage-collect PCRE's memory */ - pcre_malloc = xmalloc; - pcre_free = xfree; + regexp_setup(); while((n = getopt_long(argc, argv, "hVc:dfP:NsW", options, 0)) >= 0) { switch(n) { case 'h': help(); diff --git a/server/server.c b/server/server.c index 4f06ab4..844e091 100644 --- a/server/server.c +++ b/server/server.c @@ -684,9 +684,10 @@ static int files_dirs(struct conn *c, char **vec, int nvec, enum trackdb_listable what) { - const char *dir, *re, *errstr; - int erroffset; - pcre *rec; + const char *dir, *re; + char errstr[RXCERR_LEN]; + size_t erroffset; + regexp *rec; char **fvec, *key; switch(nvec) { @@ -717,8 +718,8 @@ static int files_dirs(struct conn *c, } else { /* Cache miss, we'll do the lookup and key != 0 so we'll store the answer * in the cache. */ - if(!(rec = pcre_compile(re, PCRE_CASELESS|PCRE_UTF8, - &errstr, &erroffset, 0))) { + if(!(rec = regexp_compile(re, RXF_CASELESS, + errstr, sizeof(errstr), &erroffset))) { sink_printf(ev_writer_sink(c->w), "550 Error compiling regexp: %s\n", errstr); return 1;