Also, add some documentation and tweak some of the algorithms.
/*----- Help text functions -----------------------------------------------*/
static void usage(FILE *fp)
-{
- pquis(fp, "Usage: $ [-f file] expression\n");
-}
+ { pquis(fp, "Usage: $ [-f file] expression\n"); }
static void version(FILE *fp)
-{
- pquis(fp, "$, version " VERSION "\n");
-}
+ { pquis(fp, "$, version " VERSION "\n"); }
static void help(FILE *fp)
{
size_t sz;
const char *p;
- /* --- Pick the next option off the front --- */
-
+ /* Pick the next option off the front. */
*arg = av + ai;
- if (ai >= ac)
- return (O_EOF);
+ if (ai >= ac) return (O_EOF);
p = av[ai++];
- /* --- Cope with various forms of magic --- */
-
+ /* Cope with various forms of magic. */
if (p[0] != '-') {
if (!p[1]) switch (*p) {
case '&': return (O_AND);
goto bad;
}
- /* --- Now cope with other sorts of weirdies --- *
- *
- * By the end of this, a leading `-' or `--' will have been stripped.
+ /* Now cope with other sorts of weirdies. By the end of this, a leading
+ * `-' or `--' will have been stripped.
*/
-
p++;
- if (!*p)
- goto bad;
- if (*p == '-')
- p++;
+ if (!*p) goto bad;
+ if (*p == '-') p++;
if (!*p) {
- if (ai < ac)
- die("syntax error near `--': rubbish at end of line");
+ if (ai < ac) die("syntax error near `--': rubbish at end of line");
return (O_EOF);
}
- /* --- Now look the word up in my table --- */
-
+ /*Now look the word up in my table. */
sz = strlen(p);
oo = 0;
for (o = opttab; o->name; o++) {
if (strncmp(p, o->name, sz) == 0) {
- if (strlen(o->name) == sz || ((o->f & OF_SHORT) && sz == 1)) {
- oo = o;
- break;
- }
- if (oo) {
+ if (strlen(o->name) == sz || ((o->f & OF_SHORT) && sz == 1))
+ { oo = o; break; }
+ if (oo)
die("ambiguous option name `-%s' (could match `-%s' or `-%s')",
p, oo->name, o->name);
- }
oo = o;
}
}
- if (!oo)
- die("unrecognized option name `-%s'", p);
-
- /* --- Sort out the arguments --- */
+ if (!oo) die("unrecognized option name `-%s'", p);
+ /* Sort out the arguments. */
if (ai + oo->nargs > ac)
die("too few arguments for `-%s' (need %u)", oo->name, oo->nargs);
ai += oo->nargs;
- /* --- Now process the option --- */
-
+ /* Now process the option. */
switch (oo->tag) {
- case O_HELP:
- help(stdout);
- exit(0);
- case O_VERSION:
- version(stdout);
- exit(0);
- case O_USAGE:
- usage(stdout);
- exit(0);
- case O_FILE:
- file = (*arg)[1];
- break;
- default:
- return (oo->tag);
+ case O_HELP: help(stdout); exit(0);
+ case O_VERSION: version(stdout); exit(0);
+ case O_USAGE: usage(stdout); exit(0);
+ case O_FILE: file = (*arg)[1]; break;
+ default: return (oo->tag);
}
continue;
bad:
{
static const char *const eof[] = { "<end>", 0 };
p->t = nextopt(&p->a);
- if (p->t == O_EOF)
- p->a = eof;
+ if (p->t == O_EOF) p->a = eof;
}
static void p_factor(p_ctx *p, node **nn)
if (p->t == O_LPAREN) {
p_next(p);
p_expr(p, nn);
- if (p->t != O_RPAREN)
- die("syntax error near `%s': missing `)'", *p->a);
+ if (p->t != O_RPAREN) die("syntax error near `%s': missing `)'", *p->a);
p_next(p);
} else if (p->t == O_NOT) {
n = xmalloc(sizeof(node_un));
for (;;) {
p_factor(p, nn);
switch (p->t) {
- case O_AND:
- p_next(p);
- default:
- break;
- case O_RPAREN:
- case O_OR:
- case O_EOF:
- return;
+ case O_AND: p_next(p); break;
+ default: break;
+ case O_RPAREN: case O_OR: case O_EOF: return;
}
n = xmalloc(sizeof(node_bin));
n->left = *nn;
node_bin *n;
for (;;) {
p_term(p, nn);
- if (p->t != O_OR)
- break;
+ if (p->t != O_OR) break;
p_next(p);
n = xmalloc(sizeof(node_bin));
n->left = *nn;
exit(EXIT_FAILURE);
}
p_expr(&p, &n);
- if (p.t != O_EOF) {
+ if (p.t != O_EOF)
die("syntax error near `%s': rubbish at end of line (too many `)'s?)",
*p.a);
- }
return (n);
}
die("error opening `%s': %s", file, strerror(errno));
for (;;) {
dstr_reset(&d);
- if (dstr_putline(&d, fp) < 0)
- break;
+ if (dstr_putline(&d, fp) < 0) break;
l = d.buf + d.len;
for (p = q = d.buf; p < l; p++) {
- if (!isalnum((unsigned char)*p))
- continue;
+ if (!isalnum((unsigned char)*p)) continue;
*q++ = tolower((unsigned char)*p);
}
*q = 0;
}
if (ferror(fp) || fclose(fp))
die("error reading `%s': %s", file, strerror(errno));
- for (a = aa_head; a; a = a->next) {
- if (a->func(a->p))
- ok = 1;
- }
+ for (a = aa_head; a; a = a->next)
+ if (a->func(a->p)) ok = 1;
if (fflush(stdout) || ferror(stdout) || fclose(stdout))
die("error writing output: %s", strerror(errno));
if (!ok) pquis(stderr, "$: no matches found\n");
/*----- Data structures ---------------------------------------------------*/
+typedef unsigned countmap[UCHAR_MAX + 1];
+
typedef struct node_gram {
node n;
size_t sz;
- unsigned f[UCHAR_MAX + 1];
+ countmap count;
} node_gram;
/*----- Main code ---------------------------------------------------------*/
* Arguments: @node_gram *n@ = pointer to node to fill in
* @const char *p@ = pointer to string to compile from
*
- * Returns: The length of the string.
+ * Returns: ---
*
* Use: Fills in an anagram node with letter frequencies.
*/
-static size_t compile(node_gram *n, const char *p)
+static void compile(const char *p, countmap count)
{
- size_t len = 0;
- unsigned i;
- memset(n->f, 0, sizeof(n->f));
- while (*p) {
- i = (unsigned char)*p++;
- n->f[i]++;
- len++;
- }
- return (len);
+ memset(count, 0, sizeof(countmap));
+ while (*p) count[(unsigned char)*p++]++;
}
/* --- Node matcher --- */
static int n_gram(node *nn, const char *p, size_t sz)
{
+ /* A string X is a subgram of Y unless, for some character CH, X has more
+ * characters of value CH than Y.
+ *
+ * Special feature: count `.' in the pattern as meaning `don't care'.
+ */
+
node_gram *n = (node_gram *)nn;
- unsigned f[UCHAR_MAX];
+ countmap count;
const char *q;
unsigned i;
- if (n->sz && sz != n->sz)
- return (0);
- memcpy(f, n->f, sizeof(f));
+ if (n->sz && sz != n->sz) return (0);
+ memcpy(count, n->count, sizeof(count));
q = p + sz;
while (p < q) {
i = (unsigned char)*p++;
- if (f[i])
- f[i]--;
- else if (f['.'])
- f['.']--;
- else
- return (0);
+ if (count[i]) count[i]--;
+ else if (count['.']) count['.']--;
+ else return (0);
}
return (1);
}
{
node_gram *n = xmalloc(sizeof(*n));
n->n.func = n_gram;
- n->sz = compile(n, av[0]);
+ compile(av[0], n->count);
+ n->sz = strlen(av[0]);
return (&n->n);
}
{
node_gram *n = xmalloc(sizeof(*n));
n->n.func = n_gram;
+ compile(av[0], n->count);
n->sz = 0;
- compile(n, av[0]);
return (&n->n);
}
link *l;
if (!n->l) return (0);
- for (l = n->l; l; l = l->next)
- puts(l->p);
+ for (l = n->l; l; l = l->next) puts(l->p);
return (1);
}
static int n_longest(node *nn, const char *p, size_t sz)
{
node_longest *n = (node_longest *)nn;
- if (sz < n->len)
- return (0);
- if (sz > n->len)
- empty(n);
+ if (sz < n->len) return (0);
+ if (sz > n->len) empty(n);
insert(n, p, sz);
return (0);
}
static int n_shortest(node *nn, const char *p, size_t sz)
{
node_longest *n = (node_longest *)nn;
- if (n->len && sz > n->len)
- return (0);
- if (!n->len || sz < n->len)
- empty(n);
+ if (n->len && sz > n->len) return (0);
+ if (!n->len || sz < n->len) empty(n);
insert(n, p, sz);
return (0);
}
static int n_mono(node *nn, const char *p, size_t sz)
{
node_mono *n = (node_mono *)nn;
- unsigned map[UCHAR_MAX], imap[UCHAR_MAX];
+ unsigned map[UCHAR_MAX];
const unsigned char *q = n->p;
- int ch, i;
+ unsigned max = 0;
+ int ch;
+
+ /* Monoalphabetic substitution preserves length. */
+ if (sz != n->len) return (0);
- if (sz != n->len)
- return (0);
- memset(map, 0, sizeof(map));
- memset(imap, 0, sizeof(imap));
+ /* Canonicalize this string as we did the pattern. Fail if we find a
+ * mismatch.
+ */
+ memset(map, UCHAR_MAX, sizeof(map));
while (*p) {
ch = *p++;
- i = *q++;
- if (!map[i]) {
- if (imap[ch])
- return (0);
- map[i] = ch;
- imap[ch] = 1;
- } else if (map[i] != ch)
- return (0);
+ if (map[ch] >= max) map[ch] = max++;
+ if (max >= UCHAR_MAX) return (0);
+ if (*q++ != map[ch]) return (0);
}
return (1);
}
node *mono(const char *const *av)
{
unsigned char map[UCHAR_MAX];
- unsigned max;
- int ch;
- const char *p;
+ const char *p = av[0];
unsigned char *q;
+ unsigned max = 0;
+ int ch;
node_mono *n = xmalloc(sizeof(*n));
n->n.func = n_mono;
- memset(map, UCHAR_MAX, sizeof(map));
- max = 0;
- p = av[0];
+ n->p = q = xmalloc(n->len);
n->len = strlen(p);
- q = xmalloc(n->len);
- n->p = q;
+
+ /* Convert the string into the canonical representative of its equivalence
+ * class, specifically the lexically first string which is equivalent under
+ * monoalphabetic substitution. That is: replace the first distinct
+ * character with zero, the second with one, and so on.
+ */
+ memset(map, UCHAR_MAX, sizeof(map));
while (*p) {
ch = *p++;
- if (map[ch] >= max)
- map[ch] = max++;
+ if (map[ch] >= max) map[ch] = max++;
+ assert(max < UCHAR_MAX);
*q++ = map[ch];
}
+
return (&n->n);
}
{
long i, q;
- /* --- Initial guess is half the input length --- */
-
- for (i = q = a; q; q >>= 2)
- i >>= 1;
-
- /* --- Do the main series iteration --- */
+ /* Initial guess is half the input length. */
+ for (i = q = a; q; q >>= 2) i >>= 1;
+ /* Do the main series iteration. */
for (;;) {
- q = i * i - a;
- if (!q || (q < 0 && -q <= 2 * i))
- return (i);
- q /= 2 * i;
- if (!q)
- i--;
- else
- i -= q;
+ q = i*i - a;
+ if (q > -2*i && q <= 0) return (i);
+ q /= 2*i;
+ if (!q) i--;
+ else i -= q;
}
}
{
tcell **cc;
- if (*p++ != c->ch)
- return (0);
- if (!*p)
- return (1);
- c->f = 1;
+ if (*p++ != c->ch) return (0);
+ if (!*p) return (1);
+ c->f = 1;
for (cc = c->hop; *cc; cc++) {
- if ((*cc)->f)
- continue;
- if (track(*cc, p)) {
- c->f = 0;
- return (1);
- }
+ if ((*cc)->f) continue;
+ if (track(*cc, p)) { c->f = 0; return (1); }
}
c->f = 0;
+
return (0);
}
node_track *n = (node_track *)nn;
tcell *c;
- if (!*p)
- return (1);
- for (c = n->c; c->ch; c++) {
- if (track(c, p))
- return (1);
- }
+ if (!*p) return (1);
+ for (c = n->c; c->ch; c++)
+ if (track(c, p)) return (1);
return (0);
}
size_t sz = strlen(p);
tcell *c, **cc, **ccc;
- /* --- Work out the dimensions --- */
-
+ /* Work out the dimensions. */
x = strcspn(p, "/");
if (!p[x]) {
x = isqrt(sz);
- if (x * x != sz)
+ if (x*x != sz)
die("bad trackword `%s': not square, and no line markers", p);
y = x;
}
- if (!x)
- die("bad trackword `%s': no columns", p);
+ if (!x) die("bad trackword `%s': no columns", p);
y = 0;
l = p + sz;
q = p;
for (;;) {
- if (*q == '/')
- q++;
- if (!*q)
- break;
+ if (*q == '/') q++;
+ if (!*q) break;
if (l - q < x)
die("bad trackword `%s': inconsistent line lengths", p);
q += x;
y++;
}
- if (!y)
- die("bad trackword `%s': no rows", p);
-
- /* --- Build the match node --- */
+ if (!y) die("bad trackword `%s': no rows", p);
+ /* Build the match node. */
n = xmalloc(sizeof(*n));
n->n.func = n_track;
n->x = x; n->y = y;
- n->c = xmalloc((x * y + 1) * sizeof(tcell));
+ n->c = xmalloc((x*y + 1)*sizeof(tcell));
q = p;
c = n->c;
for (j = 0; j < y; j++) {
- if (*q == '/')
- q++;
+ if (*q == '/') q++;
for (i = 0; i < x; i++) {
c->ch = *q++;
c->f = 0;
}
c->ch = 0;
- /* --- Now prune out bogus links --- */
-
+ /* Now prune out bogus links. */
for (c = n->c; c->ch; c++) {
ccc = c->hop;
- for (cc = c->hop; *cc; cc++) {
- if (isalpha((unsigned char)(*cc)->ch)) {
- *ccc++ = *cc;
- }
- }
+ for (cc = c->hop; *cc; cc++)
+ if (isalpha((unsigned char)(*cc)->ch)) *ccc++ = *cc;
*ccc++ = 0;
}
- /* --- Done --- */
-
+ /* Done. */
return (&n->n);
}
void ego(const char *p)
{
const char *q = p;
- while (*q) {
- if (*q++ == PATHSEP)
- p = q;
- }
- if (*p == '-')
- p++;
+ while (*q)
+ if (*q++ == PATHSEP) p = q;
+ if (*p == '-') p++;
quis = p;
}
while (*p) {
sz = strcspn(p, "$");
if (sz) {
- if (fwrite(p, 1, sz, fp) < sz)
- return (EOF);
+ if (fwrite(p, 1, sz, fp) < sz) return (EOF);
p += sz;
}
if (*p == '$') {
p++;
if (*p == '$') {
- if (fputc('$', fp) == EOF)
- return (EOF);
+ if (fputc('$', fp) == EOF) return (EOF);
p++;
- } else {
- if (fputs(quis, fp) == EOF)
- return (EOF);
- }
+ } else if (fputs(quis, fp) == EOF)
+ return (EOF);
}
}
return (0);
void *xmalloc(size_t sz)
{
void *p = malloc(sz);
- if (!p)
- die("not enough memory");
+ if (!p) die("not enough memory");
return (p);
}
void *xrealloc(void *p, size_t sz)
{
p = realloc(p, sz);
- if (!p)
- die("not enough memory");
+ if (!p) die("not enough memory");
return (p);
}
size_t rq = d->len + sz;
size_t nsz;
- /* --- If we have enough space, just leave it --- */
-
- if (rq <= d->sz)
- return;
-
- /* --- Grow the buffer --- */
+ /* If we have enough space, just leave it. */
+ if (rq <= d->sz) return;
+ /* Grow the buffer. */
nsz = d->sz;
-
- if (nsz == 0)
- nsz = (DSTR_INITSZ >> 1);
+ if (nsz == 0) nsz = (DSTR_INITSZ >> 1);
do nsz <<= 1; while (nsz < rq);
-
- if (d->buf)
- d->buf = xrealloc(d->buf, nsz);
- else
- d->buf = xmalloc(nsz);
+ if (d->buf) d->buf = xrealloc(d->buf, nsz);
+ else d->buf = xmalloc(nsz);
d->sz = nsz;
}
for (;;) {
- /* --- Read the next byte --- */
-
+ /* Read the next byte. */
ch = getc(fp);
- /* --- End-of-file when no characters read is special --- */
-
- if (ch == EOF && !rd)
- return (EOF);
-
- /* --- Make sure there's some buffer space --- */
+ /* End-of-file when no characters read is special. */
+ if (ch == EOF && !rd) return (EOF);
+ /* Make sure there's some buffer space. */
if (!left) {
d->len = off;
dstr_ensure(d, 1);
left = d->sz - off;
}
- /* --- End-of-file or newline ends the loop --- */
-
+ /* End-of-file or newline ends the loop. */
if (ch == EOF || ch == '\n') {
d->buf[off] = 0;
d->len = off;
return rd;
}
- /* --- Append the character and continue --- */
-
+ /* Append the character and continue. */
d->buf[off++] = ch;
left--; rd++;
}
static int match(const char *p, const char *s)
{
- for (;;) {
- char pch = *p++, pche, sch;
- int sense;
+ char pch, pche, sch;
+ int sense;
+ for (;;) {
+ pch = *p++;
switch (pch) {
+
case '?':
- if (!*s)
- return (0);
- s++;
+ /* If there's no character left, then we fail; otherwise consume
+ * anything and move on.
+ */
+
+ if (!*s++) return (0);
break;
+
case '*':
- if (!*p)
- return (1);
- while (*s) {
- if (match(p, s))
- return (1);
+ /* If there's no more pattern then we win. */
+ if (!*p) return (1);
+
+ /* Try skipping any number of characters from the pattern looking for
+ * a match.
+ */
+ do {
+ if (match(p, s)) return (1);
s++;
- }
+ } while (*s);
return (0);
+
case '[':
- if (!*s)
- return (0);
+ /* Character sets. This is the hard part. */
+
+ /* If there is no character left, then we fail. */
+ if (!*s) return (0);
+
+ /* Fetch the string character, and start munching through the
+ * pattern.
+ */
sch = *s++;
- pch = *p++;
- sense = 1;
- if (pch == '^' || pch == '!') {
- sense = !sense;
- pch = *p++;
- }
+ pch = *p++; sense = 1;
+
+ /* Maybe we need to negate. */
+ if (pch == '^' || pch == '!') { sense = !sense; pch = *p++; }
+
+ /* A close bracket here is literal. Watch for ranges. */
if (pch == ']') {
if (*p == '-' && p[1] && p[1] != ']') {
- pche = p[1];
- p += 2;
- if (pch <= sch && sch <= pche)
- goto class_match;
- } else if (pch == sch)
- goto class_match;
+ pche = p[1]; p += 2;
+ if (pch <= sch && sch <= pche) goto class_match;
+ } else if (pch == sch) goto class_match;
pch = *p++;
}
+
+ /* Work through the other characters and ranges in the set. */
for (;; pch = *p++) {
- if (!pch || pch == ']')
- goto class_nomatch;
+ if (!pch || pch == ']') goto class_nomatch;
if (*p == '-' && p[1] && p[1] != ']') {
- pche = p[1];
- p += 2;
- if (pch <= sch && sch <= pche)
- goto class_match;
- } else if (pch == sch)
- goto class_match;
+ pche = p[1]; p += 2;
+ if (pch <= sch && sch <= pche) goto class_match;
+ } else if (pch == sch) goto class_match;
}
+
class_match:
- if (!sense)
- return (0);
+ /* Found a match. Chew through the rest of the pattern. */
+ if (!sense) return (0);
for (;;) {
- pch = *p++;
- if (!pch)
- return (0);
- if (pch == ']')
- break;
- if (*p == '-' && p[1] && p[1] != ']')
- p += 2;
+ pch = *p++; if (!pch) return (0);
+ if (pch == ']') break;
+ if (*p == '-' && p[1] && p[1] != ']') p += 2;
}
break;
+
class_nomatch:
- if (sense)
- return (0);
+ /* Found the end of the set, so it's a mismatch. */
+ if (sense) return (0);
break;
+
case '\\':
+ /* Treat the next thing literally. */
pch = *p++;
+ /* fall through... */
+
default:
- if (pch != *s)
- return (0);
- if (!pch)
- return (1);
- s++;
+ /* A plain character match.
+ *
+ * Trick: If this is the end of the pattern, we expect the end of
+ * the string.
+ */
+
+ if (pch != *s++) return (0);
+ if (!pch) return (1);
break;
}
}