static int n_mono(node *nn, const char *p, size_t sz)
{
node_mono *n = (node_mono *)nn;
- unsigned map[UCHAR_MAX], imap[UCHAR_MAX];
+ unsigned map[UCHAR_MAX];
const unsigned char *q = n->p;
- int ch, i;
+ unsigned max = 0;
+ int ch;
+
+ /* Monoalphabetic substitution preserves length. */
+ if (sz != n->len) return (0);
- if (sz != n->len)
- return (0);
- memset(map, 0, sizeof(map));
- memset(imap, 0, sizeof(imap));
+ /* Canonicalize this string as we did the pattern. Fail if we find a
+ * mismatch.
+ */
+ memset(map, UCHAR_MAX, sizeof(map));
while (*p) {
ch = *p++;
- i = *q++;
- if (!map[i]) {
- if (imap[ch])
- return (0);
- map[i] = ch;
- imap[ch] = 1;
- } else if (map[i] != ch)
- return (0);
+ if (map[ch] >= max) map[ch] = max++;
+ if (max >= UCHAR_MAX) return (0);
+ if (*q++ != map[ch]) return (0);
}
return (1);
}
node *mono(const char *const *av)
{
unsigned char map[UCHAR_MAX];
- unsigned max;
- int ch;
- const char *p;
+ const char *p = av[0];
unsigned char *q;
+ unsigned max = 0;
+ int ch;
node_mono *n = xmalloc(sizeof(*n));
n->n.func = n_mono;
- memset(map, UCHAR_MAX, sizeof(map));
- max = 0;
- p = av[0];
+ n->p = q = xmalloc(n->len);
n->len = strlen(p);
- q = xmalloc(n->len);
- n->p = q;
+
+ /* Convert the string into the canonical representative of its equivalence
+ * class, specifically the lexically first string which is equivalent under
+ * monoalphabetic substitution. That is: replace the first distinct
+ * character with zero, the second with one, and so on.
+ */
+ memset(map, UCHAR_MAX, sizeof(map));
while (*p) {
ch = *p++;
- if (map[ch] >= max)
- map[ch] = max++;
+ if (map[ch] >= max) map[ch] = max++;
+ assert(max < UCHAR_MAX);
*q++ = map[ch];
}
+
return (&n->n);
}