/* -*-c-*-
- *
- * $Id: mono.c,v 1.1 2003/09/15 02:48:54 mdw Exp $
*
* Monoalphabetic matcher
*
* (c) 2003 Mark Wooding
*/
-/*----- Licensing notice --------------------------------------------------*
+/*----- Licensing notice --------------------------------------------------*
*
* This file is part of Anag: a simple wordgame helper.
*
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* Anag is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with Anag; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
-/*----- Revision history --------------------------------------------------*
- *
- * $Log: mono.c,v $
- * Revision 1.1 2003/09/15 02:48:54 mdw
- * Monoalphabetic match filter.
- *
- */
-
/*----- Header files ------------------------------------------------------*/
#include "anag.h"
static int n_mono(node *nn, const char *p, size_t sz)
{
node_mono *n = (node_mono *)nn;
- unsigned map[UCHAR_MAX], imap[UCHAR_MAX];
+ unsigned map[UCHAR_MAX];
const unsigned char *q = n->p;
- int ch, i;
+ unsigned max = 0;
+ int ch;
- if (sz != n->len)
- return (0);
- memset(map, 0, sizeof(map));
- memset(imap, 0, sizeof(imap));
+ /* Monoalphabetic substitution preserves length. */
+ if (sz != n->len) return (0);
+
+ /* Canonicalize this string as we did the pattern. Fail if we find a
+ * mismatch.
+ */
+ memset(map, UCHAR_MAX, sizeof(map));
while (*p) {
ch = *p++;
- i = *q++;
- if (!map[i]) {
- if (imap[ch])
- return (0);
- map[i] = ch;
- imap[ch] = 1;
- } else if (map[i] != ch)
- return (0);
+ if (map[ch] >= max) map[ch] = max++;
+ if (max >= UCHAR_MAX) return (0);
+ if (*q++ != map[ch]) return (0);
}
return (1);
}
node *mono(const char *const *av)
{
unsigned char map[UCHAR_MAX];
- unsigned max;
- int ch;
- const char *p;
+ const char *p = av[0];
unsigned char *q;
+ unsigned max = 0;
+ int ch;
node_mono *n = xmalloc(sizeof(*n));
n->n.func = n_mono;
- memset(map, UCHAR_MAX, sizeof(map));
- max = 0;
- p = av[0];
+ n->p = q = xmalloc(n->len);
n->len = strlen(p);
- q = xmalloc(n->len);
- n->p = q;
+
+ /* Convert the string into the canonical representative of its equivalence
+ * class, specifically the lexically first string which is equivalent under
+ * monoalphabetic substitution. That is: replace the first distinct
+ * character with zero, the second with one, and so on.
+ */
+ memset(map, UCHAR_MAX, sizeof(map));
while (*p) {
ch = *p++;
- if (map[ch] >= max)
- map[ch] = max++;
+ if (map[ch] >= max) map[ch] = max++;
+ assert(max < UCHAR_MAX);
*q++ = map[ch];
}
+
return (&n->n);
}