From: Ben Harris Date: Tue, 17 Dec 2024 10:58:16 +0000 (+0000) Subject: Remove ASCII dependency X-Git-Tag: bedstead-3.251~65 X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~bjharris/git?a=commitdiff_plain;h=602db761c25f8924122daba2bd4a11b420d5a9f9;p=bedstead.git Remove ASCII dependency ISO C doesn't guarantee that the execution character set is ASCII, or anything like it. Bedstead tries to require only ISO C, but it used strcmp() to sort glyph names and so the output depended on the sort order of characters. Moreover, the code for finding variants of characters required that '.' have a lower value than any other character that appeared in glyph names. To avoid this dependency, we now have a table that assigns values to each character that can appear in glyph names, and a strcmp-compatible function that compares two strings after mapping through that table. This means that our sort order is explicitly specified in the code, and also provides a convenient place to catch unusual characters in glyph names. This change has no effect on the output TTX files (at least on an ASCII system). All remaining uses of strcmp() are testing solely for equality. --- diff --git a/bedstead.c b/bedstead.c index 6a8b108..75f27eb 100644 --- a/bedstead.c +++ b/bedstead.c @@ -2865,13 +2865,56 @@ time_for_ttx(void) return timestr; } +/* + * Various parts of the code depend on the precise ordering of glyph + * names produced and consumed by these functions. ISO C doesn't make + * many guarantees about the execution character set, so we put some + * effort into getting the right ordering. One useful thing that ISO + * C does guarantee is that all characters in the basic execution + * character set (which includes everything we use in glyph names) are + * non-negative. + * + * The following table defines the sort order that we use. + * Unspecified characters get mapped to zero, which we catch with an + * assertion. 
+ */
+unsigned char const glyphcharset[CHAR_MAX + 1] = { /* index 0..CHAR_MAX */
+	['\0']=1,
+	['.']=2,
+	['0']=10,['1']=11,['2']=12,['3']=13,['4']=14,
+	['5']=15,['6']=16,['7']=17,['8']=18,['9']=19,
+	['A']=21,['B']=22,['C']=23,['D']=24,['E']=25,['F']=26,['G']=27,
+	['H']=28,['I']=29,['J']=30,['K']=31,['L']=32,['M']=33,['N']=34,
+	['O']=35,['P']=36,['Q']=37,['R']=38,['S']=39,['T']=40,['U']=41,
+	['V']=42,['W']=43,['X']=44,['Y']=45,['Z']=46,
+	['_']=47,
+	['a']=51,['b']=52,['c']=53,['d']=54,['e']=55,['f']=56,['g']=57,
+	['h']=58,['i']=59,['j']=60,['k']=61,['l']=62,['m']=63,['n']=64,
+	['o']=65,['p']=66,['q']=67,['r']=68,['s']=69,['t']=70,['u']=71,
+	['v']=72,['w']=73,['x']=74,['y']=75,['z']=76
+};
+/* strcmp()-compatible comparison of glyph names in glyphcharset order. */
+static int namecmp(char const *ap, char const *bp)
+{
+	for (;; ap++, bp++) {
+		unsigned int a = *ap, b = *bp;
+		/* Out-of-range or unlisted characters are rejected here. */
+		assert(a <= CHAR_MAX && b <= CHAR_MAX);
+		assert(glyphcharset[a] != 0 && glyphcharset[b] != 0);
+		if (glyphcharset[a] < glyphcharset[b]) return -1;
+		if (glyphcharset[a] > glyphcharset[b]) return +1;
+		assert(a == b);
+		if (a == '\0') return 0;
+	}
+}
+
 static int
 compare_glyphs_by_name(const void *va, const void *vb)
 {
 	struct glyph const * const *ap = va, * const *bp = vb;
 	struct glyph const *a = *ap, *b = *bp;
 
-	return strcmp(a->name, b->name);
+	return namecmp(a->name, b->name);
 }
 
 static int
@@ -2881,7 +2924,7 @@ compare_glyph_to_name(const void *vn, const void *vg)
 	struct glyph const *g = *gp;
 	char const *name = vn;
 
-	return strcmp(name, g->name);
+	return namecmp(name, g->name);
 }
 
 static struct glyph *
@@ -2919,7 +2962,7 @@ compare_glyphs_by_ffid(const void *va, const void *vb)
 	if ((unsigned)a->unicode < (unsigned)b->unicode) return -1;
 	if ((unsigned)a->unicode > (unsigned)b->unicode) return +1;
 	/* Finally sort by glyph name for an arbitrary stable order. */
-	return strcmp(a->name, b->name);
+	return namecmp(a->name, b->name);
 }
 
 static int nsubrs;
@@ -3557,7 +3600,6 @@ doaltsubs(void)
 			 * with that name followed by a '.'.
By sorting them * by name, we guarantee that each qualified glyph * name comes immediately after the unqualified one. - * [Does that depend on ASCII ordering?] */ if (HASDOT(i)) { printf(" ", @@ -4474,7 +4516,7 @@ byunicode(const void *va, const void *vb) /* Cast to unsigned so -1 sorts last. */ if ((unsigned)a->unicode < (unsigned)b->unicode) return -1; if ((unsigned)a->unicode > (unsigned)b->unicode) return +1; - return strcmp(a->name, b->name); + return namecmp(a->name, b->name); } static void