From: Ben Harris <bjh21@bjh21.me.uk>
Date: Tue, 17 Dec 2024 10:58:16 +0000 (+0000)
Subject: Remove ASCII dependency
X-Git-Tag: bedstead-3.251~65
X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~bjharris/git?a=commitdiff_plain;h=602db761c25f8924122daba2bd4a11b420d5a9f9;p=bedstead.git

Remove ASCII dependency

ISO C doesn't guarantee that the execution character set is ASCII, or
anything like it.  Bedstead tries to require only ISO C, but it used
strcmp() to sort glyph names and so the output depended on the sort
order of characters.  Moreover, the code for finding variants of
characters required that '.' have a lower value than any other
character that appeared in glyph names.

To avoid this dependency, we now have a table that assigns values to
each character that can appear in glyph names, and a strcmp-compatible
function that compares two strings after mapping through that table.
This means that our sort order is explicitly specified in the code,
and also provides a convenient place to catch unusual characters in
glyph names.

This change has no effect on the output TTX files (at least on an ASCII
system).  All remaining uses of strcmp() are testing solely for
equality.
---

diff --git a/bedstead.c b/bedstead.c
index 6a8b108..75f27eb 100644
--- a/bedstead.c
+++ b/bedstead.c
@@ -2865,13 +2865,56 @@ time_for_ttx(void)
 	return timestr;
 }
 
+/*
+ * Various parts of the code depend on the precise ordering of glyph
+ * names produced and consumed by these functions.  ISO C doesn't make
+ * many guarantees about the execution character set, so we put some
+ * effort into getting the right ordering.  One useful thing that ISO
+ * C does guarantee is that all characters in the basic execution
+ * character set (which includes everything we use in glyph names) are
+ * non-negative, so they are all valid indices into this table.
+ *
+ * The following table defines the sort order that we use; '.' sorts
+ * before every other name character.  Unspecified characters get
+ * mapped to zero, which we catch with an assertion.
+ */
+unsigned char const glyphcharset[CHAR_MAX + 1] = {
+	['\0']=1,
+	['.']=2,
+	['0']=10,['1']=11,['2']=12,['3']=13,['4']=14,
+	['5']=15,['6']=16,['7']=17,['8']=18,['9']=19,
+	['A']=21,['B']=22,['C']=23,['D']=24,['E']=25,['F']=26,['G']=27,
+	['H']=28,['I']=29,['J']=30,['K']=31,['L']=32,['M']=33,['N']=34,
+	['O']=35,['P']=36,['Q']=37,['R']=38,['S']=39,['T']=40,['U']=41,
+	['V']=42,['W']=43,['X']=44,['Y']=45,['Z']=46,
+	['_']=47,
+	['a']=51,['b']=52,['c']=53,['d']=54,['e']=55,['f']=56,['g']=57,
+	['h']=58,['i']=59,['j']=60,['k']=61,['l']=62,['m']=63,['n']=64,
+	['o']=65,['p']=66,['q']=67,['r']=68,['s']=69,['t']=70,['u']=71,
+	['v']=72,['w']=73,['x']=74,['y']=75,['z']=76
+};
+
+static int namecmp(char const *ap, char const *bp)
+{
+	/* Like strcmp(), but ordered by glyphcharset[] sort keys. */
+	for (;;) {
+		unsigned int ca = *ap++, cb = *bp++;
+		assert(ca <= CHAR_MAX && cb <= CHAR_MAX);
+		unsigned char ka = glyphcharset[ca], kb = glyphcharset[cb];
+		assert(ka != 0 && kb != 0);	/* zero: not a name char */
+		if (ka != kb) return ka < kb ? -1 : +1;
+		assert(ca == cb);	/* keys are unique per char */
+		if (ca == '\0') return 0;
+	}
+}
+
 static int
 compare_glyphs_by_name(const void *va, const void *vb)
 {
 	struct glyph const * const *ap = va, * const *bp = vb;
 	struct glyph const *a = *ap, *b = *bp;
 
-	return strcmp(a->name, b->name);
+	return namecmp(a->name, b->name); /* glyphcharset[] order */
 }
 
 static int
@@ -2881,7 +2924,7 @@ compare_glyph_to_name(const void *vn, const void *vg)
 	struct glyph const *g = *gp;
 	char const *name = vn;
 
-	return strcmp(name, g->name);
+	return namecmp(name, g->name);
 }
 
 static struct glyph *
@@ -2919,7 +2962,7 @@ compare_glyphs_by_ffid(const void *va, const void *vb)
 	if ((unsigned)a->unicode < (unsigned)b->unicode) return -1;
 	if ((unsigned)a->unicode > (unsigned)b->unicode) return +1;
 	/* Finally sort by glyph name for an arbitrary stable order. */
-	return strcmp(a->name, b->name);
+	return namecmp(a->name, b->name);
 }
 
 static int nsubrs;
@@ -3557,7 +3600,6 @@ doaltsubs(void)
 		 * with that name followed by a '.'.  By sorting them
 		 * by name, we guarantee that each qualified glyph
 		 * name comes immediately after the unqualified one.
-		 * [Does that depend on ASCII ordering?]
 		 */
 		if (HASDOT(i)) {
 			printf("      <AlternateSet glyph='%s'>",
@@ -4474,7 +4516,7 @@ byunicode(const void *va, const void *vb)
 	/* Cast to unsigned so -1 sorts last. */
 	if ((unsigned)a->unicode < (unsigned)b->unicode) return -1;
 	if ((unsigned)a->unicode > (unsigned)b->unicode) return +1;
-	return strcmp(a->name, b->name);
+	return namecmp(a->name, b->name);
 }
 
 static void