unicode_gc_cat -> unicode_General_Category

author Richard Kettlewell <rjk@greenend.org.uk>

Sun, 18 Nov 2007 12:14:24 +0000 (12:14 +0000)

committer Richard Kettlewell <rjk@greenend.org.uk>

Sun, 18 Nov 2007 12:14:24 +0000 (12:14 +0000)
author Richard Kettlewell <rjk@greenend.org.uk>
Sun, 18 Nov 2007 12:14:24 +0000 (12:14 +0000)
committer Richard Kettlewell <rjk@greenend.org.uk>
Sun, 18 Nov 2007 12:14:24 +0000 (12:14 +0000)
diff --git a/lib/charset.c b/lib/charset.c

index 9d77adcca35f1c462153d7fad35a20266205e94a..c763d1070a675cdbf0cf16a9bae9b98e447663f2 100644 (file)
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -183,7 +183,7 @@ static int combining(int c) {
    if(c < UNICODE_NCHARS) {
      const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
  
-    return ud->gc == unicode_gc_Mn || ud->ccc != 0;
+    return ud->general_category == unicode_General_Category_Mn || ud->ccc != 0;
    }
    /* Assume unknown characters are noncombining */
    return 0;
diff --git a/lib/unicode.c b/lib/unicode.c

index 032e36ee42f6f037dae0351f22b6173785cbe9a5..618ff06b64ec5a15c908c53015503c0cb721a915 100644 (file)
--- a/lib/unicode.c
+++ b/lib/unicode.c
@@ -577,12 +577,12 @@ int utf32_cmp(const uint32_t *a, const uint32_t *b) {
   * @param Code point
   * @return General_Category property value
   */
-static inline enum unicode_gc_cat utf32__general_category(uint32_t c) {
+static inline enum unicode_General_Category utf32__general_category(uint32_t c) {
    if(c < UNICODE_NCHARS) {
      const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
-    return ud->gc;
+    return ud->general_category;
    } else
-    return unicode_gc_Cn;
+    return unicode_General_Category_Cn;
  }
  
  /** @brief Check Grapheme_Cluster_Break property
@@ -593,11 +593,11 @@ static int utf32__is_control_or_cr_or_lf(uint32_t c) {
    switch(utf32__general_category(c)) {
    default:
      return 0;
-  case unicode_gc_Zl:
-  case unicode_gc_Zp:
-  case unicode_gc_Cc:
+  case unicode_General_Category_Zl:
+  case unicode_General_Category_Zp:
+  case unicode_General_Category_Cc:
      return 1;
-  case unicode_gc_Cf:
+  case unicode_General_Category_Cf:
      if(c == 0x200C || c == 0x200D)
        return 0;
      return 1;
diff --git a/lib/unidata.c b/lib/unidata.c

index 5ee07ee20bd726a12be66655e2f0137523da87ad..09bb353cf33a8857e96cc9af4faaa46a58e0f8b3 100644 (file)
--- a/lib/unidata.c
+++ b/lib/unidata.c
@@ -2,36 +2,36 @@
  #include <config.h>
  #include "types.h"
  #include "unidata.h"
-#define Cc unicode_gc_Cc
-#define Cf unicode_gc_Cf
-#define Cn unicode_gc_Cn
-#define Co unicode_gc_Co
-#define Cs unicode_gc_Cs
-#define Ll unicode_gc_Ll
-#define Lm unicode_gc_Lm
-#define Lo unicode_gc_Lo
-#define Lt unicode_gc_Lt
-#define Lu unicode_gc_Lu
-#define Mc unicode_gc_Mc
-#define Me unicode_gc_Me
-#define Mn unicode_gc_Mn
-#define Nd unicode_gc_Nd
-#define Nl unicode_gc_Nl
-#define No unicode_gc_No
-#define Pc unicode_gc_Pc
-#define Pd unicode_gc_Pd
-#define Pe unicode_gc_Pe
-#define Pf unicode_gc_Pf
-#define Pi unicode_gc_Pi
-#define Po unicode_gc_Po
-#define Ps unicode_gc_Ps
-#define Sc unicode_gc_Sc
-#define Sk unicode_gc_Sk
-#define Sm unicode_gc_Sm
-#define So unicode_gc_So
-#define Zl unicode_gc_Zl
-#define Zp unicode_gc_Zp
-#define Zs unicode_gc_Zs
+#define Cc unicode_General_Category_Cc
+#define Cf unicode_General_Category_Cf
+#define Cn unicode_General_Category_Cn
+#define Co unicode_General_Category_Co
+#define Cs unicode_General_Category_Cs
+#define Ll unicode_General_Category_Ll
+#define Lm unicode_General_Category_Lm
+#define Lo unicode_General_Category_Lo
+#define Lt unicode_General_Category_Lt
+#define Lu unicode_General_Category_Lu
+#define Mc unicode_General_Category_Mc
+#define Me unicode_General_Category_Me
+#define Mn unicode_General_Category_Mn
+#define Nd unicode_General_Category_Nd
+#define Nl unicode_General_Category_Nl
+#define No unicode_General_Category_No
+#define Pc unicode_General_Category_Pc
+#define Pd unicode_General_Category_Pd
+#define Pe unicode_General_Category_Pe
+#define Pf unicode_General_Category_Pf
+#define Pi unicode_General_Category_Pi
+#define Po unicode_General_Category_Po
+#define Ps unicode_General_Category_Ps
+#define Sc unicode_General_Category_Sc
+#define Sk unicode_General_Category_Sk
+#define Sm unicode_General_Category_Sm
+#define So unicode_General_Category_So
+#define Zl unicode_General_Category_Zl
+#define Zp unicode_General_Category_Zp
+#define Zs unicode_General_Category_Zs
  #define GBCR unicode_Grapheme_Break_CR
  #define GBControl unicode_Grapheme_Break_Control
  #define GBExtend unicode_Grapheme_Break_Extend
diff --git a/lib/unidata.h b/lib/unidata.h

index 5f22127a33e9a2aa00e6b7a07d2ffc478778db15..3688a769e795be512fd4e599f8db4b6bee5a08dc 100644 (file)
--- a/lib/unidata.h
+++ b/lib/unidata.h
@@ -1,37 +1,37 @@
  /* Automatically generated file, see scripts/make-unidata */
  #ifndef UNIDATA_H
  #define UNIDATA_H
-enum unicode_gc_cat {
-  unicode_gc_Cc,
-  unicode_gc_Cf,
-  unicode_gc_Cn,
-  unicode_gc_Co,
-  unicode_gc_Cs,
-  unicode_gc_Ll,
-  unicode_gc_Lm,
-  unicode_gc_Lo,
-  unicode_gc_Lt,
-  unicode_gc_Lu,
-  unicode_gc_Mc,
-  unicode_gc_Me,
-  unicode_gc_Mn,
-  unicode_gc_Nd,
-  unicode_gc_Nl,
-  unicode_gc_No,
-  unicode_gc_Pc,
-  unicode_gc_Pd,
-  unicode_gc_Pe,
-  unicode_gc_Pf,
-  unicode_gc_Pi,
-  unicode_gc_Po,
-  unicode_gc_Ps,
-  unicode_gc_Sc,
-  unicode_gc_Sk,
-  unicode_gc_Sm,
-  unicode_gc_So,
-  unicode_gc_Zl,
-  unicode_gc_Zp,
-  unicode_gc_Zs
+enum unicode_General_Category {
+  unicode_General_Category_Cc,
+  unicode_General_Category_Cf,
+  unicode_General_Category_Cn,
+  unicode_General_Category_Co,
+  unicode_General_Category_Cs,
+  unicode_General_Category_Ll,
+  unicode_General_Category_Lm,
+  unicode_General_Category_Lo,
+  unicode_General_Category_Lt,
+  unicode_General_Category_Lu,
+  unicode_General_Category_Mc,
+  unicode_General_Category_Me,
+  unicode_General_Category_Mn,
+  unicode_General_Category_Nd,
+  unicode_General_Category_Nl,
+  unicode_General_Category_No,
+  unicode_General_Category_Pc,
+  unicode_General_Category_Pd,
+  unicode_General_Category_Pe,
+  unicode_General_Category_Pf,
+  unicode_General_Category_Pi,
+  unicode_General_Category_Po,
+  unicode_General_Category_Ps,
+  unicode_General_Category_Sc,
+  unicode_General_Category_Sk,
+  unicode_General_Category_Sm,
+  unicode_General_Category_So,
+  unicode_General_Category_Zl,
+  unicode_General_Category_Zp,
+  unicode_General_Category_Zs
  };
  enum unicode_Grapheme_Break {
    unicode_Grapheme_Break_CR,
@@ -84,7 +84,7 @@ struct unidata {
    int16_t upper_offset;
    int16_t lower_offset;
    unsigned char ccc;
-  char gc;
+  char general_category;
    uint8_t flags;
    char grapheme_break;
    char word_break;
diff --git a/lib/words.c b/lib/words.c

index 01c9db2c9e719b67d2fbef41cae620937aa7030d..2638ea645c991974696fef808afaf29850f6cbf9 100644 (file)
--- a/lib/words.c
+++ b/lib/words.c
@@ -39,12 +39,12 @@ const char *casefold(const char *ptr) {
    return utf8_casefold_canon(ptr, strlen(ptr), 0);
  }
  
-static enum unicode_gc_cat cat(uint32_t c) {
+static enum unicode_General_Category cat(uint32_t c) {
    if(c < UNICODE_NCHARS) {
      const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
-    return ud->gc;
+    return ud->general_category;
    } else
-    return unicode_gc_Cn;
+    return unicode_General_Category_Cn;
  }
  
  /* XXX this is a bit kludgy */
@@ -73,18 +73,18 @@ char **words(const char *s, int *nvecp) {
      }
      /* do the rest on category */
      switch(cat(c)) {
-    case unicode_gc_Ll:
-    case unicode_gc_Lm:
-    case unicode_gc_Lo:
-    case unicode_gc_Lt:
-    case unicode_gc_Lu:
-    case unicode_gc_Nd:
-    case unicode_gc_Nl:
-    case unicode_gc_No:
-    case unicode_gc_Sc:
-    case unicode_gc_Sk:
-    case unicode_gc_Sm:
-    case unicode_gc_So:
+    case unicode_General_Category_Ll:
+    case unicode_General_Category_Lm:
+    case unicode_General_Category_Lo:
+    case unicode_General_Category_Lt:
+    case unicode_General_Category_Lu:
+    case unicode_General_Category_Nd:
+    case unicode_General_Category_Nl:
+    case unicode_General_Category_No:
+    case unicode_General_Category_Sc:
+    case unicode_General_Category_Sk:
+    case unicode_General_Category_Sm:
+    case unicode_General_Category_So:
        /* letters, digits and symbols are considered to be part of
         * words */
        if(!in_word) {
@@ -94,15 +94,15 @@ char **words(const char *s, int *nvecp) {
        dynstr_append_bytes(&d, start, s - start);
        break;
  
-    case unicode_gc_Cc:
-    case unicode_gc_Cf:
-    case unicode_gc_Co:
-    case unicode_gc_Cs:
-    case unicode_gc_Zl:
-    case unicode_gc_Zp:
-    case unicode_gc_Zs:
-    case unicode_gc_Pe:
-    case unicode_gc_Ps:
+    case unicode_General_Category_Cc:
+    case unicode_General_Category_Cf:
+    case unicode_General_Category_Co:
+    case unicode_General_Category_Cs:
+    case unicode_General_Category_Zl:
+    case unicode_General_Category_Zp:
+    case unicode_General_Category_Zs:
+    case unicode_General_Category_Pe:
+    case unicode_General_Category_Ps:
      separator:
        if(in_word) {
         dynstr_terminate(&d);
@@ -111,15 +111,15 @@ char **words(const char *s, int *nvecp) {
        }
        break;
  
-    case unicode_gc_Mc:
-    case unicode_gc_Me:
-    case unicode_gc_Mn:
-    case unicode_gc_Pc:
-    case unicode_gc_Pd:
-    case unicode_gc_Pf:
-    case unicode_gc_Pi:
-    case unicode_gc_Po:
-    case unicode_gc_Cn:
+    case unicode_General_Category_Mc:
+    case unicode_General_Category_Me:
+    case unicode_General_Category_Mn:
+    case unicode_General_Category_Pc:
+    case unicode_General_Category_Pd:
+    case unicode_General_Category_Pf:
+    case unicode_General_Category_Pi:
+    case unicode_General_Category_Po:
+    case unicode_General_Category_Cn:
        /* control and punctuation is completely ignored */
        break;
  
diff --git a/scripts/make-unidata b/scripts/make-unidata

index bbb4aff995ac0148f1f25224efe5019581d36c92..81f347d8e544659d49470bc303bbbfe01c0f5720 100755 (executable)
--- a/scripts/make-unidata
+++ b/scripts/make-unidata
@@ -252,9 +252,9 @@ out("/* Automatically generated file, see scripts/make-unidata */\n",
      "#define UNIDATA_H\n");
  
  # TODO choose stable values for General_Category
-out("enum unicode_gc_cat {\n",
+out("enum unicode_General_Category {\n",
      join(",\n",
-        map("  unicode_gc_$_", sort keys %cats)), "\n};\n");
+        map("  unicode_General_Category_$_", sort keys %cats)), "\n};\n");
  
  out("enum unicode_Grapheme_Break {\n",
      join(",\n",
@@ -302,7 +302,7 @@ out("struct unidata {\n",
      "  ".choosetype($minud, $maxud)." upper_offset;\n",
      "  ".choosetype($minld, $maxld)." lower_offset;\n",
      "  ".choosetype(0, $maxccc)." ccc;\n",
-    "  char gc;\n",
+    "  char general_category;\n",
      "  uint8_t flags;\n",
      "  char grapheme_break;\n",
      "  char word_break;\n",
@@ -331,10 +331,14 @@ out("/* Automatically generated file, see scripts/make-unidata */\n",
  
  # Short aliases to keep .c file small
  
-out(map(sprintf("#define %s unicode_gc_%s\n", $_, $_), sort keys %cats));
-out(map(sprintf("#define GB%s unicode_Grapheme_Break_%s\n", $_, $_), sort keys %gbreak));
-out(map(sprintf("#define WB%s unicode_Word_Break_%s\n", $_, $_), sort keys %wbreak));
-out(map(sprintf("#define SB%s unicode_Sentence_Break_%s\n", $_, $_), sort keys %sbreak));
+out(map(sprintf("#define %s unicode_General_Category_%s\n", $_, $_),
+       sort keys %cats));
+out(map(sprintf("#define GB%s unicode_Grapheme_Break_%s\n", $_, $_),
+       sort keys %gbreak));
+out(map(sprintf("#define WB%s unicode_Word_Break_%s\n", $_, $_),
+       sort keys %wbreak));
+out(map(sprintf("#define SB%s unicode_Sentence_Break_%s\n", $_, $_),
+       sort keys %sbreak));
  
  # Names for *_Break properties
  out("const char *const unicode_Grapheme_Break_names[] = {\n",
author	Richard Kettlewell <rjk@greenend.org.uk>
	Sun, 18 Nov 2007 12:14:24 +0000 (12:14 +0000)
committer	Richard Kettlewell <rjk@greenend.org.uk>
	Sun, 18 Nov 2007 12:14:24 +0000 (12:14 +0000)
lib/charset.c		patch \| blob \| blame \| history
lib/unicode.c		patch \| blob \| blame \| history
lib/unidata.c		patch \| blob \| blame \| history
lib/unidata.h		patch \| blob \| blame \| history
lib/words.c		patch \| blob \| blame \| history
scripts/make-unidata		patch \| blob \| blame \| history