1 /*************************************************
2 * Unicode Property Table handler *
3 *************************************************/
8 /* This file contains definitions of the property values that are returned by
9 the UCD access macros. New values that are added for new releases of Unicode
10 should always be at the end of each enum, for backwards compatibility.
12 IMPORTANT: The specific numeric values of the enums must be identical to the
13 values generated by the maint/MultiStage2.py script, which lists the
14 equivalent descriptive property names in vectors.
16 ALSO: The specific values of the first two enums are assumed for the table
17 called catposstab in pcre_compile.c. */
19 /* These are the general character categories. */
26 ucp_P, /* Punctuation */
31 /* These are the particular character categories. */
36 ucp_Cn, /* Unassigned */
37 ucp_Co, /* Private use */
38 ucp_Cs, /* Surrogate */
39 ucp_Ll, /* Lower case letter */
40 ucp_Lm, /* Modifier letter */
41 ucp_Lo, /* Other letter */
42 ucp_Lt, /* Title case letter */
43 ucp_Lu, /* Upper case letter */
44 ucp_Mc, /* Spacing mark */
45 ucp_Me, /* Enclosing mark */
46 ucp_Mn, /* Non-spacing mark */
47 ucp_Nd, /* Decimal number */
48 ucp_Nl, /* Letter number */
49 ucp_No, /* Other number */
50 ucp_Pc, /* Connector punctuation */
51 ucp_Pd, /* Dash punctuation */
52 ucp_Pe, /* Close punctuation */
53 ucp_Pf, /* Final punctuation */
54 ucp_Pi, /* Initial punctuation */
55 ucp_Po, /* Other punctuation */
56 ucp_Ps, /* Open punctuation */
57 ucp_Sc, /* Currency symbol */
58 ucp_Sk, /* Modifier symbol */
59 ucp_Sm, /* Mathematical symbol */
60 ucp_So, /* Other symbol */
61 ucp_Zl, /* Line separator */
62 ucp_Zp, /* Paragraph separator */
63 ucp_Zs /* Space separator */
66 /* These are grapheme break properties. Note that the code for processing them
67 assumes that the values are less than 16. If more values are added that take
68 the number to 16 or more, the code will have to be rewritten. */
73 ucp_gbControl, /* 2 */
75 ucp_gbPrepend, /* 4 */
76 ucp_gbSpacingMark, /* 5 */
77 ucp_gbL, /* 6 Hangul syllable type L */
78 ucp_gbV, /* 7 Hangul syllable type V */
79 ucp_gbT, /* 8 Hangul syllable type T */
80 ucp_gbLV, /* 9 Hangul syllable type LV */
81 ucp_gbLVT, /* 10 Hangul syllable type LVT */
82 ucp_gbRegionalIndicator, /* 11 */
86 /* These are the script identifications. */
96 ucp_Canadian_Aboriginal,
150 /* New for Unicode 5.0: */
156 /* New for Unicode 5.1: */
168 /* New for Unicode 5.2: */
171 ucp_Egyptian_Hieroglyphs,
172 ucp_Imperial_Aramaic,
173 ucp_Inscriptional_Pahlavi,
174 ucp_Inscriptional_Parthian,
179 ucp_Old_South_Arabian,
184 /* New for Unicode 6.0.0: */
188 /* New for Unicode 6.1.0: */
190 ucp_Meroitic_Cursive,
191 ucp_Meroitic_Hieroglyphs,
196 /* New for Unicode 7.0.0: */
198 ucp_Caucasian_Albanian,
211 ucp_Old_North_Arabian,