unicode_Grapheme_Break_Extend,
unicode_Grapheme_Break_L,
unicode_Grapheme_Break_LF,
+ unicode_Grapheme_Break_LV,
unicode_Grapheme_Break_LVT,
unicode_Grapheme_Break_Other,
unicode_Grapheme_Break_T,
};
extern const char *const unicode_Sentence_Break_names[];
enum unicode_flags {
- unicode_normalize_before_casefold = 1
+ unicode_normalize_before_casefold = 1, /* value unchanged; comma added so a second enumerator can follow */
+ unicode_compatibility_decomposition = 2 /* new flag, distinct bit from the value 1 above; NOTE(review): presumably marks an entry's decomposition as compatibility rather than canonical - confirm */
};
struct unidata {
- const uint32_t *compat;
- const uint32_t *canon;
+ const uint32_t *decomp; /* one pointer replaces the removed compat/canon pair; NOTE(review): presumably the unicode_compatibility_decomposition bit in flags says which kind it is - confirm */
const uint32_t *casefold;
+ const uint32_t *composed; /* new member; NOTE(review): presumably composition data for this code point - confirm encoding against the table generator */
unsigned char ccc;
char general_category;
uint8_t flags;
char sentence_break;
};
extern const struct unidata *const unidata[];
+extern const struct unicode_utf8_row { /* declares the row type and an array of rows whose definition and length live elsewhere */
+ uint8_t count; /* NOTE(review): presumably the byte length of a UTF-8 sequence for the row's lead byte - confirm how rows are indexed */
+ uint8_t min2, max2; /* NOTE(review): presumably bounds on the second byte of the sequence; inclusive or exclusive not visible here - confirm */
+} unicode_utf8_valid[];
#define UNICODE_NCHARS 1114112
-#define UNICODE_MODULUS 128
+#define UNICODE_MODULUS 16 /* lowered from 128 by this change; NOTE(review): presumably the per-block size used to index unidata[] - confirm all users were regenerated together */
#define UNICODE_BREAK_START 196608
#define UNICODE_BREAK_END 917504
#define UNICODE_BREAK_TOP 918016