+/** @file lib/unidata.h
+ * @brief Unicode tables
+ *
+ * Automatically generated file, see scripts/make-unidata
+ *
+ * DO NOT EDIT.
+ */
#ifndef UNIDATA_H
#define UNIDATA_H
-enum unicode_gc_cat {
- unicode_gc_Cc,
- unicode_gc_Cf,
- unicode_gc_Cn,
- unicode_gc_Co,
- unicode_gc_Cs,
- unicode_gc_Ll,
- unicode_gc_Lm,
- unicode_gc_Lo,
- unicode_gc_Lt,
- unicode_gc_Lu,
- unicode_gc_Mc,
- unicode_gc_Me,
- unicode_gc_Mn,
- unicode_gc_Nd,
- unicode_gc_Nl,
- unicode_gc_No,
- unicode_gc_Pc,
- unicode_gc_Pd,
- unicode_gc_Pe,
- unicode_gc_Pf,
- unicode_gc_Pi,
- unicode_gc_Po,
- unicode_gc_Ps,
- unicode_gc_Sc,
- unicode_gc_Sk,
- unicode_gc_Sm,
- unicode_gc_So,
- unicode_gc_Zl,
- unicode_gc_Zp,
- unicode_gc_Zs
+enum unicode_General_Category { /* Replaces enum unicode_gc_cat: same 30 suffixes in the same order, so the implicit values 0 (Cc) .. 29 (Zs) are unchanged. Suffixes are presumably the Unicode General_Category short aliases -- confirm against scripts/make-unidata. */
+ unicode_General_Category_Cc,
+ unicode_General_Category_Cf,
+ unicode_General_Category_Cn,
+ unicode_General_Category_Co,
+ unicode_General_Category_Cs,
+ unicode_General_Category_Ll,
+ unicode_General_Category_Lm,
+ unicode_General_Category_Lo,
+ unicode_General_Category_Lt,
+ unicode_General_Category_Lu,
+ unicode_General_Category_Mc,
+ unicode_General_Category_Me,
+ unicode_General_Category_Mn,
+ unicode_General_Category_Nd,
+ unicode_General_Category_Nl,
+ unicode_General_Category_No,
+ unicode_General_Category_Pc,
+ unicode_General_Category_Pd,
+ unicode_General_Category_Pe,
+ unicode_General_Category_Pf,
+ unicode_General_Category_Pi,
+ unicode_General_Category_Po,
+ unicode_General_Category_Ps,
+ unicode_General_Category_Sc,
+ unicode_General_Category_Sk,
+ unicode_General_Category_Sm,
+ unicode_General_Category_So,
+ unicode_General_Category_Zl,
+ unicode_General_Category_Zp,
+ unicode_General_Category_Zs
};
+enum unicode_Grapheme_Break { /* New enum: 12 enumerators with implicit values 0 (CR) .. 11 (V). Names presumably mirror the Unicode Grapheme_Cluster_Break property values -- confirm against scripts/make-unidata. */
+ unicode_Grapheme_Break_CR,
+ unicode_Grapheme_Break_Control,
+ unicode_Grapheme_Break_Extend,
+ unicode_Grapheme_Break_L,
+ unicode_Grapheme_Break_LF,
+ unicode_Grapheme_Break_LV,
+ unicode_Grapheme_Break_LVT,
+ unicode_Grapheme_Break_Other,
+ unicode_Grapheme_Break_Prepend,
+ unicode_Grapheme_Break_SpacingMark,
+ unicode_Grapheme_Break_T,
+ unicode_Grapheme_Break_V
+};
+extern const char *const unicode_Grapheme_Break_names[]; /* Defined elsewhere; presumably indexed by enum unicode_Grapheme_Break -- TODO confirm. */
+enum unicode_Word_Break { /* New enum: 13 enumerators with implicit values 0 (ALetter) .. 12 (Other). Names presumably mirror the Unicode Word_Break property values -- confirm against scripts/make-unidata. */
+ unicode_Word_Break_ALetter,
+ unicode_Word_Break_CR,
+ unicode_Word_Break_Extend,
+ unicode_Word_Break_ExtendNumLet,
+ unicode_Word_Break_Format,
+ unicode_Word_Break_Katakana,
+ unicode_Word_Break_LF,
+ unicode_Word_Break_MidLetter,
+ unicode_Word_Break_MidNum,
+ unicode_Word_Break_MidNumLet,
+ unicode_Word_Break_Newline,
+ unicode_Word_Break_Numeric,
+ unicode_Word_Break_Other
+};
+extern const char *const unicode_Word_Break_names[]; /* Defined elsewhere; presumably indexed by enum unicode_Word_Break -- TODO confirm. */
+enum unicode_Sentence_Break { /* New enum: 15 enumerators with implicit values 0 (ATerm) .. 14 (Upper). Names presumably mirror the Unicode Sentence_Break property values -- confirm against scripts/make-unidata. */
+ unicode_Sentence_Break_ATerm,
+ unicode_Sentence_Break_CR,
+ unicode_Sentence_Break_Close,
+ unicode_Sentence_Break_Extend,
+ unicode_Sentence_Break_Format,
+ unicode_Sentence_Break_LF,
+ unicode_Sentence_Break_Lower,
+ unicode_Sentence_Break_Numeric,
+ unicode_Sentence_Break_OLetter,
+ unicode_Sentence_Break_Other,
+ unicode_Sentence_Break_SContinue,
+ unicode_Sentence_Break_STerm,
+ unicode_Sentence_Break_Sep,
+ unicode_Sentence_Break_Sp,
+ unicode_Sentence_Break_Upper
+};
+extern const char *const unicode_Sentence_Break_names[]; /* Defined elsewhere; presumably indexed by enum unicode_Sentence_Break -- TODO confirm. */
+enum unicode_flags { /* Explicit values 1 and 2 are distinct bits, so they can be OR-ed together; presumably stored in the flags member of struct unidata -- TODO confirm. */
+ unicode_normalize_before_casefold = 1,
+ unicode_compatibility_decomposition = 2
+};
+
struct unidata {
- enum unicode_gc_cat gc;
- int ccc;
- int upper_offset;
- int lower_offset;
+ const uint32_t *decomp; /* Presumably the decomposition sequence; NULL/terminator convention is not visible here -- TODO confirm. NOTE(review): uint32_t/uint8_t need <stdint.h>, and no #include is visible in this header -- confirm every includer provides it first. */
+ const uint32_t *casefold; /* Presumably the case-folded sequence -- TODO confirm. */
+ const uint32_t *composed; /* Presumably data used when composing -- TODO confirm. */
+ unsigned char ccc; /* Narrowed from int to unsigned char; presumably the canonical combining class. */
+ char general_category; /* Presumably holds an enum unicode_General_Category value; 0..29 fits in char whether signed or not. */
+ uint8_t flags; /* Presumably a mask of enum unicode_flags bits -- TODO confirm. */
+ char grapheme_break; /* Presumably holds an enum unicode_Grapheme_Break value (0..11). */
+ char word_break; /* Presumably holds an enum unicode_Word_Break value (0..12). */
+ char sentence_break; /* Presumably holds an enum unicode_Sentence_Break value (0..14). */
};
extern const struct unidata *const unidata[];
-#define UNICODE_NCHARS 195328
+extern const struct unicode_utf8_row { /* Row type and its table are declared in one statement; the table itself is defined elsewhere. */
+ uint8_t count; /* Meaning not visible here; presumably a UTF-8 sequence length -- TODO confirm. */
+ uint8_t min2, max2; /* Presumably inclusive bounds on the second byte of a sequence -- TODO confirm. */
+} unicode_utf8_valid[];
+#define UNICODE_NCHARS 1114112 /* 0x110000 (was 195328) */
+#define UNICODE_MODULUS 16 /* Presumably the number of code points per table row -- confirm in scripts/make-unidata. */
+#define UNICODE_BREAK_START 196608 /* 0x30000; presumably the start of a range left out of the table -- TODO confirm. */
+#define UNICODE_BREAK_END 917504 /* 0xE0000; presumably the end of that omitted range -- TODO confirm. */
+#define UNICODE_BREAK_TOP 918016 /* 0xE0200; presumably the upper limit of table coverage -- TODO confirm. */
#endif