X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/blobdiff_plain/35b651f0cc0999deae42c92f2cbca3ecf88fe157..4265e5d362914f3732b4035dcf67162e525e0142:/lib/unidata.h diff --git a/lib/unidata.h b/lib/unidata.h index 16ff5fa..1d89322 100644 --- a/lib/unidata.h +++ b/lib/unidata.h @@ -1,54 +1,117 @@ -/* Automatically generated file, see scripts/make-unidata */ +/** @file lib/unidata.h + * @brief Unicode tables + * + * Automatically generated file, see scripts/make-unidata + * + * DO NOT EDIT. + */ #ifndef UNIDATA_H #define UNIDATA_H -enum unicode_gc_cat { - unicode_gc_Cc, - unicode_gc_Cf, - unicode_gc_Cn, - unicode_gc_Co, - unicode_gc_Cs, - unicode_gc_Ll, - unicode_gc_Lm, - unicode_gc_Lo, - unicode_gc_Lt, - unicode_gc_Lu, - unicode_gc_Mc, - unicode_gc_Me, - unicode_gc_Mn, - unicode_gc_Nd, - unicode_gc_Nl, - unicode_gc_No, - unicode_gc_Pc, - unicode_gc_Pd, - unicode_gc_Pe, - unicode_gc_Pf, - unicode_gc_Pi, - unicode_gc_Po, - unicode_gc_Ps, - unicode_gc_Sc, - unicode_gc_Sk, - unicode_gc_Sm, - unicode_gc_So, - unicode_gc_Zl, - unicode_gc_Zp, - unicode_gc_Zs +enum unicode_General_Category { + unicode_General_Category_Cc, + unicode_General_Category_Cf, + unicode_General_Category_Cn, + unicode_General_Category_Co, + unicode_General_Category_Cs, + unicode_General_Category_Ll, + unicode_General_Category_Lm, + unicode_General_Category_Lo, + unicode_General_Category_Lt, + unicode_General_Category_Lu, + unicode_General_Category_Mc, + unicode_General_Category_Me, + unicode_General_Category_Mn, + unicode_General_Category_Nd, + unicode_General_Category_Nl, + unicode_General_Category_No, + unicode_General_Category_Pc, + unicode_General_Category_Pd, + unicode_General_Category_Pe, + unicode_General_Category_Pf, + unicode_General_Category_Pi, + unicode_General_Category_Po, + unicode_General_Category_Ps, + unicode_General_Category_Sc, + unicode_General_Category_Sk, + unicode_General_Category_Sm, + unicode_General_Category_So, + unicode_General_Category_Zl, + unicode_General_Category_Zp, + unicode_General_Category_Zs }; +enum unicode_Grapheme_Break { + unicode_Grapheme_Break_CR, + unicode_Grapheme_Break_Control, + unicode_Grapheme_Break_Extend, + unicode_Grapheme_Break_L, + unicode_Grapheme_Break_LF, + unicode_Grapheme_Break_LV, + unicode_Grapheme_Break_LVT, + unicode_Grapheme_Break_Other, + unicode_Grapheme_Break_Prepend, + unicode_Grapheme_Break_SpacingMark, + unicode_Grapheme_Break_T, + unicode_Grapheme_Break_V +}; +extern const char *const unicode_Grapheme_Break_names[]; +enum unicode_Word_Break { + unicode_Word_Break_ALetter, + unicode_Word_Break_CR, + unicode_Word_Break_Extend, + unicode_Word_Break_ExtendNumLet, + unicode_Word_Break_Format, + unicode_Word_Break_Katakana, + unicode_Word_Break_LF, + unicode_Word_Break_MidLetter, + unicode_Word_Break_MidNum, + unicode_Word_Break_MidNumLet, + unicode_Word_Break_Newline, + unicode_Word_Break_Numeric, + unicode_Word_Break_Other +}; +extern const char *const unicode_Word_Break_names[]; +enum unicode_Sentence_Break { + unicode_Sentence_Break_ATerm, + unicode_Sentence_Break_CR, + unicode_Sentence_Break_Close, + unicode_Sentence_Break_Extend, + unicode_Sentence_Break_Format, + unicode_Sentence_Break_LF, + unicode_Sentence_Break_Lower, + unicode_Sentence_Break_Numeric, + unicode_Sentence_Break_OLetter, + unicode_Sentence_Break_Other, + unicode_Sentence_Break_SContinue, + unicode_Sentence_Break_STerm, + unicode_Sentence_Break_Sep, + unicode_Sentence_Break_Sp, + unicode_Sentence_Break_Upper +}; +extern const char *const unicode_Sentence_Break_names[]; enum unicode_flags { unicode_normalize_before_casefold = 1, - unicode_grapheme_break_extend = 2 + unicode_compatibility_decomposition = 2 }; struct unidata { - const uint32_t *compat; - const uint32_t *canon; + const uint32_t *decomp; const uint32_t *casefold; - int16_t upper_offset; - int16_t lower_offset; + const uint32_t *composed; unsigned char ccc; - char gc; + char general_category; uint8_t flags; + char grapheme_break; + char word_break; + char sentence_break; }; extern const struct unidata *const unidata[]; -#define UNICODE_NCHARS 195200 -#define UNICODE_MODULUS 128 +extern const struct unicode_utf8_row { + uint8_t count; + uint8_t min2, max2; +} unicode_utf8_valid[]; +#define UNICODE_NCHARS 1114112 +#define UNICODE_MODULUS 16 +#define UNICODE_BREAK_START 196608 +#define UNICODE_BREAK_END 917504 +#define UNICODE_BREAK_TOP 918016 #endif