X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/blobdiff_plain/1a05e381782c0c3135a48cc35dd1e58c2a5d0c25..92db088e5b292c1180a090ed369b9851e933e610:/lib/unidata.h diff --git a/lib/unidata.h b/lib/unidata.h index d8d71d4..1d89322 100644 --- a/lib/unidata.h +++ b/lib/unidata.h @@ -1,4 +1,10 @@ -/* Automatically generated file, see scripts/make-unidata */ +/** @file lib/unidata.h + * @brief Unicode tables + * + * Automatically generated file, see scripts/make-unidata + * + * DO NOT EDIT. + */ #ifndef UNIDATA_H #define UNIDATA_H enum unicode_General_Category { @@ -39,33 +45,43 @@ enum unicode_Grapheme_Break { unicode_Grapheme_Break_Extend, unicode_Grapheme_Break_L, unicode_Grapheme_Break_LF, + unicode_Grapheme_Break_LV, unicode_Grapheme_Break_LVT, unicode_Grapheme_Break_Other, + unicode_Grapheme_Break_Prepend, + unicode_Grapheme_Break_SpacingMark, unicode_Grapheme_Break_T, unicode_Grapheme_Break_V }; extern const char *const unicode_Grapheme_Break_names[]; enum unicode_Word_Break { unicode_Word_Break_ALetter, + unicode_Word_Break_CR, unicode_Word_Break_Extend, unicode_Word_Break_ExtendNumLet, unicode_Word_Break_Format, unicode_Word_Break_Katakana, + unicode_Word_Break_LF, unicode_Word_Break_MidLetter, unicode_Word_Break_MidNum, + unicode_Word_Break_MidNumLet, + unicode_Word_Break_Newline, unicode_Word_Break_Numeric, unicode_Word_Break_Other }; extern const char *const unicode_Word_Break_names[]; enum unicode_Sentence_Break { unicode_Sentence_Break_ATerm, + unicode_Sentence_Break_CR, unicode_Sentence_Break_Close, unicode_Sentence_Break_Extend, unicode_Sentence_Break_Format, + unicode_Sentence_Break_LF, unicode_Sentence_Break_Lower, unicode_Sentence_Break_Numeric, unicode_Sentence_Break_OLetter, unicode_Sentence_Break_Other, + unicode_Sentence_Break_SContinue, unicode_Sentence_Break_STerm, unicode_Sentence_Break_Sep, unicode_Sentence_Break_Sp, @@ -73,13 +89,14 @@ enum unicode_Sentence_Break { }; extern const char *const unicode_Sentence_Break_names[]; enum unicode_flags { - unicode_normalize_before_casefold = 1 + unicode_normalize_before_casefold = 1, + unicode_compatibility_decomposition = 2 }; struct unidata { - const uint32_t *compat; - const uint32_t *canon; + const uint32_t *decomp; const uint32_t *casefold; + const uint32_t *composed; unsigned char ccc; char general_category; uint8_t flags; @@ -88,8 +105,12 @@ struct unidata { char sentence_break; }; extern const struct unidata *const unidata[]; +extern const struct unicode_utf8_row { + uint8_t count; + uint8_t min2, max2; +} unicode_utf8_valid[]; #define UNICODE_NCHARS 1114112 -#define UNICODE_MODULUS 128 +#define UNICODE_MODULUS 16 #define UNICODE_BREAK_START 196608 #define UNICODE_BREAK_END 917504 #define UNICODE_BREAK_TOP 918016