+# Emit short C macro aliases for the long unicode_* names.  General_Category
+# values use the bare name; the *_Break values get a GB/WB/SB prefix.
+out(map { sprintf("#define %s unicode_General_Category_%s\n", $_, $_) } sort keys %cats);
+out(map { sprintf("#define GB%s unicode_Grapheme_Break_%s\n", $_, $_) } sort keys %gbreak);
+out(map { sprintf("#define WB%s unicode_Word_Break_%s\n", $_, $_) } sort keys %wbreak);
+out(map { sprintf("#define SB%s unicode_Sentence_Break_%s\n", $_, $_) } sort keys %sbreak);
+
+# Two fixed aliases that do not depend on the property tables.
+out("#define NBC unicode_normalize_before_casefold\n");
+out("#define CD unicode_compatibility_decomposition\n");
+
+# Names for *_Break properties.
+#
+# Each table is a C array of string literals holding one property's value
+# names in sorted key order, one quoted name per line.
+out("const char *const unicode_Grapheme_Break_names[] = {\n",
+    join(",\n", map { " \"$_\"" } sort keys %gbreak),
+    "\n};\n");
+out("const char *const unicode_Word_Break_names[] = {\n",
+    join(",\n", map { " \"$_\"" } sort keys %wbreak),
+    "\n};\n");
+out("const char *const unicode_Sentence_Break_names[] = {\n",
+    join(",\n", map { " \"$_\"" } sort keys %sbreak),
+    "\n};\n");
+
+# State for the de-duplicated uint32_t arrays emitted by dedupe().
+our $ddnum = 0;                 # index to give the next new array
+our $ddsaved = 0;               # count of calls answered by an existing array
+our %ddnums = ();               # array contents ("a,b,c") -> array index
+my $ddfirst = 1;                # true until the first array has been written
+out("static const uint32_t ");  # opens the declaration the arrays extend
+# dedupe(LIST): return the C name ("ddN") of an array holding LIST, writing
+# a new declarator only if this exact content has not been seen before.
+sub dedupe {
+    my $key = join(",", @_);
+    if(exists $ddnums{$key}) {
+        ++$ddsaved;
+        return "dd$ddnums{$key}";
+    }
+    out(",\n") unless $ddfirst;  # separate from the previous declarator
+    $ddfirst = 0;
+    out("dd$ddnum\[]={$key}");
+    $ddnums{$key} = $ddnum++;
+    return "dd$ddnums{$key}";
+}
+
+# Generate the decomposition mapping tables: every code point that has a
+# decomposition gets the name of an array holding it plus a trailing 0.
+print STDERR "> decomposition mappings\n";
+for my $c (0 .. $max) {
+    next unless exists $data{$c} && exists $data{$c}->{decomp};
+    $data{$c}->{decompsym} = dedupe(@{$data{$c}->{decomp}}, 0);
+}
+
+print STDERR "> composition mappings\n";
+# Pass 1: for each primary composite, record it against the first code
+# point of its decomposition, building a per-code-point list of the
+# composites whose decomposition begins there.
+for my $c (0 .. $max) {
+    next unless exists $data{$c};
+    my $info = $data{$c};
+    # Only primary composites qualify: a decomposition that is canonical
+    # (no compat entry) and not flagged Full_Composition_Exclusion.
+    next unless exists $info->{decomp};
+    next if exists $info->{compat};
+    next if $info->{Full_Composition_Exclusion};
+    my $first = $info->{decomp}->[0];
+    if(exists $data{$first}->{compose}) {
+        push(@{$data{$first}->{compose}}, $c);
+    } else {
+        $data{$first}->{compose} = [$c];
+    }
+}
+# Pass 2: emit one array (with a trailing 0) per list collected above.
+for my $c (0 .. $max) {
+    next unless exists $data{$c} && exists $data{$c}->{compose};
+    $data{$c}->{compsym} = dedupe(@{$data{$c}->{compose}}, 0);
+}
+
+# The case folding table: each code point with a case folding gets an array
+# of the folded code points (parsed from hex) with a trailing 0 appended.
+print STDERR "> case-fold mappings\n";
+for my $c (0 .. $max) {
+    next unless exists $data{$c} && exists $data{$c}->{casefold};
+    # split ' ' ignores leading whitespace, so a padded field cannot produce
+    # an empty token that hex() would turn into a spurious leading 0.
+    my @folded = map { hex($_) } split(' ', $data{$c}->{casefold});
+    $data{$c}->{cfsym} = dedupe(@folded, 0);
+}
+# End of de-dupable arrays: terminate the declaration opened before dedupe().
+out(";\n");
+
+# Visit all the $modulus-character blocks in turn and generate the
+# required subtables. As above we spot duplicates to save space. In
+# Unicode 5.0.0 with $modulus=128 and current table data this saves
+# 1372 subtables or at least three and a half megabytes on 32-bit
+# platforms.
+print STDERR "> subtables\n";