From cee83243c39b08fb8b4ee227179450f2d9ca2d37 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?=
Date: Sat, 3 Feb 2018 12:53:58 +0100
Subject: [PATCH] doxygen: save result count instead of node size to search
 binary.

Makes the code much simpler and allows for up to 255 children and 255
results in each node, instead of limiting its size to 512 bytes (thus
e.g. 64 children and 127 results or some other combination). I like it
when things get better, more flexible and simpler by removing a bunch
of complicated code.
---
 doxygen/dox2html5.py                     |  29 ++++--------
 doxygen/search.js                        |  14 +++----
 doxygen/test/js-test-data/empty.bin      | Bin 18 -> 18 bytes
 doxygen/test/js-test-data/searchdata.b85 |   2 +-
 doxygen/test/js-test-data/searchdata.bin | Bin 630 -> 630 bytes
 doxygen/test/js-test-data/unicode.bin    | Bin 122 -> 122 bytes
 doxygen/test/test_search.py              |  48 ++++++++++-------------
 7 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py
index 5adbb1ee..fbc0d238 100755
--- a/doxygen/dox2html5.py
+++ b/doxygen/dox2html5.py
@@ -54,22 +54,22 @@ import m.math
 import ansilexer
 
 class Trie:
-    #  root  |     |      header       | values  | child 1 | child 1 | child 1 |
-    # offset | ... | size/2 | value #  |   ...   |  char   | barrier | offset  | ...
-    #  32b   |     |   8b   |    8b    |  n*16b  |   8b    |    1b   |   23b   |
+    #  root  |     |       header       | results | child 1 | child 1 | child 1 |
+    # offset | ... | result # | child # |   ...   |  char   | barrier | offset  | ...
+    #  32b   |     |    8b    |    8b   |  n*16b  |   8b    |    1b   |   23b   |
     root_offset_struct = struct.Struct(' int:
@@ -93,11 +93,10 @@ class Trie:
             child_offsets += [(char, child[0], offset)]
 
         # Serialize this node
-        size = int(2 + 2*len(self.values) + 4*len(child_offsets))
         serialized = bytearray()
-        serialized += self.header_struct.pack(int(size/2), len(self.values))
-        for v in self.values:
-            serialized += self.value_struct.pack(v)
+        serialized += self.header_struct.pack(len(self.results), len(self.children))
+        for v in self.results:
+            serialized += self.result_struct.pack(v)
 
         # Serialize child offsets
         for char, lookahead_barrier, abs_offset in child_offsets:
@@ -109,8 +108,6 @@ class Trie:
             serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23))
             self.child_char_struct.pack_into(serialized, offset + 3, char)
 
-        assert size == len(serialized)
-
         # Subtree merging: if this exact tree is already in the table, return
        # its offset. Otherwise add it and return the new offset.
        # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
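For reference, a minimal decoding sketch (editorial, not part of the patch)
mirroring the node layout in the comment above: a two-byte header carrying the
result count and the child count, 16-bit result indices, then 32-bit child
entries with the character in the top byte, a lookahead-barrier bit and a
23-bit absolute offset. The helper name and the exact struct formats are
assumptions inferred from the layout comment and from search.js, not code
taken from this commit.

    import struct

    header_struct = struct.Struct('<BB')   # result count, child count
    result_struct = struct.Struct('<H')    # 16-bit index into the result map
    child_struct = struct.Struct('<I')     # char (8b) | barrier (1b) | offset (23b)

    def decode_node(data: bytes, offset: int):
        """Hypothetical helper: decode one serialized trie node."""
        result_count, child_count = header_struct.unpack_from(data, offset)
        offset += header_struct.size

        results = []
        for _ in range(result_count):
            results += [result_struct.unpack_from(data, offset)[0]]
            offset += result_struct.size

        children = []
        for _ in range(child_count):
            packed = child_struct.unpack_from(data, offset)[0]
            children += [(packed >> 24,               # child character (a byte)
                          bool(packed & 0x00800000),  # lookahead barrier
                          packed & 0x007fffff)]       # offset of the child node
            offset += child_struct.size

        return results, children

search.js does the equivalent with getUint8()/getUint16()/getUint32() on a
DataView, reading the child count directly instead of deriving it from a
stored node size.
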
diff --git a/doxygen/search.js b/doxygen/search.js index 0bac7a51..06062610 100644 --- a/doxygen/search.js +++ b/doxygen/search.js @@ -188,9 +188,8 @@ var Search = { for(; foundPrefix != searchString.length; ++foundPrefix) { /* Calculate offset and count of children */ let offset = this.searchStack[this.searchStack.length - 1]; - let nodeSize = this.trie.getUint8(offset)*2; - let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2; - let childCount = (nodeSize - relChildOffset)/4; + let relChildOffset = 2 + this.trie.getUint8(offset)*2; + let childCount = this.trie.getUint8(offset + 1); /* Go through all children and find the next offset */ let childOffset = offset + relChildOffset; @@ -230,10 +229,10 @@ var Search = { }, gatherResults: function(offset, suffixLength, results) { - let valueCount = this.trie.getUint8(offset + 1); + let resultCount = this.trie.getUint8(offset); /* Populate the results with all values associated with this node */ - for(let i = 0; i != valueCount; ++i) { + for(let i = 0; i != resultCount; ++i) { let index = this.trie.getUint16(offset + (i + 1)*2, true); let flags = this.map.getUint8(index*4 + 3); let resultOffset = this.map.getUint32(index*4, true) & 0x00ffffff; @@ -279,9 +278,8 @@ var Search = { /* Dig deeper. If the child already has enough, return. */ /* TODO: hmmm. this is helluvalot duplicated code. hmm. */ - let nodeSize = this.trie.getUint8(offset)*2; - let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2; - let childCount = (nodeSize - relChildOffset)/4; + let relChildOffset = 2 + this.trie.getUint8(offset)*2; + let childCount = this.trie.getUint8(offset + 1); let childOffset = offset + relChildOffset; for(let j = 0; j != childCount; ++j) { let offsetBarrier = this.trie.getUint32(childOffset + j*4, true); diff --git a/doxygen/test/js-test-data/empty.bin b/doxygen/test/js-test-data/empty.bin index 053f032fcdea537178c8a915da4a53c84b6d060e..6d194001ded57661326bd550013ee951d9ef184b 100644 GIT binary patch literal 18 TcmeZu4rbtEU|?VYVh9NU5E}sc literal 18 UcmeZu4rbtEU|?VYVn#3t01zPn`v3p{ diff --git a/doxygen/test/js-test-data/searchdata.b85 b/doxygen/test/js-test-data/searchdata.b85 index 2d1c1614..a9f602a1 100644 --- a/doxygen/test/js-test-data/searchdata.b85 +++ b/doxygen/test/js-test-data/searchdata.b85 @@ -1 +1 @@ -O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file 
+O+!-vL;(N*Dggih0RRC2009I504V?g2mk;m009mF0B!&Q6aWBe0RRI400AHX04V?gBme*?00Alh0B!&QFaQ8)00A}t0BryPJOBVX0RaL4LI8j|00Bq<0CE5UPyhgL00CA20CWHWTmS%L00CkE0A&FH1poj6ZU6u&00D9U04M+fcmM!y00Djg0BHaLga80-00D{s06GBy1OSi#fI0vHmH+@{00Eu=0A~OJqyPYJ00F810AT;M3600P(m0Aca~0BHdL1^@s70s#PJ009O80A~OJ3;_UP009yK0B`^S7y$rc00ABW0CfNa_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file diff --git a/doxygen/test/js-test-data/searchdata.bin b/doxygen/test/js-test-data/searchdata.bin index 12fe6ea17cdd32c9f0e4fd08ada7e5617a356704..3c44891c33b1fa3edb935a8e0ed2e00e8ffac0d3 100644 GIT binary patch delta 320 zcmW-cAy0x~6osGjeXq>m2+qI^%wSNGff1Mq4Hh>xHj_o16NA8*I+GYoBoonOGP$ww z3rI$i$w)F8$X?Fgz3JThJohZqG7V3US{w^1@f8mx*hlNIhl79B!e%4qOhM<*~qP3WOBxI^b~iP~@_)`HfBVW3D|;}WmNjbjbH z!8LmK><3(sZ^vv09zLy`dIx6S`GO9bhdhsJxurj@u(0!&TJ+1l1N7&Pg}5ryN>tEF Vw4qRFQ}j_uv{6~ylO|O)^bb`ZDOvyk delta 320 zcmW-cp-aP26otS0&f9b#1~G_13#+dA1(eMr8HK zwmlOK7>HeHW=piVZHfaJiVj?eBe)XBa3Xqenyliq0w%5+2JSj=H-Wng+Icb6 aQPoP(g3%RA#-&&>dScBO)tK&B%l-kB?J30o diff --git a/doxygen/test/js-test-data/unicode.bin b/doxygen/test/js-test-data/unicode.bin index ed2bd5051a6b5edf84a1ef12a6f27dede4a1c005..23aa5fc9c091ceca3a549c5e28df3abb58344b97 100644 GIT binary patch literal 122 zcmW-ZAr6B;6b0us2n4AjA*muEsi>+#;hHonT2+UmSy8>$LG3|Uvv4nd5o6{}-uv-k z1YU*=RKSc%SkW0gsv)5ZZ2C94LPR&n=njEX3rm04;jmBmRd&`Gw&C)!CBM#elglyW HNcY_Vj4u?r literal 122 zcmW-Zp$ddR6h+VYR@ktZ76g+ZY%rNN*@VU5CYw!}VD@(=&4*|i{VyLAPC55*|J}$0 z3`$}^cz_e0a0(e%;SGmSz?B%I#Rt4lLl7F+QcBpr(YDe4V14OhuINS|_Uo{xqni8R Ix8SLk8 str: +def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str: # Visualize where the trees were merged if show_merged and base_offset in hashtable: return color_map['red'] + '#' + color_map['reset'] @@ -42,27 +42,25 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind stats.node_count += 1 out = '' - size, value_count = Trie.header_struct.unpack_from(serialized, base_offset) - stats.max_node_size = max(size, stats.max_node_size) - stats.max_node_values = max(value_count, stats.max_node_values) + result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset) + stats.max_node_results = max(result_count, stats.max_node_results) + stats.max_node_children = max(child_count, stats.max_node_children) offset = base_offset + Trie.header_struct.size - # print values, if any - if value_count: + # print results, if any + if result_count: out += color_map['blue'] + ' [' - for i in range(value_count): + for i in range(result_count): if i: out += color_map['blue']+', ' - value = Trie.value_struct.unpack_from(serialized, offset)[0] - stats.max_node_value_index = max(value, stats.max_node_value_index) - out += color_map['cyan'] + str(value) - offset += Trie.value_struct.size + result = Trie.result_struct.unpack_from(serialized, offset)[0] + stats.max_node_result_index = max(result, stats.max_node_result_index) + out += color_map['cyan'] + str(result) + offset += Trie.result_struct.size out += color_map['blue'] + ']' - # print children - if base_offset + size*2 - offset > 4: draw_pipe = True - child_count = 0 - while 
offset < base_offset + size*2:
-        if child_count or value_count:
+    # print children, if any
+    for i in range(child_count):
+        if result_count or i:
             out += color_map['reset'] + '\n'
             out += color_map['blue'] + indent + color_map['white']
         char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
@@ -77,11 +75,9 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind
         child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
         stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
         offset += Trie.child_struct.size
-        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
         child_count += 1
 
-    stats.max_node_children = max(child_count, stats.max_node_children)
-
     hashtable[base_offset] = True
     return out
 
@@ -108,21 +104,19 @@ def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barri
 
     stats = Empty()
     stats.node_count = 0
-    stats.max_node_size = 0
-    stats.max_node_values = 0
+    stats.max_node_results = 0
     stats.max_node_children = 0
-    stats.max_node_value_index = 0
+    stats.max_node_result_index = 0
     stats.max_node_child_offset = 0
-    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
     if out: out = color_map['white'] + out
 
     stats = """
 node count: {}
-max node size: {} bytes
-max node values: {}
+max node results: {}
 max node children: {}
-max node value index: {}
-max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_size*2, stats.max_node_values, stats.max_node_children, stats.max_node_value_index, stats.max_node_child_offset)
+max node result index: {}
+max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
     return out, stats
 
 def pretty_print_map(serialized: bytes, colors=False):
-- 
2.30.2
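A small worked example (editorial, not part of the patch) of the reader-side
arithmetic that search.js and the test pretty-printer now share: a node's
children start at offset + 2 + result_count*2 and their count comes straight
from the second header byte, rather than being derived from a stored node
size. The concrete byte values below are invented for illustration.

    import struct

    # One node with two results (7 and 42) and a single child 'a' whose
    # subtree starts at absolute offset 0x20, with the lookahead barrier set.
    node = bytearray()
    node += struct.pack('<BB', 2, 1)             # header: result #, child #
    node += struct.pack('<HH', 7, 42)            # two 16-bit result indices
    node += struct.pack('<I', 0x20 | (1 << 23))  # barrier bit + 23-bit offset
    struct.pack_into('<B', node, len(node) - 1, ord('a'))  # char in the top byte

    # Reader side, mirroring the new search.js / pretty-printer logic:
    result_count, child_count = node[0], node[1]
    children_start = 2 + result_count*2          # previously derived from node size
    assert (result_count, child_count) == (2, 1)
    assert children_start == 6
    packed = struct.unpack_from('<I', node, children_start)[0]
    assert packed & 0x007fffff == 0x20           # child node offset
    assert packed & 0x00800000                   # lookahead barrier flag
    assert packed >> 24 == ord('a')              # child character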