From cee83243c39b08fb8b4ee227179450f2d9ca2d37 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?=
Date: Sat, 3 Feb 2018 12:53:58 +0100
Subject: [PATCH] doxygen: save result count instead of node size to search
 binary.

Makes the code much simpler and allows for up to 255 children and 255
results in each node, instead of limiting its size to 512 bytes (thus
e.g. 64 children and 127 results or some other combination). I like it
when things get better, more flexible and simpler by removing a bunch
of complicated code.
---
 doxygen/dox2html5.py                     |  29 ++++--------
 doxygen/search.js                        |  14 +++----
 doxygen/test/js-test-data/empty.bin      | Bin 18 -> 18 bytes
 doxygen/test/js-test-data/searchdata.b85 |   2 +-
 doxygen/test/js-test-data/searchdata.bin | Bin 630 -> 630 bytes
 doxygen/test/js-test-data/unicode.bin    | Bin 122 -> 122 bytes
 doxygen/test/test_search.py              |  48 ++++++++++-------------
 7 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py
index 5adbb1ee..fbc0d238 100755
--- a/doxygen/dox2html5.py
+++ b/doxygen/dox2html5.py
@@ -54,22 +54,22 @@ import m.math
 import ansilexer
 
 class Trie:
-    #  root  |     |      header       | values  | child 1 | child 1 | child 1 |
-    # offset | ... | size/2 | value #  |   ...   |  char   | barrier | offset  | ...
-    #  32b   |     |   8b   |    8b    |  n*16b  |   8b    |    1b   |   23b   |
+    #  root  |     |       header       | results | child 1 | child 1 | child 1 |
+    # offset | ... | result # | child # |   ...   |  char   | barrier | offset  | ...
+    #  32b   |     |    8b    |    8b   |  n*16b  |   8b    |    1b   |   23b   |
     root_offset_struct = struct.Struct(' int:
@@ -93,11 +93,10 @@ class Trie:
             child_offsets += [(char, child[0], offset)]
 
         # Serialize this node
-        size = int(2 + 2*len(self.values) + 4*len(child_offsets))
         serialized = bytearray()
-        serialized += self.header_struct.pack(int(size/2), len(self.values))
-        for v in self.values:
-            serialized += self.value_struct.pack(v)
+        serialized += self.header_struct.pack(len(self.results), len(self.children))
+        for v in self.results:
+            serialized += self.result_struct.pack(v)
 
         # Serialize child offsets
         for char, lookahead_barrier, abs_offset in child_offsets:
@@ -109,8 +108,6 @@ class Trie:
             serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23))
             self.child_char_struct.pack_into(serialized, offset + 3, char)
 
-        assert size == len(serialized)
-
         # Subtree merging: if this exact tree is already in the table, return
        # its offset. Otherwise add it and return the new offset.
        # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
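For reference, a minimal decoding sketch (editorial, not part of the patch)
mirroring the node layout in the comment above: a two-byte header carrying the
result count and the child count, 16-bit result indices, then 32-bit child
entries with the character in the top byte, a lookahead-barrier bit and a
23-bit absolute offset. The helper name and the exact struct formats are
assumptions inferred from the layout comment and from search.js, not code
taken from this commit.

    import struct

    header_struct = struct.Struct('<BB')   # result count, child count
    result_struct = struct.Struct('<H')    # 16-bit index into the result map
    child_struct = struct.Struct('<I')     # char (8b) | barrier (1b) | offset (23b)

    def decode_node(data: bytes, offset: int):
        """Hypothetical helper: decode one serialized trie node."""
        result_count, child_count = header_struct.unpack_from(data, offset)
        offset += header_struct.size

        results = []
        for _ in range(result_count):
            results += [result_struct.unpack_from(data, offset)[0]]
            offset += result_struct.size

        children = []
        for _ in range(child_count):
            packed = child_struct.unpack_from(data, offset)[0]
            children += [(packed >> 24,               # child character (a byte)
                          bool(packed & 0x00800000),  # lookahead barrier
                          packed & 0x007fffff)]       # offset of the child node
            offset += child_struct.size

        return results, children

search.js does the equivalent with getUint8()/getUint16()/getUint32() on a
DataView, reading the child count directly instead of deriving it from a
stored node size.
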
diff --git a/doxygen/search.js b/doxygen/search.js index 0bac7a51..06062610 100644 --- a/doxygen/search.js +++ b/doxygen/search.js @@ -188,9 +188,8 @@ var Search = { for(; foundPrefix != searchString.length; ++foundPrefix) { /* Calculate offset and count of children */ let offset = this.searchStack[this.searchStack.length - 1]; - let nodeSize = this.trie.getUint8(offset)*2; - let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2; - let childCount = (nodeSize - relChildOffset)/4; + let relChildOffset = 2 + this.trie.getUint8(offset)*2; + let childCount = this.trie.getUint8(offset + 1); /* Go through all children and find the next offset */ let childOffset = offset + relChildOffset; @@ -230,10 +229,10 @@ var Search = { }, gatherResults: function(offset, suffixLength, results) { - let valueCount = this.trie.getUint8(offset + 1); + let resultCount = this.trie.getUint8(offset); /* Populate the results with all values associated with this node */ - for(let i = 0; i != valueCount; ++i) { + for(let i = 0; i != resultCount; ++i) { let index = this.trie.getUint16(offset + (i + 1)*2, true); let flags = this.map.getUint8(index*4 + 3); let resultOffset = this.map.getUint32(index*4, true) & 0x00ffffff; @@ -279,9 +278,8 @@ var Search = { /* Dig deeper. If the child already has enough, return. */ /* TODO: hmmm. this is helluvalot duplicated code. hmm. */ - let nodeSize = this.trie.getUint8(offset)*2; - let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2; - let childCount = (nodeSize - relChildOffset)/4; + let relChildOffset = 2 + this.trie.getUint8(offset)*2; + let childCount = this.trie.getUint8(offset + 1); let childOffset = offset + relChildOffset; for(let j = 0; j != childCount; ++j) { let offsetBarrier = this.trie.getUint32(childOffset + j*4, true); diff --git a/doxygen/test/js-test-data/empty.bin b/doxygen/test/js-test-data/empty.bin index 053f032fcdea537178c8a915da4a53c84b6d060e..6d194001ded57661326bd550013ee951d9ef184b 100644 GIT binary patch literal 18 TcmeZu4rbtEU|?VYVh9NU5E}sc literal 18 UcmeZu4rbtEU|?VYVn#3t01zPn`v3p{ diff --git a/doxygen/test/js-test-data/searchdata.b85 b/doxygen/test/js-test-data/searchdata.b85 index 2d1c1614..a9f602a1 100644 --- a/doxygen/test/js-test-data/searchdata.b85 +++ b/doxygen/test/js-test-data/searchdata.b85 @@ -1 +1 @@ -O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file 
+O+!-vL;(N*Dggih0RRC2009I504V?g2mk;m009mF0B!&Q6aWBe0RRI400AHX04V?gBme*?00Alh0B!&QFaQ8)00A}t0BryPJOBVX0RaL4LI8j|00Bq<0CE5UPyhgL00CA20CWHWTmS%L00CkE0A&FH1poj6ZU6u&00D9U04M+fcmM!y00Djg0BHaLga80-00D{s06GBy1OSi#fI0vHmH+@{00Eu=0A~OJqyPYJ00F810AT;M3600P(m0Aca~0BHdL1^@s70s#PJ009O80A~OJ3;_UP009yK0B`^S7y$rc00ABW0CfNa_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file diff --git a/doxygen/test/js-test-data/searchdata.bin b/doxygen/test/js-test-data/searchdata.bin index 12fe6ea17cdd32c9f0e4fd08ada7e5617a356704..3c44891c33b1fa3edb935a8e0ed2e00e8ffac0d3 100644 GIT binary patch delta 320 zcmW-cAy0x~6osGjeXq>m2+qI^%wSNGff1Mq4Hh>xHj_o16NA8*I+GYoBoonOGP$ww z3rI$i$w)F8$X?Fgz3JThJohZqG7V3US{w^1@f8mx*hlNIhl79B!e%4qOhM<*~qP3WOBxI^b~iP~@_)`HfBVW3D|;}WmNjbjbH z!8LmK><3(sZ^vv09zLy`dIx6S`GO9bhdhsJxurj@u(0!&TJ+1l1N7&Pg}5ryN>tEF Vw4qRFQ}j_uv{6~ylO|O)^bb`ZDOvyk delta 320 zcmW-cp-aP26otS0&f9b#1~G_13#+dA1(eMr8HK zwmlOK7>HeHW=piVZHfaJiVj?eBe)XBa3Xqenyliq0w%5+2JSj=H-Wng+Icb6 aQPoP(g3%RA#-&&>dScBO)tK&B%l-kB?J30o diff --git a/doxygen/test/js-test-data/unicode.bin b/doxygen/test/js-test-data/unicode.bin index ed2bd5051a6b5edf84a1ef12a6f27dede4a1c005..23aa5fc9c091ceca3a549c5e28df3abb58344b97 100644 GIT binary patch literal 122 zcmW-ZAr6B;6b0us2n4AjA*muEsi>+#;hHonT2+UmSy8>$LG3|Uvv4nd5o6{}-uv-k z1YU*=RKSc%SkW0gsv)5ZZ2C94LPR&n=njEX3rm04;jmBmRd&`Gw&C)!CBM#elglyW HNcY_Vj4u?r literal 122 zcmW-Zp$ddR6h+VYR@ktZ76g+ZY%rNN*@VU5CYw!}VD@(=&4*|i{VyLAPC55*|J}$0 z3`$}^cz_e0a0(e%;SGmSz?B%I#Rt4lLl7F+QcBpr(YDe4V14OhuINS|_Uo{xqni8R Ix8SLk8 str: +def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str: # Visualize where the trees were merged if show_merged and base_offset in hashtable: return color_map['red'] + '#' + color_map['reset'] @@ -42,27 +42,25 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind stats.node_count += 1 out = '' - size, value_count = Trie.header_struct.unpack_from(serialized, base_offset) - stats.max_node_size = max(size, stats.max_node_size) - stats.max_node_values = max(value_count, stats.max_node_values) + result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset) + stats.max_node_results = max(result_count, stats.max_node_results) + stats.max_node_children = max(child_count, stats.max_node_children) offset = base_offset + Trie.header_struct.size - # print values, if any - if value_count: + # print results, if any + if result_count: out += color_map['blue'] + ' [' - for i in range(value_count): + for i in range(result_count): if i: out += color_map['blue']+', ' - value = Trie.value_struct.unpack_from(serialized, offset)[0] - stats.max_node_value_index = max(value, stats.max_node_value_index) - out += color_map['cyan'] + str(value) - offset += Trie.value_struct.size + result = Trie.result_struct.unpack_from(serialized, offset)[0] + stats.max_node_result_index = max(result, stats.max_node_result_index) + out += color_map['cyan'] + str(result) + offset += Trie.result_struct.size out += color_map['blue'] + ']' - # print children - if base_offset + size*2 - offset > 4: draw_pipe = True - child_count = 0 - while 
offset < base_offset + size*2:
-        if child_count or value_count:
+    # print children, if any
+    for i in range(child_count):
+        if result_count or i:
             out += color_map['reset'] + '\n'
             out += color_map['blue'] + indent + color_map['white']
         char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
@@ -77,11 +75,9 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind
         child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
         stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
         offset += Trie.child_struct.size
-        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
         child_count += 1
 
-    stats.max_node_children = max(child_count, stats.max_node_children)
-
     hashtable[base_offset] = True
     return out
 
@@ -108,21 +104,19 @@ def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barri
 
     stats = Empty()
     stats.node_count = 0
-    stats.max_node_size = 0
-    stats.max_node_values = 0
+    stats.max_node_results = 0
     stats.max_node_children = 0
-    stats.max_node_value_index = 0
+    stats.max_node_result_index = 0
     stats.max_node_child_offset = 0
-    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
     if out: out = color_map['white'] + out
 
     stats = """
 node count: {}
-max node size: {} bytes
-max node values: {}
+max node results: {}
 max node children: {}
-max node value index: {}
-max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_size*2, stats.max_node_values, stats.max_node_children, stats.max_node_value_index, stats.max_node_child_offset)
+max node result index: {}
+max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
     return out, stats
 
 def pretty_print_map(serialized: bytes, colors=False):
-- 
2.30.2
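A small worked example (editorial, not part of the patch) of the reader-side
arithmetic that search.js and the test pretty-printer now share: a node's
children start at offset + 2 + result_count*2 and their count comes straight
from the second header byte, rather than being derived from a stored node
size. The concrete byte values below are invented for illustration.

    import struct

    # One node with two results (7 and 42) and a single child 'a' whose
    # subtree starts at absolute offset 0x20, with the lookahead barrier set.
    node = bytearray()
    node += struct.pack('<BB', 2, 1)             # header: result #, child #
    node += struct.pack('<HH', 7, 42)            # two 16-bit result indices
    node += struct.pack('<I', 0x20 | (1 << 23))  # barrier bit + 23-bit offset
    struct.pack_into('<B', node, len(node) - 1, ord('a'))  # char in the top byte

    # Reader side, mirroring the new search.js / pretty-printer logic:
    result_count, child_count = node[0], node[1]
    children_start = 2 + result_count*2          # previously derived from node size
    assert (result_count, child_count) == (2, 1)
    assert children_start == 6
    packed = struct.unpack_from('<I', node, children_start)[0]
    assert packed & 0x007fffff == 0x20           # child node offset
    assert packed & 0x00800000                   # lookahead barrier flag
    assert packed >> 24 == ord('a')              # child character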