From af3bd7b6864667d841dc5ab6218125dbc251b22c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Mon, 29 Jan 2018 13:35:01 +0100 Subject: [PATCH] doxygen: implement lookahead barriers for search. --- doxygen/dox2html5.py | 43 ++++++++++++++------ doxygen/search.js | 12 ++++-- doxygen/test/js-test-data/searchdata.b85 | 2 +- doxygen/test/js-test-data/searchdata.bin | Bin 630 -> 630 bytes doxygen/test/populate-js-test-data.py | 10 ++--- doxygen/test/test-search.js | 28 +++---------- doxygen/test/test_search.py | 48 +++++++++++++++-------- 7 files changed, 81 insertions(+), 62 deletions(-) diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py index 7fde75d7..81b891f3 100755 --- a/doxygen/dox2html5.py +++ b/doxygen/dox2html5.py @@ -54,9 +54,9 @@ import m.math import ansilexer class Trie: - # root | | header | values | child | - # offset | ... | size/2 | value # | ... | offsets ... | - # 32b | | 8b | 8b | n*16b | 8b + 24b | + # root | | header | values | child 1 | child 1 | child 1 | + # offset | ... | size/2 | value # | ... | char | barrier | offset | ... + # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b | root_offset_struct = struct.Struct(' int: # Serialize all children first child_offsets = [] for char, child in self.children.items(): - offset = child._serialize(hashtable, output) - child_offsets += [(char, offset)] + offset = child[1]._serialize(hashtable, output) + child_offsets += [(char, child[0], offset)] # Serialize this node size = int(2 + 2*len(self.values) + 4*len(child_offsets)) @@ -94,13 +97,13 @@ class Trie: serialized += self.value_struct.pack(v) # Serialize child offsets - for char, abs_offset in child_offsets: - assert abs_offset < 2**24 + for char, lookahead_barrier, abs_offset in child_offsets: + assert abs_offset < 2**23 # write them over each other because that's the only way to pack # a 24 bit field offset = len(serialized) - serialized += self.child_struct.pack(abs_offset) + serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23)) self.child_char_struct.pack_into(serialized, offset + 3, char.encode('utf-8')) assert size == len(serialized) @@ -1550,7 +1553,14 @@ def _build_search_data(state: State, prefix, id: str, trie: Trie, map: ResultMap # TODO: escape elsewhere so i don't have to unescape here index = map.add(html.unescape(result_joiner.join(prefixed_result_name)), compound.url, suffix_length=suffix_length) for i in range(len(prefixed_name)): - trie.insert(html.unescape(joiner.join(prefixed_name[i:])).lower(), index) + lookahead_barriers = [] + name = '' + for j in prefixed_name[i:]: + if name: + lookahead_barriers += [len(name)] + name += joiner + name += html.unescape(j) + trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers) for i in compound.children: if i in state.compounds: @@ -1589,7 +1599,14 @@ def build_search_data(state: State) -> bytearray: prefixed_name = result.prefix + [name] for i in range(len(prefixed_name)): - trie.insert(html.unescape('::'.join(prefixed_name[i:])).lower(), index) + lookahead_barriers = [] + name = '' + for j in prefixed_name[i:]: + if name: + lookahead_barriers += [len(name)] + name += '::' + name += html.unescape(j) + trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers) return serialize_search_data(trie, map) diff --git a/doxygen/search.js b/doxygen/search.js index ab8af986..064028ac 100644 --- a/doxygen/search.js +++ b/doxygen/search.js @@ -192,7 +192,7 @@ var Search = { if(String.fromCharCode(this.trie.getUint8(childOffset + j*4 + 3)) != searchString[foundPrefix]) continue; - this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x00ffffff); + this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x007fffff); found = true; break; } @@ -276,9 +276,15 @@ var Search = { let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2; let childCount = (nodeSize - relChildOffset)/4; let childOffset = offset + relChildOffset; - for(let j = 0; j != childCount; ++j) - if(this.gatherResults(this.trie.getUint32(childOffset + j*4, true) & 0x00ffffff, suffixLength + 1, results)) + for(let j = 0; j != childCount; ++j) { + let offsetBarrier = this.trie.getUint32(childOffset + j*4, true); + + /* Lookahead barrier, don't dig deeper */ + if(offsetBarrier & 0x00800000) continue; + + if(this.gatherResults(offsetBarrier & 0x007fffff, suffixLength + 1, results)) return true; + } /* Still hungry. */ return false; diff --git a/doxygen/test/js-test-data/searchdata.b85 b/doxygen/test/js-test-data/searchdata.b85 index 1fd24166..ba9751d5 100644 --- a/doxygen/test/js-test-data/searchdata.b85 +++ b/doxygen/test/js-test-data/searchdata.b85 @@ -1 +1 @@ -O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI40d0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#06GHzmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`Q06GHz$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXWHvj+uVgLXDhX4Qpz5oCK;Q#;u76AYNG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file +O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXWHvj+uVgLXDhX4Qpz5oCK;Q#;u76AYNG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file diff --git a/doxygen/test/js-test-data/searchdata.bin b/doxygen/test/js-test-data/searchdata.bin index e7a0c3e91cb828fd2c1a8d07de752f5f40104bbd..3af09be4e79c36db8c1f664602c790b0cd7f065d 100644 GIT binary patch delta 22 ccmeyy@{MIe7-Pf4a9<#?)R?hhWB