From: Vladimír Vondruš Date: Mon, 29 Jan 2018 12:35:01 +0000 (+0100) Subject: doxygen: implement lookahead barriers for search. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=af3bd7b6864667d841dc5ab6218125dbc251b22c;p=blog.git doxygen: implement lookahead barriers for search. --- diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py index 7fde75d7..81b891f3 100755 --- a/doxygen/dox2html5.py +++ b/doxygen/dox2html5.py @@ -54,9 +54,9 @@ import m.math import ansilexer class Trie: - # root | | header | values | child | - # offset | ... | size/2 | value # | ... | offsets ... | - # 32b | | 8b | 8b | n*16b | 8b + 24b | + # root | | header | values | child 1 | child 1 | child 1 | + # offset | ... | size/2 | value # | ... | char | barrier | offset | ... + # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b | root_offset_struct = struct.Struct(' int: # Serialize all children first child_offsets = [] for char, child in self.children.items(): - offset = child._serialize(hashtable, output) - child_offsets += [(char, offset)] + offset = child[1]._serialize(hashtable, output) + child_offsets += [(char, child[0], offset)] # Serialize this node size = int(2 + 2*len(self.values) + 4*len(child_offsets)) @@ -94,13 +97,13 @@ class Trie: serialized += self.value_struct.pack(v) # Serialize child offsets - for char, abs_offset in child_offsets: - assert abs_offset < 2**24 + for char, lookahead_barrier, abs_offset in child_offsets: + assert abs_offset < 2**23 # write them over each other because that's the only way to pack # a 24 bit field offset = len(serialized) - serialized += self.child_struct.pack(abs_offset) + serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23)) self.child_char_struct.pack_into(serialized, offset + 3, char.encode('utf-8')) assert size == len(serialized) @@ -1550,7 +1553,14 @@ def _build_search_data(state: State, prefix, id: str, trie: Trie, map: ResultMap # TODO: escape elsewhere so i don't have to unescape here index = map.add(html.unescape(result_joiner.join(prefixed_result_name)), compound.url, suffix_length=suffix_length) for i in range(len(prefixed_name)): - trie.insert(html.unescape(joiner.join(prefixed_name[i:])).lower(), index) + lookahead_barriers = [] + name = '' + for j in prefixed_name[i:]: + if name: + lookahead_barriers += [len(name)] + name += joiner + name += html.unescape(j) + trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers) for i in compound.children: if i in state.compounds: @@ -1589,7 +1599,14 @@ def build_search_data(state: State) -> bytearray: prefixed_name = result.prefix + [name] for i in range(len(prefixed_name)): - trie.insert(html.unescape('::'.join(prefixed_name[i:])).lower(), index) + lookahead_barriers = [] + name = '' + for j in prefixed_name[i:]: + if name: + lookahead_barriers += [len(name)] + name += '::' + name += html.unescape(j) + trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers) return serialize_search_data(trie, map) diff --git a/doxygen/search.js b/doxygen/search.js index ab8af986..064028ac 100644 --- a/doxygen/search.js +++ b/doxygen/search.js @@ -192,7 +192,7 @@ var Search = { if(String.fromCharCode(this.trie.getUint8(childOffset + j*4 + 3)) != searchString[foundPrefix]) continue; - this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x00ffffff); + this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x007fffff); found = true; break; } @@ -276,9 +276,15 @@ var Search = { let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2; let childCount = (nodeSize - relChildOffset)/4; let childOffset = offset + relChildOffset; - for(let j = 0; j != childCount; ++j) - if(this.gatherResults(this.trie.getUint32(childOffset + j*4, true) & 0x00ffffff, suffixLength + 1, results)) + for(let j = 0; j != childCount; ++j) { + let offsetBarrier = this.trie.getUint32(childOffset + j*4, true); + + /* Lookahead barrier, don't dig deeper */ + if(offsetBarrier & 0x00800000) continue; + + if(this.gatherResults(offsetBarrier & 0x007fffff, suffixLength + 1, results)) return true; + } /* Still hungry. */ return false; diff --git a/doxygen/test/js-test-data/searchdata.b85 b/doxygen/test/js-test-data/searchdata.b85 index 1fd24166..ba9751d5 100644 --- a/doxygen/test/js-test-data/searchdata.b85 +++ b/doxygen/test/js-test-data/searchdata.b85 @@ -1 +1 @@ -O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI40d0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#06GHzmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`Q06GHz$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXWHvj+uVgLXDhX4Qpz5oCK;Q#;u76AYNG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file +O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXWHvj+uVgLXDhX4Qpz5oCK;Q#;u76AYNG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl \ No newline at end of file diff --git a/doxygen/test/js-test-data/searchdata.bin b/doxygen/test/js-test-data/searchdata.bin index e7a0c3e9..3af09be4 100644 Binary files a/doxygen/test/js-test-data/searchdata.bin and b/doxygen/test/js-test-data/searchdata.bin differ diff --git a/doxygen/test/populate-js-test-data.py b/doxygen/test/populate-js-test-data.py index 310e8744..334e8bbf 100755 --- a/doxygen/test/populate-js-test-data.py +++ b/doxygen/test/populate-js-test-data.py @@ -48,21 +48,21 @@ map = ResultMap() trie.insert("math", map.add("Math", "namespaceMath.html")) index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8) -trie.insert("math::min()", index) +trie.insert("math::min()", index, lookahead_barriers=[4]) trie.insert("min()", index) index = map.add("Math::Vector", "classMath_1_1Vector.html") trie.insert("math::vector", index) trie.insert("vector", index) index = map.add("Math::Vector::min() const", "classMath_1_1Vector.html#min", suffix_length=6) -trie.insert("math::vector::min()", index) -trie.insert("vector::min()", index) +trie.insert("math::vector::min()", index, lookahead_barriers=[4, 12]) +trie.insert("vector::min()", index, lookahead_barriers=[6]) trie.insert("min()", index) index = map.add("Math::Range", "classMath_1_1Range.html") trie.insert("math::range", index) trie.insert("range", index) index = map.add("Math::Range::min() const", "classMath_1_1Range.html#min", suffix_length=6) -trie.insert("math::range::min()", index) -trie.insert("range::min()", index) +trie.insert("math::range::min()", index, lookahead_barriers=[4, 11]) +trie.insert("range::min()", index, lookahead_barriers=[5]) trie.insert("min()", index) trie.insert("subpage", map.add("Page » Subpage", "subpage.html")) diff --git a/doxygen/test/test-search.js b/doxygen/test/test-search.js index c48dd812..5a3292d0 100644 --- a/doxygen/test/test-search.js +++ b/doxygen/test/test-search.js @@ -108,21 +108,6 @@ const { StringDecoder } = require('string_decoder'); { name: 'Math', url: 'namespaceMath.html', suffixLength: 3 }, - { name: 'Math::min(int, int)', - url: 'namespaceMath.html#min', - suffixLength: 18 }, - { name: 'Math::Vector', - url: 'classMath_1_1Vector.html', - suffixLength: 11 }, - { name: 'Math::Vector::min() const', - url: 'classMath_1_1Vector.html#min', - suffixLength: 24 }, - { name: 'Math::Range', - url: 'classMath_1_1Range.html', - suffixLength: 10 }, - { name: 'Math::Range::min() const', - url: 'classMath_1_1Range.html#min', - suffixLength: 23 }, { name: 'Math::min(int, int)', url: 'namespaceMath.html#min', suffixLength: 12 }, @@ -153,10 +138,7 @@ const { StringDecoder } = require('string_decoder'); let resultsForVec = [ { name: 'Math::Vector', url: 'classMath_1_1Vector.html', - suffixLength: 3 }, - { name: 'Math::Vector::min() const', - url: 'classMath_1_1Vector.html#min', - suffixLength: 16 }]; + suffixLength: 3 }]; assert.deepEqual(Search.search('vec'), resultsForVec); /* Uppercase things and spaces */ @@ -184,10 +166,10 @@ const { StringDecoder } = require('string_decoder'); suffixLength: 3 }, { name: 'Math::min(int, int)', url: 'namespaceMath.html#min', - suffixLength: 18 }, - { name: 'Math::Vector', - url: 'classMath_1_1Vector.html', - suffixLength: 11 }]); + suffixLength: 12 }, + { name: 'Math::Vector::min() const', + url: 'classMath_1_1Vector.html#min', + suffixLength: 10 }]); } /* Search loaded from a base85-encoded file should work properly */ diff --git a/doxygen/test/test_search.py b/doxygen/test/test_search.py index 82252aae..7f8ba91f 100755 --- a/doxygen/test/test_search.py +++ b/doxygen/test/test_search.py @@ -65,7 +65,10 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind out += '\n' out += indent out += Trie.child_char_struct.unpack_from(serialized, offset + 3)[0].decode('utf-8') - child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00ffffff + if Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000: + out += '$\n' + out += indent + ' ' + child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset) offset += Trie.child_struct.size out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged) @@ -159,7 +162,7 @@ magnum [1337, 21] trie = Trie() trie.insert("math", 0) - trie.insert("math::vector", 1) + trie.insert("math::vector", 1, lookahead_barriers=[4]) trie.insert("vector", 1) trie.insert("math::range", 2) trie.insert("range", 2) @@ -171,8 +174,8 @@ magnum [1337, 21] trie.insert("math::minmax", 5) trie.insert("minmax", 5) - trie.insert("math::vector::minmax", 6) - trie.insert("vector::minmax", 6) + trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12]) + trie.insert("vector::minmax", 6, lookahead_barriers=[6]) trie.insert("minmax", 6) trie.insert("math::vector::min", 7) trie.insert("vector::min", 7) @@ -181,8 +184,8 @@ magnum [1337, 21] trie.insert("vector::max", 8) trie.insert("max", 8) - trie.insert("math::range::min", 9) - trie.insert("range::min", 9) + trie.insert("math::range::min", 9, lookahead_barriers=[4, 11]) + trie.insert("range::min", 9, lookahead_barriers=[5]) trie.insert("min", 9) trie.insert("math::range::max", 10) @@ -192,12 +195,15 @@ magnum [1337, 21] serialized = trie.serialize() self.compare(serialized, """ math [0] -||| ::vector [1] -||| | ::min [7] +||| :$ +||| :vector [1] +||| | :$ +||| | :min [7] ||| | | max [6] ||| | ax [8] ||| range [2] -||| | ::min [9] +||| | :$ +||| | :min [9] ||| | ax [10] ||| min [3] ||| || max [5] @@ -206,11 +212,13 @@ math [0] |in [3, 7, 9] || max [5, 6] vector [1] -| ::min [7] +| :$ +| :min [7] | | max [6] | ax [8] range [2] -| ::min [9] +| :$ +| :min [9] | ax [10] """) self.assertEqual(len(serialized), 340) @@ -308,22 +316,28 @@ class Search(IntegrationTestCase): #print(search_data_pretty) self.assertEqual(search_data_pretty, """ namespace [0] -| ::class [1] -| | ::foo() [6, 7, 8, 9] +| :$ +| :class [1] +| | :$ +| | :foo() [6, 7, 8, 9] | enum [11] -| | ::value [10] +| | :$ +| | :value [10] | typedef [12] | variable [13] class [1] -| ::foo() [6, 7, 8, 9] +| :$ +| :foo() [6, 7, 8, 9] a page [2] subpage [3] dir [4] -| /file.h [5] +| /$ +| file.h [5] file.h [5] |oo() [6, 7, 8, 9] enum [11] -| ::value [10] +| :$ +| :value [10] value [10] | riable [13] typedef [12]