import ansilexer
class Trie:
- #  root  |     |      header      | values |    child    |
- # offset | ... | size/2 | value # |  ...   | offsets ... |
- #  32b   |     |   8b   |   8b    | n*16b  |  8b + 24b   |
+ #  root  |     |      header      | values | child 1 | child 1 | child 1 |
+ # offset | ... | size/2 | value # |  ...   |  char   | barrier | offset  | ...
+ #  32b   |     |   8b   |   8b    | n*16b  |   8b    |   1b    |   23b   |
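+ # A child record thus occupies 32 bits, so a node with v values and c
+ # children takes 2 + 2*v + 4*c bytes (see _serialize() below). Illustration
+ # only: a child for character 'v' with the lookahead barrier set and
+ # absolute offset 0x001234 is stored as 0x001234 | (1 << 23) in the low
+ # 24 bits, with the UTF-8 byte of 'v' written over the topmost byte.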
root_offset_struct = struct.Struct('<I')
header_struct = struct.Struct('<BB')
value_struct = struct.Struct('<H')
self.values = []
self.children = {}
- def insert(self, path: str, value):
+ def insert(self, path: str, value, lookahead_barriers=[]):
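+ # `lookahead_barriers` lists character indices into `path`; the child edge
+ # consumed at each listed index gets its barrier flag set, which the
+ # client-side search later uses to stop expanding results past that point.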
if not path:
self.values += [value]
return
char = path[0]
assert not char.isupper() # to avoid unnecessary duplicates
if not char in self.children:
- self.children[char] = Trie()
- self.children[char].insert(path[1:], value)
+ self.children[char] = (False, Trie())
+ if lookahead_barriers and lookahead_barriers[0] == 0:
+ lookahead_barriers = lookahead_barriers[1:]
+ self.children[char] = (True, self.children[char][1])
+ self.children[char][1].insert(path[1:], value, [b - 1 for b in lookahead_barriers])
# Returns the offset of this serialized node in `output`
def _serialize(self, hashtable, output: bytearray) -> int:
# Serialize all children first
child_offsets = []
for char, child in self.children.items():
- offset = child._serialize(hashtable, output)
- child_offsets += [(char, offset)]
+ offset = child[1]._serialize(hashtable, output)
+ child_offsets += [(char, child[0], offset)]
# Serialize this node
size = int(2 + 2*len(self.values) + 4*len(child_offsets))
serialized += self.value_struct.pack(v)
# Serialize child offsets
- for char, abs_offset in child_offsets:
- assert abs_offset < 2**24
+ for char, lookahead_barrier, abs_offset in child_offsets:
+ assert abs_offset < 2**23
# write them over each other because that's the only way to pack
# a 24 bit field
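+ # E.g. (assuming child_struct packs a little-endian uint32) a barrier
+ # child 'r' at absolute offset 0x48 first gets packed as 0x00800048,
+ # i.e. bytes 48 00 80 00, and the char pack below then overwrites the
+ # last byte, giving 48 00 80 72.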
offset = len(serialized)
- serialized += self.child_struct.pack(abs_offset)
+ serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23))
self.child_char_struct.pack_into(serialized, offset + 3, char.encode('utf-8'))
assert size == len(serialized)
# TODO: escape elsewhere so i don't have to unescape here
index = map.add(html.unescape(result_joiner.join(prefixed_result_name)), compound.url, suffix_length=suffix_length)
for i in range(len(prefixed_name)):
- trie.insert(html.unescape(joiner.join(prefixed_name[i:])).lower(), index)
+ lookahead_barriers = []
+ name = ''
+ for j in prefixed_name[i:]:
+ if name:
+ lookahead_barriers += [len(name)]
+ name += joiner
+ name += html.unescape(j)
+ trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers)
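+ # E.g. for prefixed_name[i:] == ['Math', 'Vector', 'min()'] and joiner
+ # '::', this inserts 'math::vector::min()' with lookahead_barriers
+ # [4, 12], i.e. the indices where each joiner starts.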
for i in compound.children:
if i in state.compounds:
prefixed_name = result.prefix + [name]
for i in range(len(prefixed_name)):
- trie.insert(html.unescape('::'.join(prefixed_name[i:])).lower(), index)
+ lookahead_barriers = []
+ name = ''
+ for j in prefixed_name[i:]:
+ if name:
+ lookahead_barriers += [len(name)]
+ name += '::'
+ name += html.unescape(j)
+ trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers)
return serialize_search_data(trie, map)
if(String.fromCharCode(this.trie.getUint8(childOffset + j*4 + 3)) != searchString[foundPrefix])
continue;
- this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x00ffffff);
+ this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x007fffff);
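+ /* Bit 23 of the child record now carries the lookahead barrier flag,
+ so only the low 23 bits form the child offset. */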
found = true;
break;
}
let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2;
let childCount = (nodeSize - relChildOffset)/4;
let childOffset = offset + relChildOffset;
- for(let j = 0; j != childCount; ++j)
- if(this.gatherResults(this.trie.getUint32(childOffset + j*4, true) & 0x00ffffff, suffixLength + 1, results))
+ for(let j = 0; j != childCount; ++j) {
+ let offsetBarrier = this.trie.getUint32(childOffset + j*4, true);
+
+ /* Lookahead barrier, don't dig deeper */
+ if(offsetBarrier & 0x00800000) continue;
+
+ if(this.gatherResults(offsetBarrier & 0x007fffff, suffixLength + 1, results))
return true;
+ }
/* Still hungry. */
return false;
-O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI40d0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#06GHzmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`Q06GHz$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXWHvj+uVgLXDhX4Qpz5oCK;Q#;u76AYNG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl
\ No newline at end of file
+O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXWHvj+uVgLXDhX4Qpz5oCK;Q#;u76AYNG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl
\ No newline at end of file
trie.insert("math", map.add("Math", "namespaceMath.html"))
index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8)
-trie.insert("math::min()", index)
+trie.insert("math::min()", index, lookahead_barriers=[4])
trie.insert("min()", index)
index = map.add("Math::Vector", "classMath_1_1Vector.html")
trie.insert("math::vector", index)
trie.insert("vector", index)
index = map.add("Math::Vector::min() const", "classMath_1_1Vector.html#min", suffix_length=6)
-trie.insert("math::vector::min()", index)
-trie.insert("vector::min()", index)
+trie.insert("math::vector::min()", index, lookahead_barriers=[4, 12])
+trie.insert("vector::min()", index, lookahead_barriers=[6])
trie.insert("min()", index)
index = map.add("Math::Range", "classMath_1_1Range.html")
trie.insert("math::range", index)
trie.insert("range", index)
index = map.add("Math::Range::min() const", "classMath_1_1Range.html#min", suffix_length=6)
-trie.insert("math::range::min()", index)
-trie.insert("range::min()", index)
+trie.insert("math::range::min()", index, lookahead_barriers=[4, 11])
+trie.insert("range::min()", index, lookahead_barriers=[5])
trie.insert("min()", index)
trie.insert("subpage", map.add("Page » Subpage", "subpage.html"))
{ name: 'Math',
url: 'namespaceMath.html',
suffixLength: 3 },
- { name: 'Math::min(int, int)',
- url: 'namespaceMath.html#min',
- suffixLength: 18 },
- { name: 'Math::Vector',
- url: 'classMath_1_1Vector.html',
- suffixLength: 11 },
- { name: 'Math::Vector::min() const',
- url: 'classMath_1_1Vector.html#min',
- suffixLength: 24 },
- { name: 'Math::Range',
- url: 'classMath_1_1Range.html',
- suffixLength: 10 },
- { name: 'Math::Range::min() const',
- url: 'classMath_1_1Range.html#min',
- suffixLength: 23 },
{ name: 'Math::min(int, int)',
url: 'namespaceMath.html#min',
suffixLength: 12 },
let resultsForVec = [
{ name: 'Math::Vector',
url: 'classMath_1_1Vector.html',
- suffixLength: 3 },
- { name: 'Math::Vector::min() const',
- url: 'classMath_1_1Vector.html#min',
- suffixLength: 16 }];
+ suffixLength: 3 }];
assert.deepEqual(Search.search('vec'), resultsForVec);
/* Uppercase things and spaces */
suffixLength: 3 },
{ name: 'Math::min(int, int)',
url: 'namespaceMath.html#min',
- suffixLength: 18 },
- { name: 'Math::Vector',
- url: 'classMath_1_1Vector.html',
- suffixLength: 11 }]);
+ suffixLength: 12 },
+ { name: 'Math::Vector::min() const',
+ url: 'classMath_1_1Vector.html#min',
+ suffixLength: 10 }]);
}
/* Search loaded from a base85-encoded file should work properly */
out += '\n'
out += indent
out += Trie.child_char_struct.unpack_from(serialized, offset + 3)[0].decode('utf-8')
- child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00ffffff
+ if Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000:
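+ # mark lookahead barriers with a trailing '$' in the dump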
+ out += '$\n'
+ out += indent + ' '
+ child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
offset += Trie.child_struct.size
out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged)
trie = Trie()
trie.insert("math", 0)
- trie.insert("math::vector", 1)
+ trie.insert("math::vector", 1, lookahead_barriers=[4])
trie.insert("vector", 1)
trie.insert("math::range", 2)
trie.insert("range", 2)
trie.insert("math::minmax", 5)
trie.insert("minmax", 5)
- trie.insert("math::vector::minmax", 6)
- trie.insert("vector::minmax", 6)
+ trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12])
+ trie.insert("vector::minmax", 6, lookahead_barriers=[6])
trie.insert("minmax", 6)
trie.insert("math::vector::min", 7)
trie.insert("vector::min", 7)
trie.insert("vector::max", 8)
trie.insert("max", 8)
- trie.insert("math::range::min", 9)
- trie.insert("range::min", 9)
+ trie.insert("math::range::min", 9, lookahead_barriers=[4, 11])
+ trie.insert("range::min", 9, lookahead_barriers=[5])
trie.insert("min", 9)
trie.insert("math::range::max", 10)
serialized = trie.serialize()
self.compare(serialized, """
math [0]
-||| ::vector [1]
-||| | ::min [7]
+||| :$
+||| :vector [1]
+||| | :$
+||| | :min [7]
||| | | max [6]
||| | ax [8]
||| range [2]
-||| | ::min [9]
+||| | :$
+||| | :min [9]
||| | ax [10]
||| min [3]
||| || max [5]
|in [3, 7, 9]
|| max [5, 6]
vector [1]
-| ::min [7]
+| :$
+| :min [7]
| | max [6]
| ax [8]
range [2]
-| ::min [9]
+| :$
+| :min [9]
| ax [10]
""")
self.assertEqual(len(serialized), 340)
#print(search_data_pretty)
self.assertEqual(search_data_pretty, """
namespace [0]
-| ::class [1]
-| | ::foo() [6, 7, 8, 9]
+| :$
+| :class [1]
+| | :$
+| | :foo() [6, 7, 8, 9]
| enum [11]
-| | ::value [10]
+| | :$
+| | :value [10]
| typedef [12]
| variable [13]
class [1]
-| ::foo() [6, 7, 8, 9]
+| :$
+| :foo() [6, 7, 8, 9]
a page [2]
subpage [3]
dir [4]
-| /file.h [5]
+| /$
+| file.h [5]
file.h [5]
|oo() [6, 7, 8, 9]
enum [11]
-| ::value [10]
+| :$
+| :value [10]
value [10]
| riable [13]
typedef [12]