import ansilexer
class Trie:
- # root | | header | values | child 1 | child 1 | child 1 |
- # offset | ... | size/2 | value # | ... | char | barrier | offset | ...
- # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b |
+ # root | | header | results | child 1 | child 1 | child 1 |
+ # offset | ... | result # | child # | ... | char | barrier | offset | ...
+ # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b |
root_offset_struct = struct.Struct('<I')
header_struct = struct.Struct('<BB')
- value_struct = struct.Struct('<H')
+ result_struct = struct.Struct('<H')
child_struct = struct.Struct('<I')
child_char_struct = struct.Struct('<B')
def __init__(self):
- self.values = []
+ self.results = []
self.children = {}
- def _insert(self, path: bytes, value, lookahead_barriers):
+ def _insert(self, path: bytes, result, lookahead_barriers):
if not path:
- self.values += [value]
+ self.results += [result]
return
char = path[0]
if lookahead_barriers and lookahead_barriers[0] == 0:
lookahead_barriers = lookahead_barriers[1:]
self.children[char] = (True, self.children[char][1])
- self.children[char][1]._insert(path[1:], value, [b - 1 for b in lookahead_barriers])
+ self.children[char][1]._insert(path[1:], result, [b - 1 for b in lookahead_barriers])
- def insert(self, path: str, value, lookahead_barriers=[]):
+ def insert(self, path: str, result, lookahead_barriers=[]):
assert not path.isupper() # to avoid unnecessary duplicates
- self._insert(path.encode('utf-8'), value, lookahead_barriers)
+ self._insert(path.encode('utf-8'), result, lookahead_barriers)
# Returns offset of the serialized thing in `output`
def _serialize(self, hashtable, output: bytearray, merge_subtrees) -> int:
child_offsets += [(char, child[0], offset)]
# Serialize this node
- size = int(2 + 2*len(self.values) + 4*len(child_offsets))
serialized = bytearray()
- serialized += self.header_struct.pack(int(size/2), len(self.values))
- for v in self.values:
- serialized += self.value_struct.pack(v)
+ serialized += self.header_struct.pack(len(self.results), len(self.children))
+ for v in self.results:
+ serialized += self.result_struct.pack(v)
# Serialize child offsets
for char, lookahead_barrier, abs_offset in child_offsets:
serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23))
self.child_char_struct.pack_into(serialized, offset + 3, char)
- assert size == len(serialized)
-
# Subtree merging: if this exact tree is already in the table, return
# its offset. Otherwise add it and return the new offset.
# TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
for(; foundPrefix != searchString.length; ++foundPrefix) {
/* Calculate offset and count of children */
let offset = this.searchStack[this.searchStack.length - 1];
- let nodeSize = this.trie.getUint8(offset)*2;
- let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2;
- let childCount = (nodeSize - relChildOffset)/4;
+ let relChildOffset = 2 + this.trie.getUint8(offset)*2;
+ let childCount = this.trie.getUint8(offset + 1);
/* Go through all children and find the next offset */
let childOffset = offset + relChildOffset;
},
gatherResults: function(offset, suffixLength, results) {
- let valueCount = this.trie.getUint8(offset + 1);
+ let resultCount = this.trie.getUint8(offset);
/* Populate the results with all values associated with this node */
- for(let i = 0; i != valueCount; ++i) {
+ for(let i = 0; i != resultCount; ++i) {
let index = this.trie.getUint16(offset + (i + 1)*2, true);
let flags = this.map.getUint8(index*4 + 3);
let resultOffset = this.map.getUint32(index*4, true) & 0x00ffffff;
/* Dig deeper. If the child already has enough, return. */
/* TODO: hmmm. this is helluvalot duplicated code. hmm. */
- let nodeSize = this.trie.getUint8(offset)*2;
- let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2;
- let childCount = (nodeSize - relChildOffset)/4;
+ let relChildOffset = 2 + this.trie.getUint8(offset)*2;
+ let childCount = this.trie.getUint8(offset + 1);
let childOffset = offset + relChildOffset;
for(let j = 0; j != childCount; ++j) {
let offsetBarrier = this.trie.getUint32(childOffset + j*4, true);
-O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl
\ No newline at end of file
+O+!-vL;(N*Dggih0RRC2009I504V?g2mk;m009mF0B!&Q6aWBe0RRI400AHX04V?gBme*?00Alh0B!&QFaQ8)00A}t0BryPJOBVX0RaL4LI8j|00Bq<0CE5UPyhgL00CA20CWHWTmS%L00CkE0A&FH1poj6ZU6u&00D9U04M+fcmM!y00Djg0BHaLga80-00D{s06GBy1OSi#fI0vHmH+@{00Eu=0A~OJqyPYJ00F810AT<F8UO%oXaE3qumAvZ00FiD06GBy006`QfI0vH$^Zap00Ghf0CWQY0RRI41poj6-T(k800HIz04M+f>;M3600P(m0Aca~0BHdL1^@s70s#PJ009O80A~OJ3;_UP009yK0B`^S7y$rc00ABW0CfNa_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl
\ No newline at end of file
from test import IntegrationTestCase
-def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, draw_pipe, show_merged, show_lookahead_barriers, color_map) -> str:
+def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str:
# Visualize where the trees were merged
if show_merged and base_offset in hashtable:
return color_map['red'] + '#' + color_map['reset']
stats.node_count += 1
out = ''
- size, value_count = Trie.header_struct.unpack_from(serialized, base_offset)
- stats.max_node_size = max(size, stats.max_node_size)
- stats.max_node_values = max(value_count, stats.max_node_values)
+ result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset)
+ stats.max_node_results = max(result_count, stats.max_node_results)
+ stats.max_node_children = max(child_count, stats.max_node_children)
offset = base_offset + Trie.header_struct.size
- # print values, if any
- if value_count:
+ # print results, if any
+ if result_count:
out += color_map['blue'] + ' ['
- for i in range(value_count):
+ for i in range(result_count):
if i: out += color_map['blue']+', '
- value = Trie.value_struct.unpack_from(serialized, offset)[0]
- stats.max_node_value_index = max(value, stats.max_node_value_index)
- out += color_map['cyan'] + str(value)
- offset += Trie.value_struct.size
+ result = Trie.result_struct.unpack_from(serialized, offset)[0]
+ stats.max_node_result_index = max(result, stats.max_node_result_index)
+ out += color_map['cyan'] + str(result)
+ offset += Trie.result_struct.size
out += color_map['blue'] + ']'
- # print children
- if base_offset + size*2 - offset > 4: draw_pipe = True
- child_count = 0
- while offset < base_offset + size*2:
- if child_count or value_count:
+ # print children, if any
+ for i in range(child_count):
+ if result_count or i:
out += color_map['reset'] + '\n'
out += color_map['blue'] + indent + color_map['white']
char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
offset += Trie.child_struct.size
- out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+ out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
child_count += 1
- stats.max_node_children = max(child_count, stats.max_node_children)
-
hashtable[base_offset] = True
return out
stats = Empty()
stats.node_count = 0
- stats.max_node_size = 0
- stats.max_node_values = 0
+ stats.max_node_results = 0
stats.max_node_children = 0
- stats.max_node_value_index = 0
+ stats.max_node_result_index = 0
stats.max_node_child_offset = 0
- out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+ out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
if out: out = color_map['white'] + out
stats = """
node count: {}
-max node size: {} bytes
-max node values: {}
+max node results: {}
max node children: {}
-max node value index: {}
-max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_size*2, stats.max_node_values, stats.max_node_children, stats.max_node_value_index, stats.max_node_child_offset)
+max node result index: {}
+max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
return out, stats
def pretty_print_map(serialized: bytes, colors=False):