self.children[char].insert(path[1:], value)
# Returns offset of the serialized thing in `output`
- def _serialize(self, output: bytearray) -> int:
+ def _serialize(self, hashtable, output: bytearray) -> int:
# Serialize all children first
child_offsets = []
for char, child in self.children.items():
- offset = child._serialize(output)
+ offset = child._serialize(hashtable, output)
child_offsets += [(char, offset)]
# Serialize this node
assert size == len(serialized)
- offset = len(output)
- output += serialized
- return offset
+ # Subtree merging: if this exact tree is already in the table, return
+ # its offset. Otherwise add it and return the new offset.
+ # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
+ hashable = bytes(serialized)
+ if hashable in hashtable:
+ return hashtable[hashable]
+ else:
+ offset = len(output)
+ output += serialized
+ hashtable[hashable] = offset
+ return offset
def serialize(self) -> bytearray:
output = bytearray(b'\x00\x00\x00\x00')
- self.root_offset_struct.pack_into(output, 0, self._serialize(output))
+ hashtable = {}
+ self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output))
return output
xref_id_rx = re.compile(r"""(.*)_1(_[a-z-]+[0-9]+)$""")
from dox2html5 import Trie
-def _pretty_print(serialized: bytearray, base_offset, indent, draw_pipe) -> str:
+def _pretty_print(serialized: bytearray, hashtable, base_offset, indent, draw_pipe, show_merged) -> str:
+ # Visualize where the trees were merged
+ if show_merged and base_offset in hashtable: return ' #'
+
out = ''
size, value_count = Trie.header_struct.unpack_from(serialized, base_offset)
offset = base_offset + Trie.header_struct.size
out += Trie.child_char_struct.unpack_from(serialized, offset + 3)[0].decode('utf-8')
child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00ffffff
offset += Trie.child_struct.size
- out += _pretty_print(serialized, child_offset, indent + ('|' if draw_pipe else ' '), False)
+ out += _pretty_print(serialized, hashtable, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged)
newline = True
+ hashtable[base_offset] = True
return out
-def pretty_print(serialized: bytes) -> str:
- return _pretty_print(serialized, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', False)
+def pretty_print(serialized: bytes, show_merged=False) -> str:
+ hashtable = {}
+ return _pretty_print(serialized, hashtable, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged)
class Serialization(unittest.TestCase):
def __init__(self, *args, **kwargs):
| ::min [9]
| ax [10]
""")
- self.assertEqual(len(serialized), 514)
+ self.assertEqual(len(serialized), 340)
if __name__ == '__main__': # pragma: no cover
parser = argparse.ArgumentParser()
parser.add_argument('file', help="file to pretty-print")
+ parser.add_argument('--show-merged', help="show merged subtrees", action='store_true')
args = parser.parse_args()
with open(args.file, 'rb') as f:
- print(pretty_print(f.read()))
+ print(pretty_print(f.read(), show_merged=args.show_merged))