From 8b3d58a2d26abb4ae7d06ec8e2c5bdff94c6c904 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Sat, 20 Jan 2018 15:54:23 +0100 Subject: [PATCH] doxygen: implement subtree merging. Reduces size of the test case from 514 bytes to 340. Also the visualization tool can visualize where the trees were cut off. --- doxygen/dox2html5.py | 21 +++++++++++++++------ doxygen/test/test_trie.py | 18 ++++++++++++------ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py index 4df2b50f..ffddf3c6 100755 --- a/doxygen/dox2html5.py +++ b/doxygen/dox2html5.py @@ -76,11 +76,11 @@ class Trie: self.children[char].insert(path[1:], value) # Returns offset of the serialized thing in `output` - def _serialize(self, output: bytearray) -> int: + def _serialize(self, hashtable, output: bytearray) -> int: # Serialize all children first child_offsets = [] for char, child in self.children.items(): - offset = child._serialize(output) + offset = child._serialize(hashtable, output) child_offsets += [(char, offset)] # Serialize this node @@ -103,13 +103,22 @@ class Trie: assert size == len(serialized) - offset = len(output) - output += serialized - return offset + # Subtree merging: if this exact tree is already in the table, return + # its offset. Otherwise add it and return the new offset. + # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work? + hashable = bytes(serialized) + if hashable in hashtable: + return hashtable[hashable] + else: + offset = len(output) + output += serialized + hashtable[hashable] = offset + return offset def serialize(self) -> bytearray: output = bytearray(b'\x00\x00\x00\x00') - self.root_offset_struct.pack_into(output, 0, self._serialize(output)) + hashtable = {} + self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output)) return output xref_id_rx = re.compile(r"""(.*)_1(_[a-z-]+[0-9]+)$""") diff --git a/doxygen/test/test_trie.py b/doxygen/test/test_trie.py index 8045316e..eacac121 100755 --- a/doxygen/test/test_trie.py +++ b/doxygen/test/test_trie.py @@ -29,7 +29,10 @@ import unittest from dox2html5 import Trie -def _pretty_print(serialized: bytearray, base_offset, indent, draw_pipe) -> str: +def _pretty_print(serialized: bytearray, hashtable, base_offset, indent, draw_pipe, show_merged) -> str: + # Visualize where the trees were merged + if show_merged and base_offset in hashtable: return ' #' + out = '' size, value_count = Trie.header_struct.unpack_from(serialized, base_offset) offset = base_offset + Trie.header_struct.size @@ -53,13 +56,15 @@ def _pretty_print(serialized: bytearray, base_offset, indent, draw_pipe) -> str: out += Trie.child_char_struct.unpack_from(serialized, offset + 3)[0].decode('utf-8') child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00ffffff offset += Trie.child_struct.size - out += _pretty_print(serialized, child_offset, indent + ('|' if draw_pipe else ' '), False) + out += _pretty_print(serialized, hashtable, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged) newline = True + hashtable[base_offset] = True return out -def pretty_print(serialized: bytes) -> str: - return _pretty_print(serialized, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', False) +def pretty_print(serialized: bytes, show_merged=False) -> str: + hashtable = {} + return _pretty_print(serialized, hashtable, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged) class Serialization(unittest.TestCase): def __init__(self, *args, **kwargs): @@ -147,12 +152,13 @@ range [2] | ::min [9] | ax [10] """) - self.assertEqual(len(serialized), 514) + self.assertEqual(len(serialized), 340) if __name__ == '__main__': # pragma: no cover parser = argparse.ArgumentParser() parser.add_argument('file', help="file to pretty-print") + parser.add_argument('--show-merged', help="show merged subtrees", action='store_true') args = parser.parse_args() with open(args.file, 'rb') as f: - print(pretty_print(f.read())) + print(pretty_print(f.read(), show_merged=args.show_merged)) -- 2.30.2