doxygen: implement subtree merging.

author Vladimír Vondruš <mosra@centrum.cz>

Sat, 20 Jan 2018 14:54:23 +0000 (15:54 +0100)

committer Vladimír Vondruš <mosra@centrum.cz>

Tue, 30 Jan 2018 12:16:30 +0000 (13:16 +0100)
author Vladimír Vondruš <mosra@centrum.cz>
Sat, 20 Jan 2018 14:54:23 +0000 (15:54 +0100)
committer Vladimír Vondruš <mosra@centrum.cz>
Tue, 30 Jan 2018 12:16:30 +0000 (13:16 +0100)
diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py

index 4df2b50fef7621d20e379f8875d977627d4db480..ffddf3c695e7f9c8b47a84572e876b1896458058 100755 (executable)
--- a/doxygen/dox2html5.py
+++ b/doxygen/dox2html5.py
@@ -76,11 +76,11 @@ class Trie:
          self.children[char].insert(path[1:], value)
  
      # Returns offset of the serialized thing in `output`
-    def _serialize(self, output: bytearray) -> int:
+    def _serialize(self, hashtable, output: bytearray) -> int:
          # Serialize all children first
          child_offsets = []
          for char, child in self.children.items():
-            offset = child._serialize(output)
+            offset = child._serialize(hashtable, output)
              child_offsets += [(char, offset)]
  
          # Serialize this node
@@ -103,13 +103,22 @@ class Trie:
  
          assert size == len(serialized)
  
-        offset = len(output)
-        output += serialized
-        return offset
+        # Subtree merging: if this exact tree is already in the table, return
+        # its offset. Otherwise add it and return the new offset.
+        # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
+        hashable = bytes(serialized)
+        if hashable in hashtable:
+            return hashtable[hashable]
+        else:
+            offset = len(output)
+            output += serialized
+            hashtable[hashable] = offset
+            return offset
  
      def serialize(self) -> bytearray:
          output = bytearray(b'\x00\x00\x00\x00')
-        self.root_offset_struct.pack_into(output, 0, self._serialize(output))
+        hashtable = {}
+        self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output))
          return output
  
  xref_id_rx = re.compile(r"""(.*)_1(_[a-z-]+[0-9]+)$""")
diff --git a/doxygen/test/test_trie.py b/doxygen/test/test_trie.py

index 8045316e5f525d2a25ab43c6ade571a86f7b3d8b..eacac1214b8491faf3b5d560f697eba3a0824ea8 100755 (executable)
--- a/doxygen/test/test_trie.py
+++ b/doxygen/test/test_trie.py
@@ -29,7 +29,10 @@ import unittest
  
  from dox2html5 import Trie
  
-def _pretty_print(serialized: bytearray, base_offset, indent, draw_pipe) -> str:
+def _pretty_print(serialized: bytearray, hashtable, base_offset, indent, draw_pipe, show_merged) -> str:
+    # Visualize where the trees were merged
+    if show_merged and base_offset in hashtable: return ' #'
+
      out = ''
      size, value_count = Trie.header_struct.unpack_from(serialized, base_offset)
      offset = base_offset + Trie.header_struct.size
@@ -53,13 +56,15 @@ def _pretty_print(serialized: bytearray, base_offset, indent, draw_pipe) -> str:
          out += Trie.child_char_struct.unpack_from(serialized, offset + 3)[0].decode('utf-8')
          child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00ffffff
          offset += Trie.child_struct.size
-        out += _pretty_print(serialized, child_offset, indent + ('|' if draw_pipe else ' '), False)
+        out += _pretty_print(serialized, hashtable, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged)
          newline = True
  
+    hashtable[base_offset] = True
      return out
  
-def pretty_print(serialized: bytes) -> str:
-    return _pretty_print(serialized, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', False)
+def pretty_print(serialized: bytes, show_merged=False) -> str:
+    hashtable = {}
+    return _pretty_print(serialized, hashtable, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged)
  
  class Serialization(unittest.TestCase):
      def __init__(self, *args, **kwargs):
@@ -147,12 +152,13 @@ range [2]
  |    ::min [9]
  |       ax [10]
  """)
-        self.assertEqual(len(serialized), 514)
+        self.assertEqual(len(serialized), 340)
  
  if __name__ == '__main__': # pragma: no cover
      parser = argparse.ArgumentParser()
      parser.add_argument('file', help="file to pretty-print")
+    parser.add_argument('--show-merged', help="show merged subtrees", action='store_true')
      args = parser.parse_args()
  
      with open(args.file, 'rb') as f:
-        print(pretty_print(f.read()))
+        print(pretty_print(f.read(), show_merged=args.show_merged))
author	Vladimír Vondruš <mosra@centrum.cz>
	Sat, 20 Jan 2018 14:54:23 +0000 (15:54 +0100)
committer	Vladimír Vondruš <mosra@centrum.cz>
	Tue, 30 Jan 2018 12:16:30 +0000 (13:16 +0100)
doxygen/dox2html5.py		patch \| blob \| history
doxygen/test/test_trie.py		patch \| blob \| history