        self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output))
        return output
+class ResultMap:
+    # item 1 flags | item 2 flags |     | item N flags | file |    item 1     |
+    #   + offset   |   + offset   | ... |   + offset   | size |  name + url   | ...
+    #    8 + 24b   |    8 + 24b   |     |    8 + 24b   |  32b | (0-delimited) |
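+    # Each 32-bit little-endian word packs 8 bits of flags together with a
+    # 24-bit offset into the string data, so the serialized map can't exceed
+    # 16 MB (see the assert in serialize() below)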
+    offset_struct = struct.Struct('<I')
+    flags_struct = struct.Struct('<B')
+
+    def __init__(self):
+        self.entries = []
+
+    def add(self, name, url, flags = 0) -> int:
+        self.entries += [(name, url, flags)]
+        return len(self.entries) - 1
+
+    def serialize(self) -> bytearray:
+        output = bytearray()
+
+        # Write the offset array. Starting offset for items is after the offset
+        # array and the file size
+        offset = (len(self.entries) + 1)*4
+        for name, url, flags in self.entries:
+            assert offset < 2**24
+            output += self.offset_struct.pack(offset)
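+            # Overwrite the highest byte of the little-endian offset written
+            # above with the flags, producing the 8 + 24b layout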
+            self.flags_struct.pack_into(output, len(output) - 1, flags)
+
+            # include the 0-delimiter
+            offset += len(name) + len(url) + 1
+
+        # Write file size
+        output += self.offset_struct.pack(offset)
+
+        # Write the entries themselves
+        for name, url, _ in self.entries:
+            output += name.encode('utf-8')
+            output += b'\0'
+            output += url.encode('utf-8')
+
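+        # The offsets precomputed above have to match the amount of string
+        # data actually written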
+        assert len(output) == offset
+        return output
+
+search_data_header_struct = struct.Struct('<3sBI')
+
+def serialize_search_data(trie: Trie, map: ResultMap) -> bytearray:
+    serialized_trie = trie.serialize()
+    serialized_map = map.serialize()
+    # magic header, version, offset of result map
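+    # The header is 8 bytes ('<3sBI'), so the trie data starts right at
+    # offset 8 and the result map follows at 8 + len(serialized_trie)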
+    return search_data_header_struct.pack(b'MCS', 0, len(serialized_trie) + 8) + serialized_trie + serialized_map
+
xref_id_rx = re.compile(r"""(.*)_1(_[a-z-]+[0-9]+)$""")
slugify_nonalnum_rx = re.compile(r"""[^\w\s-]""")
slugify_hyphens_rx = re.compile(r"""[-\s]+""")
import sys
from types import SimpleNamespace as Empty
-from dox2html5 import Trie
+from dox2html5 import Trie, ResultMap, serialize_search_data, search_data_header_struct
-def _pretty_print(serialized: bytearray, hashtable, stats, base_offset, indent, draw_pipe, show_merged) -> str:
+def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, draw_pipe, show_merged) -> str:
    # Visualize where the trees were merged
    if show_merged and base_offset in hashtable: return ' #'
        child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00ffffff
        stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
        offset += Trie.child_struct.size
-        out += _pretty_print(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged)
+        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged)
        child_count += 1
    stats.max_node_children = max(child_count, stats.max_node_children)
    hashtable[base_offset] = True
    return out
-def pretty_print(serialized: bytes, show_merged=False):
+def pretty_print_trie(serialized: bytes, show_merged=False):
    hashtable = {}
    stats = Empty()
    stats.max_node_value_index = 0
    stats.max_node_child_offset = 0
-    out = _pretty_print(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged)
+    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged)
    stats = """
node count: {}
max node size: {} bytes
max node values: {}
max node children: {}
max node value index: {}
max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_size, stats.max_node_values, stats.max_node_children, stats.max_node_value_index, stats.max_node_child_offset)
    return out, stats
-class Serialization(unittest.TestCase):
+def pretty_print_map(serialized: bytes):
+    # The first item gives the offset of the first value, which can be used
+    # to calculate the total value count
+    offset = ResultMap.offset_struct.unpack_from(serialized, 0)[0] & 0x00ffffff
+    size = offset//4 - 1
+
+    out = ''
+    for i in range(size):
+        if i: out += '\n'
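+        # Flags are stored in the highest byte of each little-endian 32-bit
+        # word, hence the + 3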
+        flags = ResultMap.flags_struct.unpack_from(serialized, i*4 + 3)[0]
+        next_offset = ResultMap.offset_struct.unpack_from(serialized, (i + 1)*4)[0] & 0x00ffffff
+        name, _, url = serialized[offset:next_offset].partition(b'\0')
+        out += "{}: {} [{}] -> {}".format(i, name.decode('utf-8'), flags, url.decode('utf-8'))
+        offset = next_offset
+    return out
+
+def pretty_print(serialized: bytes, show_merged=False):
+    magic, version, map_offset = search_data_header_struct.unpack_from(serialized)
+    assert magic == b'MCS'
+    assert version == 0
+
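+    # The trie occupies everything between the 8-byte header and the result
+    # map offset stored in the header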
+    pretty_trie, stats = pretty_print_trie(serialized[search_data_header_struct.size:map_offset], show_merged=show_merged)
+    pretty_map = pretty_print_map(serialized[map_offset:])
+    return pretty_trie + '\n' + pretty_map, stats
+
+class TrieSerialization(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.maxDiff = None
    def compare(self, serialized: bytes, expected: str):
-        pretty = pretty_print(serialized)[0]
+        pretty = pretty_print_trie(serialized)[0]
        #print(pretty)
        self.assertEqual(pretty, expected.strip())
""")
        self.assertEqual(len(serialized), 340)
+class MapSerialization(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.maxDiff = None
+
+    def compare(self, serialized: bytes, expected: str):
+        pretty = pretty_print_map(serialized)
+        #print(pretty)
+        self.assertEqual(pretty, expected.strip())
+
+    def test_empty(self):
+        map = ResultMap()
+
+        serialized = map.serialize()
+        self.compare(serialized, "")
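+        # An empty map serializes to just the 32-bit file size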
+        self.assertEqual(len(serialized), 4)
+
+    def test_single(self):
+        map = ResultMap()
+        self.assertEqual(map.add("Magnum", "namespaceMagnum.html", 11), 0)
+
+        serialized = map.serialize()
+        self.compare(serialized, """
+0: Magnum [11] -> namespaceMagnum.html
+""")
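+        # Two 32-bit words (entry offset + file size), then
+        # 'Magnum\0namespaceMagnum.html'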
+        self.assertEqual(len(serialized), 35)
+
+    def test_multiple(self):
+        map = ResultMap()
+
+        self.assertEqual(map.add("Math", "namespaceMath.html"), 0)
+        self.assertEqual(map.add("Math::Vector", "classMath_1_1Vector.html", 42), 1)
+        self.assertEqual(map.add("Math::Range", "classMath_1_1Range.html", 255), 2)
+        self.assertEqual(map.add("Math::min()", "namespaceMath.html#abcdef2875"), 3)
+        self.assertEqual(map.add("Math::max()", "namespaceMath.html#abcdef2875"), 4)
+
+        serialized = map.serialize()
+        self.compare(serialized, """
+0: Math [0] -> namespaceMath.html
+1: Math::Vector [42] -> classMath_1_1Vector.html
+2: Math::Range [255] -> classMath_1_1Range.html
+3: Math::min() [0] -> namespaceMath.html#abcdef2875
+4: Math::max() [0] -> namespaceMath.html#abcdef2875
+""")
+        self.assertEqual(len(serialized), 201)
+
+class Serialization(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.maxDiff = None
+
+    def compare(self, serialized: bytes, expected: str):
+        pretty = pretty_print(serialized)[0]
+        #print(pretty)
+        self.assertEqual(pretty, expected.strip())
+
+    def test(self):
+        trie = Trie()
+        map = ResultMap()
+
+        trie.insert("math", map.add("Math", "namespaceMath.html"))
+        index = map.add("Math::Vector", "classMath_1_1Vector.html", 42)
+        trie.insert("math::vector", index)
+        trie.insert("vector", index)
+        index = map.add("Math::Range", "classMath_1_1Range.html", 255)
+        trie.insert("math::range", index)
+        trie.insert("range", index)
+
+        serialized = serialize_search_data(trie, map)
+        self.compare(serialized, """
+math [0]
+| ::vector [1]
+| range [2]
+vector [1]
+range [2]
+0: Math [0] -> namespaceMath.html
+1: Math::Vector [42] -> classMath_1_1Vector.html
+2: Math::Range [255] -> classMath_1_1Range.html
+""")
+        self.assertEqual(len(serialized), 241)
+
if __name__ == '__main__': # pragma: no cover
    parser = argparse.ArgumentParser()
    parser.add_argument('file', help="file to pretty-print")