From: Vladimír Vondruš <mosra@centrum.cz>
Date: Sun, 14 Jul 2019 23:14:36 +0000 (+0200)
Subject: documentation: first step to make the result types backend-specific.
X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=8a46373cec9b5e53ce17bdef792fdc06599fe596;p=blog.git

documentation: first step to make the result types backend-specific.

The type is now a part of the doxygen script and is added as plain bits
to ResultFlag. Next step is encoding the type mapping to the search data
and then reading that back in the JS frontend.
---

diff --git a/documentation/_search.py b/documentation/_search.py
index 598e995b..12e31f63 100644
--- a/documentation/_search.py
+++ b/documentation/_search.py
@@ -31,29 +31,45 @@ from enum import Flag
 from types import SimpleNamespace as Empty
 
 class ResultFlag(Flag):
+    @staticmethod
+    def from_type(flag: 'ResultFlag', type) -> 'ResultFlag':
+        assert not flag & ResultFlag._TYPE
+        assert type.value > 0 and type.value <= 0xf
+        return flag|ResultFlag(type.value << 4)
+
+    @property
+    def type(self):
+        return (int(self.value) >> 4) & 0xf
+
+    NONE = 0
+
     HAS_SUFFIX = 1 << 0
     HAS_PREFIX = 1 << 3
     DEPRECATED = 1 << 1
-    DELETED = 1 << 2
+    DELETED = 1 << 2 # TODO: this is C++-specific, put aside as well?
 
     # Result type. Order defines order in which equally-named symbols appear in
-    # search results. Keep in sync with search.js.
+    # search results, every backend supplies its own, ranging from 1 << 4 to
+    # 15 << 4.
     _TYPE = 0xf << 4
     ALIAS = 0 << 4 # This one gets the type from the referenced result
-    PAGE = 1 << 4
-    NAMESPACE = 2 << 4
-    GROUP = 3 << 4
-    CLASS = 4 << 4
-    STRUCT = 5 << 4
-    UNION = 6 << 4
-    TYPEDEF = 7 << 4
-    DIR = 8 << 4
-    FILE = 9 << 4
-    FUNC = 10 << 4
-    DEFINE = 11 << 4
-    ENUM = 12 << 4
-    ENUM_VALUE = 13 << 4
-    VAR = 14 << 4
+
+    # One placeholder member per type value; without them, constructing a ResultFlag from a type value fails with "32 is not a valid ResultFlag"
+    _TYPE01 = 1 << 4
+    _TYPE02 = 2 << 4
+    _TYPE03 = 3 << 4
+    _TYPE04 = 4 << 4
+    _TYPE05 = 5 << 4
+    _TYPE06 = 6 << 4
+    _TYPE07 = 7 << 4
+    _TYPE08 = 8 << 4
+    _TYPE09 = 9 << 4
+    _TYPE10 = 10 << 4
+    _TYPE11 = 11 << 4
+    _TYPE12 = 12 << 4
+    _TYPE13 = 13 << 4
+    _TYPE14 = 14 << 4
+    _TYPE15 = 15 << 4
 
 class ResultMap:
     # item 1 flags | item 2 flags |     | item N flags | file | item 1 |
@@ -351,3 +367,131 @@ def serialize_search_data(trie: Trie, map: ResultMap, symbol_count, merge_subtre
 def base85encode_search_data(data: bytearray) -> bytearray:
     return (b"/* Generated by https://mcss.mosra.cz/documentation/doxygen/. Do not edit. */\n" +
             b"Search.load('" + base64.b85encode(data, True) + b"');\n")
+
+def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str:
+    # Visualize where the trees were merged
+    if show_merged and base_offset in hashtable:
+        return color_map['red'] + '#' + color_map['reset']
+
+    stats.node_count += 1
+
+    out = ''
+    result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset)
+    stats.max_node_results = max(result_count, stats.max_node_results)
+    stats.max_node_children = max(child_count, stats.max_node_children)
+    offset = base_offset + Trie.header_struct.size
+
+    # print results, if any
+    if result_count:
+        out += color_map['blue'] + ' ['
+        for i in range(result_count):
+            if i: out += color_map['blue']+', '
+            result = Trie.result_struct.unpack_from(serialized, offset)[0]
+            stats.max_node_result_index = max(result, stats.max_node_result_index)
+            out += color_map['cyan'] + str(result)
+            offset += Trie.result_struct.size
+        out += color_map['blue'] + ']'
+
+    # print children, if any
+    for i in range(child_count):
+        if result_count or i:
+            out += color_map['reset'] + '\n'
+            out += color_map['blue'] + indent + color_map['white']
+        char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
+        if char <= 127:
+            out += chr(char)
+        else:
+            out += color_map['reset'] + hex(char)
+        if (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000):
+            out += color_map['green'] + '$'
+        if char > 127 or (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000):
+            out += color_map['reset'] + '\n' + color_map['blue'] + indent + ' ' + color_map['white']
+        child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
+        stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
+        offset += Trie.child_struct.size
+        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+        child_count += 1
+
+    hashtable[base_offset] = True
+    return out
+
+color_map_colors = {'blue': '\033[0;34m',
+                    'white': '\033[1;39m',
+                    'red': '\033[1;31m',
+                    'green': '\033[1;32m',
+                    'cyan': '\033[1;36m',
+                    'yellow': '\033[1;33m',
+                    'reset': '\033[0m'}
+
+color_map_dummy = {'blue': '',
+                   'white': '',
+                   'red': '',
+                   'green': '',
+                   'cyan': '',
+                   'yellow': '',
+                   'reset': ''}
+
+def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barriers=True, colors=False):
+    color_map = color_map_colors if colors else color_map_dummy
+
+    hashtable = {}
+
+    stats = Empty()
+    stats.node_count = 0
+    stats.max_node_results = 0
+    stats.max_node_children = 0
+    stats.max_node_result_index = 0
+    stats.max_node_child_offset = 0
+
+    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+    if out: out = color_map['white'] + out
+    stats = """
+node count:             {}
+max node results:       {}
+max node children:      {}
+max node result index:  {}
+max node child offset:  {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
+    return out, stats
+
+def pretty_print_map(serialized: bytes, *, entryTypeClass, colors=False):
+    color_map = color_map_colors if colors else color_map_dummy
+
+    # The first item gives out offset of first value, which can be used to
+    # calculate total value count
+    offset = ResultMap.offset_struct.unpack_from(serialized, 0)[0] & 0x00ffffff
+    size = int(offset/4 - 1)
+
+    out = ''
+    for i in range(size):
+        if i: out += '\n'
+        flags = ResultFlag(ResultMap.flags_struct.unpack_from(serialized, i*4 + 3)[0])
+        extra = []
+        if flags & ResultFlag._TYPE == ResultFlag.ALIAS:
+            extra += ['alias={}'.format(ResultMap.alias_struct.unpack_from(serialized, offset)[0])]
+            offset += ResultMap.alias_struct.size
+        if flags & ResultFlag.HAS_PREFIX:
+            extra += ['prefix={}[:{}]'.format(*ResultMap.prefix_struct.unpack_from(serialized, offset))]
+            offset += ResultMap.prefix_struct.size
+        if flags & ResultFlag.HAS_SUFFIX:
+            extra += ['suffix_length={}'.format(ResultMap.suffix_length_struct.unpack_from(serialized, offset)[0])]
+            offset += ResultMap.suffix_length_struct.size
+        if flags & ResultFlag.DEPRECATED:
+            extra += ['deprecated']
+        if flags & ResultFlag.DELETED:
+            extra += ['deleted']
+        if flags & ResultFlag._TYPE:
+            extra += ['type={}'.format(entryTypeClass(flags.type).name)]
+        next_offset = ResultMap.offset_struct.unpack_from(serialized, (i + 1)*4)[0] & 0x00ffffff
+        name, _, url = serialized[offset:next_offset].partition(b'\0')
+        out += color_map['cyan'] + str(i) + color_map['blue'] + ': ' + color_map['white'] + name.decode('utf-8') + color_map['blue'] + ' [' + color_map['yellow'] + (color_map['blue'] + ', ' + color_map['yellow']).join(extra) + color_map['blue'] + '] ->' + (' ' + color_map['reset'] + url.decode('utf-8') if url else '')
+        offset = next_offset
+    return out
+
+def pretty_print(serialized: bytes, *, entryTypeClass, show_merged=False, show_lookahead_barriers=True, colors=False):
+    magic, version, symbol_count, map_offset = search_data_header_struct.unpack_from(serialized)
+    assert magic == b'MCS'
+    assert version == 0
+
+    pretty_trie, stats = pretty_print_trie(serialized[search_data_header_struct.size:map_offset], show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, colors=colors)
+    pretty_map = pretty_print_map(serialized[map_offset:], entryTypeClass=entryTypeClass, colors=colors)
+    return '{} symbols\n'.format(symbol_count) + pretty_trie + '\n' + pretty_map, stats
diff --git a/documentation/doxygen.py b/documentation/doxygen.py
index 014b6f58..388befb8 100755
--- a/documentation/doxygen.py
+++ b/documentation/doxygen.py
@@ -27,6 +27,7 @@
 import xml.etree.ElementTree as ET
 import argparse
 import copy
+import enum
 import sys
 import re
 import html
@@ -54,6 +55,22 @@ import latex2svg
 import latex2svgextra
 import ansilexer
 
+class EntryType(enum.Enum):
+    PAGE = 1
+    NAMESPACE = 2
+    GROUP = 3
+    CLASS = 4
+    STRUCT = 5
+    UNION = 6
+    TYPEDEF = 7
+    DIR = 8
+    FILE = 9
+    FUNC = 10
+    DEFINE = 11
+    ENUM = 12
+    ENUM_VALUE = 13
+    VAR = 14
+
 xref_id_rx = re.compile(r"""(.*)_1(_[a-z-]+[0-9]+|@)$""")
 slugify_nonalnum_rx = re.compile(r"""[^\w\s-]""")
 slugify_hyphens_rx = re.compile(r"""[-\s]+""")
@@ -1671,7 +1688,7 @@ def parse_enum(state: State, element: ET.Element):
         if value.brief or value.description:
             if enum.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']:
                 result = Empty()
-                result.flags = ResultFlag.ENUM_VALUE|(ResultFlag.DEPRECATED if value.is_deprecated else ResultFlag(0))
+                result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if value.is_deprecated else ResultFlag(0), EntryType.ENUM_VALUE)
                 result.url = enum.base_url + '#' + value.id
                 result.prefix = state.current_prefix + [enum.name]
                 result.name = value.name
@@ -1696,7 +1713,7 @@ def parse_enum(state: State, element: ET.Element):
     if enum.brief or enum.has_details or enum.has_value_details:
         if enum.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']:
             result = Empty()
-            result.flags = ResultFlag.ENUM|(ResultFlag.DEPRECATED if enum.is_deprecated else ResultFlag(0))
+            result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if enum.is_deprecated else ResultFlag(0), EntryType.ENUM)
             result.url = enum.base_url + '#' + enum.id
             result.prefix = state.current_prefix
             result.name = enum.name
@@ -1772,7 +1789,7 @@ def parse_typedef(state: State, element: ET.Element):
         # Avoid duplicates in search
         if typedef.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']:
             result = Empty()
-            result.flags = ResultFlag.TYPEDEF|(ResultFlag.DEPRECATED if typedef.is_deprecated else ResultFlag(0))
+            result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if typedef.is_deprecated else ResultFlag(0), EntryType.TYPEDEF)
             result.url = typedef.base_url + '#' + typedef.id
             result.prefix = state.current_prefix
             result.name = typedef.name
@@ -1917,7 +1934,7 @@ def parse_func(state: State, element: ET.Element):
         # Avoid duplicates in search
         if func.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']:
             result = Empty()
-            result.flags = ResultFlag.FUNC|(ResultFlag.DEPRECATED if func.is_deprecated else ResultFlag(0))|(ResultFlag.DELETED if func.is_deleted else ResultFlag(0))
+            result.flags = ResultFlag.from_type((ResultFlag.DEPRECATED if func.is_deprecated else ResultFlag(0))|(ResultFlag.DELETED if func.is_deleted else ResultFlag(0)), EntryType.FUNC)
             result.url = func.base_url + '#' + func.id
             result.prefix = state.current_prefix
             result.name = func.name
@@ -1953,7 +1970,7 @@ def parse_var(state: State, element: ET.Element):
         # Avoid duplicates in search
         if var.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']:
             result = Empty()
-            result.flags = ResultFlag.VAR|(ResultFlag.DEPRECATED if var.is_deprecated else ResultFlag(0))
+            result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if var.is_deprecated else ResultFlag(0), EntryType.VAR)
             result.url = var.base_url + '#' + var.id
             result.prefix = state.current_prefix
             result.name = var.name
@@ -1993,7 +2010,7 @@ def parse_define(state: State, element: ET.Element):
         # Avoid duplicates in search
         if define.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']:
             result = Empty()
-            result.flags = ResultFlag.DEFINE|(ResultFlag.DEPRECATED if define.is_deprecated else ResultFlag(0))
+            result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if define.is_deprecated else ResultFlag(0), EntryType.DEFINE)
             result.url = define.base_url + '#' + define.id
             result.prefix = []
             result.name = define.name
@@ -2250,11 +2267,11 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=
         # Decide on prefix joiner. Defines are among the :: ones as well,
         # because we need to add the function macros twice -- but they have no
         # prefix, so it's okay.
-        if result.flags & ResultFlag._TYPE in [ResultFlag.NAMESPACE, ResultFlag.CLASS, ResultFlag.STRUCT, ResultFlag.UNION, ResultFlag.TYPEDEF, ResultFlag.FUNC, ResultFlag.VAR, ResultFlag.ENUM, ResultFlag.ENUM_VALUE, ResultFlag.DEFINE]:
+        if EntryType(result.flags.type) in [EntryType.NAMESPACE, EntryType.CLASS, EntryType.STRUCT, EntryType.UNION, EntryType.TYPEDEF, EntryType.FUNC, EntryType.VAR, EntryType.ENUM, EntryType.ENUM_VALUE, EntryType.DEFINE]:
             joiner = result_joiner = '::'
-        elif result.flags & ResultFlag._TYPE in [ResultFlag.DIR, ResultFlag.FILE]:
+        elif EntryType(result.flags.type) in [EntryType.DIR, EntryType.FILE]:
             joiner = result_joiner = '/'
-        elif result.flags & ResultFlag._TYPE in [ResultFlag.PAGE, ResultFlag.GROUP]:
+        elif EntryType(result.flags.type) in [EntryType.PAGE, EntryType.GROUP]:
             joiner = ''
             result_joiner = ' » '
         else:
@@ -3078,25 +3095,25 @@ def parse_xml(state: State, xml: str):
     # TODO: add example sources there? how?
     if not state.doxyfile['M_SEARCH_DISABLED'] and not compound.kind == 'example' and (compound.kind == 'group' or compound.brief or compounddef.find('detaileddescription')):
         if compound.kind == 'namespace':
-            kind = ResultFlag.NAMESPACE
+            kind = EntryType.NAMESPACE
         elif compound.kind == 'struct':
-            kind = ResultFlag.STRUCT
+            kind = EntryType.STRUCT
         elif compound.kind == 'class':
-            kind = ResultFlag.CLASS
+            kind = EntryType.CLASS
         elif compound.kind == 'union':
-            kind = ResultFlag.UNION
+            kind = EntryType.UNION
         elif compound.kind == 'dir':
-            kind = ResultFlag.DIR
+            kind = EntryType.DIR
         elif compound.kind == 'file':
-            kind = ResultFlag.FILE
+            kind = EntryType.FILE
         elif compound.kind == 'page':
-            kind = ResultFlag.PAGE
+            kind = EntryType.PAGE
         elif compound.kind == 'group':
-            kind = ResultFlag.GROUP
+            kind = EntryType.GROUP
         else: assert False # pragma: no cover
 
         result = Empty()
-        result.flags = kind|(ResultFlag.DEPRECATED if compound.is_deprecated else ResultFlag(0))
+        result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if compound.is_deprecated else ResultFlag(0), kind)
         result.url = compound.url
         result.prefix = state.current_prefix[:-1]
         result.name = state.current_prefix[-1]
diff --git a/documentation/test_doxygen/populate-js-test-data.py b/documentation/test_doxygen/populate-js-test-data.py
index 86fa998f..3d39bb32 100755
--- a/documentation/test_doxygen/populate-js-test-data.py
+++ b/documentation/test_doxygen/populate-js-test-data.py
@@ -30,6 +30,7 @@ import sys
 import pathlib
 sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
 
+from doxygen import EntryType
 from _search import Trie, ResultMap, ResultFlag, serialize_search_data
 
 basedir = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))/'js-test-data'
@@ -46,25 +47,25 @@ with open(basedir/'empty.bin', 'wb') as f:
 trie = Trie()
 map = ResultMap()
 
-trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.NAMESPACE))
-index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8, flags=ResultFlag.FUNC)
+trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)))
+index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))
 trie.insert("math::min()", index, lookahead_barriers=[4])
 trie.insert("min()", index)
-index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.CLASS|ResultFlag.DEPRECATED)
+index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.DEPRECATED, EntryType.CLASS))
 trie.insert("math::vector", index)
 trie.insert("vector", index)
-index = map.add("Math::Vector::min() const", "classMath_1_1Vector.html#min", suffix_length=6, flags=ResultFlag.FUNC)
+index = map.add("Math::Vector::min() const", "classMath_1_1Vector.html#min", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))
 trie.insert("math::vector::min()", index, lookahead_barriers=[4, 12])
 trie.insert("vector::min()", index, lookahead_barriers=[6])
 trie.insert("min()", index)
-range_index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.CLASS)
+range_index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
 trie.insert("math::range", range_index)
 trie.insert("range", range_index)
-index = map.add("Math::Range::min() const", "classMath_1_1Range.html#min", suffix_length=6, flags=ResultFlag.FUNC|ResultFlag.DELETED)
+index = map.add("Math::Range::min() const", "classMath_1_1Range.html#min", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.DELETED, EntryType.FUNC))
 trie.insert("math::range::min()", index, lookahead_barriers=[4, 11])
 trie.insert("range::min()", index, lookahead_barriers=[5])
 trie.insert("min()", index)
-trie.insert("subpage", map.add("Page » Subpage", "subpage.html", flags=ResultFlag.PAGE))
+trie.insert("subpage", map.add("Page » Subpage", "subpage.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE)))
 
 trie.insert("rectangle", map.add("Rectangle", "", alias=range_index))
 trie.insert("rect", map.add("Rectangle::Rect()", "", suffix_length=2, alias=range_index))
@@ -77,18 +78,18 @@ with open(basedir/'searchdata.b85', 'wb') as f:
 trie = Trie()
 map = ResultMap()
 
-trie.insert("hýždě", map.add("Hýždě", "#a", flags=ResultFlag.PAGE))
-trie.insert("hárá", map.add("Hárá", "#b", flags=ResultFlag.PAGE))
+trie.insert("hýždě", map.add("Hýždě", "#a", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE)))
+trie.insert("hárá", map.add("Hárá", "#b", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE)))
 
 with open(basedir/'unicode.bin', 'wb') as f:
     f.write(serialize_search_data(trie, map, 2))
 
 trie = Trie()
 map = ResultMap()
-trie.insert("magnum", map.add("Magnum", "namespaceMagnum.html", flags=ResultFlag.NAMESPACE))
-trie.insert("math", map.add("Magnum::Math", "namespaceMagnum_1_1Math.html", flags=ResultFlag.NAMESPACE))
-trie.insert("geometry", map.add("Magnum::Math::Geometry", "namespaceMagnum_1_1Math_1_1Geometry.html", flags=ResultFlag.NAMESPACE))
-trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Range.html", flags=ResultFlag.CLASS))
+trie.insert("magnum", map.add("Magnum", "namespaceMagnum.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)))
+trie.insert("math", map.add("Magnum::Math", "namespaceMagnum_1_1Math.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)))
+trie.insert("geometry", map.add("Magnum::Math::Geometry", "namespaceMagnum_1_1Math_1_1Geometry.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)))
+trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)))
 
 with open(basedir/'nested.bin', 'wb') as f:
     f.write(serialize_search_data(trie, map, 4))
diff --git a/documentation/test_doxygen/test_search.py b/documentation/test_doxygen/test_search.py
index d9fa9103..37a9d5a8 100755
--- a/documentation/test_doxygen/test_search.py
+++ b/documentation/test_doxygen/test_search.py
@@ -30,138 +30,11 @@ import sys
 import unittest
 from types import SimpleNamespace as Empty
 
-from _search import Trie, ResultMap, ResultFlag, serialize_search_data, search_data_header_struct
+from doxygen import EntryType
+from _search import Trie, ResultMap, ResultFlag, serialize_search_data, pretty_print_trie, pretty_print_map, pretty_print
 
 from test_doxygen import IntegrationTestCase
 
-def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str:
-    # Visualize where the trees were merged
-    if show_merged and base_offset in hashtable:
-        return color_map['red'] + '#' + color_map['reset']
-
-    stats.node_count += 1
-
-    out = ''
-    result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset)
-    stats.max_node_results = max(result_count, stats.max_node_results)
-    stats.max_node_children = max(child_count, stats.max_node_children)
-    offset = base_offset + Trie.header_struct.size
-
-    # print results, if any
-    if result_count:
-        out += color_map['blue'] + ' ['
-        for i in range(result_count):
-            if i: out += color_map['blue']+', '
-            result = Trie.result_struct.unpack_from(serialized, offset)[0]
-            stats.max_node_result_index = max(result, stats.max_node_result_index)
-            out += color_map['cyan'] + str(result)
-            offset += Trie.result_struct.size
-        out += color_map['blue'] + ']'
-
-    # print children, if any
-    for i in range(child_count):
-        if result_count or i:
-            out += color_map['reset'] + '\n'
-            out += color_map['blue'] + indent + color_map['white']
-        char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
-        if char <= 127:
-            out += chr(char)
-        else:
-            out += color_map['reset'] + hex(char)
-        if (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000):
-            out += color_map['green'] + '$'
-        if char > 127 or (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000):
-            out += color_map['reset'] + '\n' + color_map['blue'] + indent + ' ' + color_map['white']
-        child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
-        stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
-        offset += Trie.child_struct.size
-        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
-        child_count += 1
-
-    hashtable[base_offset] = True
-    return out
-
-color_map_colors = {'blue': '\033[0;34m',
-                    'white': '\033[1;39m',
-                    'red': '\033[1;31m',
-                    'green': '\033[1;32m',
-                    'cyan': '\033[1;36m',
-                    'yellow': '\033[1;33m',
-                    'reset': '\033[0m'}
-
-color_map_dummy = {'blue': '',
-                   'white': '',
-                   'red': '',
-                   'green': '',
-                   'cyan': '',
-                   'yellow': '',
-                   'reset': ''}
-
-def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barriers=True, colors=False):
-    color_map = color_map_colors if colors else color_map_dummy
-
-    hashtable = {}
-
-    stats = Empty()
-    stats.node_count = 0
-    stats.max_node_results = 0
-    stats.max_node_children = 0
-    stats.max_node_result_index = 0
-    stats.max_node_child_offset = 0
-
-    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
-    if out: out = color_map['white'] + out
-    stats = """
-node count:             {}
-max node results:       {}
-max node children:      {}
-max node result index:  {}
-max node child offset:  {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
-    return out, stats
-
-def pretty_print_map(serialized: bytes, colors=False):
-    color_map = color_map_colors if colors else color_map_dummy
-
-    # The first item gives out offset of first value, which can be used to
-    # calculate total value count
-    offset = ResultMap.offset_struct.unpack_from(serialized, 0)[0] & 0x00ffffff
-    size = int(offset/4 - 1)
-
-    out = ''
-    for i in range(size):
-        if i: out += '\n'
-        flags = ResultFlag(ResultMap.flags_struct.unpack_from(serialized, i*4 + 3)[0])
-        extra = []
-        if flags & ResultFlag._TYPE == ResultFlag.ALIAS:
-            extra += ['alias={}'.format(ResultMap.alias_struct.unpack_from(serialized, offset)[0])]
-            offset += ResultMap.alias_struct.size
-        if flags & ResultFlag.HAS_PREFIX:
-            extra += ['prefix={}[:{}]'.format(*ResultMap.prefix_struct.unpack_from(serialized, offset))]
-            offset += ResultMap.prefix_struct.size
-        if flags & ResultFlag.HAS_SUFFIX:
-            extra += ['suffix_length={}'.format(ResultMap.suffix_length_struct.unpack_from(serialized, offset)[0])]
-            offset += ResultMap.suffix_length_struct.size
-        if flags & ResultFlag.DEPRECATED:
-            extra += ['deprecated']
-        if flags & ResultFlag.DELETED:
-            extra += ['deleted']
-        if flags & ResultFlag._TYPE:
-            extra += ['type={}'.format((flags & ResultFlag._TYPE).name)]
-        next_offset = ResultMap.offset_struct.unpack_from(serialized, (i + 1)*4)[0] & 0x00ffffff
-        name, _, url = serialized[offset:next_offset].partition(b'\0')
-        out += color_map['cyan'] + str(i) + color_map['blue'] + ': ' + color_map['white'] + name.decode('utf-8') + color_map['blue'] + ' [' + color_map['yellow'] + (color_map['blue'] + ', ' + color_map['yellow']).join(extra) + color_map['blue'] + '] ->' + (' ' + color_map['reset'] + url.decode('utf-8') if url else '')
-        offset = next_offset
-    return out
-
-def pretty_print(serialized: bytes, show_merged=False, show_lookahead_barriers=True, colors=False):
-    magic, version, symbol_count, map_offset = search_data_header_struct.unpack_from(serialized)
-    assert magic == b'MCS'
-    assert version == 0
-
-    pretty_trie, stats = pretty_print_trie(serialized[search_data_header_struct.size:map_offset], show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, colors=colors)
-    pretty_map = pretty_print_map(serialized[map_offset:], colors=colors)
-    return '{} symbols\n'.format(symbol_count) + pretty_trie + '\n' + pretty_map, stats
-
 class TrieSerialization(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -283,7 +156,7 @@ class MapSerialization(unittest.TestCase):
         self.maxDiff = None
 
     def compare(self, serialized: bytes, expected: str):
-        pretty = pretty_print_map(serialized)
+        pretty = pretty_print_map(serialized, entryTypeClass=EntryType)
         #print(pretty)
         self.assertEqual(pretty, expected.strip())
 
@@ -296,7 +169,7 @@ class MapSerialization(unittest.TestCase):
 
     def test_single(self):
         map = ResultMap()
-        self.assertEqual(map.add("Magnum", "namespaceMagnum.html", suffix_length=11, flags=ResultFlag.NAMESPACE), 0)
+        self.assertEqual(map.add("Magnum", "namespaceMagnum.html", suffix_length=11, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)), 0)
 
         serialized = map.serialize()
         self.compare(serialized, """
@@ -307,11 +180,11 @@ class MapSerialization(unittest.TestCase):
     def test_multiple(self):
         map = ResultMap()
 
-        self.assertEqual(map.add("Math", "namespaceMath.html", flags=ResultFlag.NAMESPACE), 0)
-        self.assertEqual(map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.CLASS), 1)
-        self.assertEqual(map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.CLASS), 2)
-        self.assertEqual(map.add("Math::min()", "namespaceMath.html#abcdef2875", flags=ResultFlag.FUNC), 3)
-        self.assertEqual(map.add("Math::max(int, int)", "namespaceMath.html#abcdef1234", suffix_length=8, flags=ResultFlag.FUNC|ResultFlag.DEPRECATED|ResultFlag.DELETED), 4)
+        self.assertEqual(map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)), 0)
+        self.assertEqual(map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)), 1)
+        self.assertEqual(map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)), 2)
+        self.assertEqual(map.add("Math::min()", "namespaceMath.html#abcdef2875", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)), 3)
+        self.assertEqual(map.add("Math::max(int, int)", "namespaceMath.html#abcdef1234", suffix_length=8, flags=ResultFlag.from_type(ResultFlag.DEPRECATED|ResultFlag.DELETED, EntryType.FUNC)), 4)
         self.assertEqual(map.add("Rectangle", "", alias=2), 5)
         self.assertEqual(map.add("Rectangle::Rect()", "", suffix_length=2, alias=2), 6)
 
@@ -333,7 +206,7 @@ class Serialization(unittest.TestCase):
         self.maxDiff = None
 
     def compare(self, serialized: bytes, expected: str):
-        pretty = pretty_print(serialized)[0]
+        pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
         #print(pretty)
         self.assertEqual(pretty, expected.strip())
 
@@ -341,11 +214,11 @@ class Serialization(unittest.TestCase):
         trie = Trie()
         map = ResultMap()
 
-        trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.NAMESPACE))
-        index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.CLASS)
+        trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)))
+        index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
         trie.insert("math::vector", index)
         trie.insert("vector", index)
-        index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.CLASS)
+        index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
         trie.insert("math::range", index)
         trie.insert("range", index)
 
@@ -372,7 +245,7 @@ class Search(IntegrationTestCase):
 
         with open(os.path.join(self.path, 'html', 'searchdata.bin'), 'rb') as f:
             serialized = f.read()
-            search_data_pretty = pretty_print(serialized)[0]
+            search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
         #print(search_data_pretty)
         self.assertEqual(len(serialized), 4695)
         self.assertEqual(search_data_pretty, """
@@ -549,7 +422,7 @@ class SearchLongSuffixLength(IntegrationTestCase):
 
         with open(os.path.join(self.path, 'html', 'searchdata.bin'), 'rb') as f:
             serialized = f.read()
-            search_data_pretty = pretty_print(serialized)[0]
+            search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
         #print(search_data_pretty)
         self.assertEqual(len(serialized), 382)
         # The parameters get cut off with an ellipsis
@@ -578,6 +451,6 @@ if __name__ == '__main__': # pragma: no cover
     args = parser.parse_args()
 
     with open(args.file, 'rb') as f:
-        out, stats = pretty_print(f.read(), show_merged=args.show_merged, show_lookahead_barriers=args.show_lookahead_barriers, colors=args.colors)
+        out, stats = pretty_print(f.read(), entryTypeClass=EntryType, show_merged=args.show_merged, show_lookahead_barriers=args.show_lookahead_barriers, colors=args.colors)
         print(out)
         if args.show_stats: print(stats, file=sys.stderr)