From: Vladimír Vondruš Date: Sun, 14 Jul 2019 23:14:36 +0000 (+0200) Subject: documentation: first step to make the result types backend-specific. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=8a46373cec9b5e53ce17bdef792fdc06599fe596;p=blog.git documentation: first step to make the result types backend-specific. The type is now a part of the doxygen script and is added as plain bits to ResultFlag. Next step is encoding the type mapping to the search data and then reading that back in the JS frontend. --- diff --git a/documentation/_search.py b/documentation/_search.py index 598e995b..12e31f63 100644 --- a/documentation/_search.py +++ b/documentation/_search.py @@ -31,29 +31,45 @@ from enum import Flag from types import SimpleNamespace as Empty class ResultFlag(Flag): + @staticmethod + def from_type(flag: 'ResultFlag', type) -> 'ResultFlag': + assert not flag & ResultFlag._TYPE + assert type.value > 0 and type.value <= 0xf + return flag|ResultFlag(type.value << 4) + + @property + def type(self): + return (int(self.value) >> 4) & 0xf + + NONE = 0 + HAS_SUFFIX = 1 << 0 HAS_PREFIX = 1 << 3 DEPRECATED = 1 << 1 - DELETED = 1 << 2 + DELETED = 1 << 2 # TODO: this is C++-specific, put aside as well? # Result type. Order defines order in which equally-named symbols appear in - # search results. Keep in sync with search.js. + # search results, every backend supplies its own, ranging from 1 << 4 to + # 15 << 4. _TYPE = 0xf << 4 ALIAS = 0 << 4 # This one gets the type from the referenced result - PAGE = 1 << 4 - NAMESPACE = 2 << 4 - GROUP = 3 << 4 - CLASS = 4 << 4 - STRUCT = 5 << 4 - UNION = 6 << 4 - TYPEDEF = 7 << 4 - DIR = 8 << 4 - FILE = 9 << 4 - FUNC = 10 << 4 - DEFINE = 11 << 4 - ENUM = 12 << 4 - ENUM_VALUE = 13 << 4 - VAR = 14 << 4 + + # Otherwise it says "32 is not a valid ResultFlag" + _TYPE01 = 1 << 4 + _TYPE02 = 2 << 4 + _TYPE03 = 3 << 4 + _TYPE04 = 4 << 4 + _TYPE05 = 5 << 4 + _TYPE06 = 6 << 4 + _TYPE07 = 7 << 4 + _TYPE08 = 8 << 4 + _TYPE09 = 9 << 4 + _TYPE10 = 10 << 4 + _TYPE11 = 11 << 4 + _TYPE12 = 12 << 4 + _TYPE13 = 13 << 4 + _TYPE14 = 14 << 4 + _TYPE15 = 15 << 4 class ResultMap: # item 1 flags | item 2 flags | | item N flags | file | item 1 | @@ -351,3 +367,131 @@ def serialize_search_data(trie: Trie, map: ResultMap, symbol_count, merge_subtre def base85encode_search_data(data: bytearray) -> bytearray: return (b"/* Generated by https://mcss.mosra.cz/documentation/doxygen/. Do not edit. */\n" + b"Search.load('" + base64.b85encode(data, True) + b"');\n") + +def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str: + # Visualize where the trees were merged + if show_merged and base_offset in hashtable: + return color_map['red'] + '#' + color_map['reset'] + + stats.node_count += 1 + + out = '' + result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset) + stats.max_node_results = max(result_count, stats.max_node_results) + stats.max_node_children = max(child_count, stats.max_node_children) + offset = base_offset + Trie.header_struct.size + + # print results, if any + if result_count: + out += color_map['blue'] + ' [' + for i in range(result_count): + if i: out += color_map['blue']+', ' + result = Trie.result_struct.unpack_from(serialized, offset)[0] + stats.max_node_result_index = max(result, stats.max_node_result_index) + out += color_map['cyan'] + str(result) + offset += Trie.result_struct.size + out += color_map['blue'] + ']' + + # print children, if any + for i in range(child_count): + if result_count or i: + out += color_map['reset'] + '\n' + out += color_map['blue'] + indent + color_map['white'] + char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0] + if char <= 127: + out += chr(char) + else: + out += color_map['reset'] + hex(char) + if (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000): + out += color_map['green'] + '$' + if char > 127 or (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000): + out += color_map['reset'] + '\n' + color_map['blue'] + indent + ' ' + color_map['white'] + child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff + stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset) + offset += Trie.child_struct.size + out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map) + child_count += 1 + + hashtable[base_offset] = True + return out + +color_map_colors = {'blue': '\033[0;34m', + 'white': '\033[1;39m', + 'red': '\033[1;31m', + 'green': '\033[1;32m', + 'cyan': '\033[1;36m', + 'yellow': '\033[1;33m', + 'reset': '\033[0m'} + +color_map_dummy = {'blue': '', + 'white': '', + 'red': '', + 'green': '', + 'cyan': '', + 'yellow': '', + 'reset': ''} + +def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barriers=True, colors=False): + color_map = color_map_colors if colors else color_map_dummy + + hashtable = {} + + stats = Empty() + stats.node_count = 0 + stats.max_node_results = 0 + stats.max_node_children = 0 + stats.max_node_result_index = 0 + stats.max_node_child_offset = 0 + + out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map) + if out: out = color_map['white'] + out + stats = """ +node count: {} +max node results: {} +max node children: {} +max node result index: {} +max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset) + return out, stats + +def pretty_print_map(serialized: bytes, *, entryTypeClass, colors=False): + color_map = color_map_colors if colors else color_map_dummy + + # The first item gives out offset of first value, which can be used to + # calculate total value count + offset = ResultMap.offset_struct.unpack_from(serialized, 0)[0] & 0x00ffffff + size = int(offset/4 - 1) + + out = '' + for i in range(size): + if i: out += '\n' + flags = ResultFlag(ResultMap.flags_struct.unpack_from(serialized, i*4 + 3)[0]) + extra = [] + if flags & ResultFlag._TYPE == ResultFlag.ALIAS: + extra += ['alias={}'.format(ResultMap.alias_struct.unpack_from(serialized, offset)[0])] + offset += ResultMap.alias_struct.size + if flags & ResultFlag.HAS_PREFIX: + extra += ['prefix={}[:{}]'.format(*ResultMap.prefix_struct.unpack_from(serialized, offset))] + offset += ResultMap.prefix_struct.size + if flags & ResultFlag.HAS_SUFFIX: + extra += ['suffix_length={}'.format(ResultMap.suffix_length_struct.unpack_from(serialized, offset)[0])] + offset += ResultMap.suffix_length_struct.size + if flags & ResultFlag.DEPRECATED: + extra += ['deprecated'] + if flags & ResultFlag.DELETED: + extra += ['deleted'] + if flags & ResultFlag._TYPE: + extra += ['type={}'.format(entryTypeClass(flags.type).name)] + next_offset = ResultMap.offset_struct.unpack_from(serialized, (i + 1)*4)[0] & 0x00ffffff + name, _, url = serialized[offset:next_offset].partition(b'\0') + out += color_map['cyan'] + str(i) + color_map['blue'] + ': ' + color_map['white'] + name.decode('utf-8') + color_map['blue'] + ' [' + color_map['yellow'] + (color_map['blue'] + ', ' + color_map['yellow']).join(extra) + color_map['blue'] + '] ->' + (' ' + color_map['reset'] + url.decode('utf-8') if url else '') + offset = next_offset + return out + +def pretty_print(serialized: bytes, *, entryTypeClass, show_merged=False, show_lookahead_barriers=True, colors=False): + magic, version, symbol_count, map_offset = search_data_header_struct.unpack_from(serialized) + assert magic == b'MCS' + assert version == 0 + + pretty_trie, stats = pretty_print_trie(serialized[search_data_header_struct.size:map_offset], show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, colors=colors) + pretty_map = pretty_print_map(serialized[map_offset:], entryTypeClass=entryTypeClass, colors=colors) + return '{} symbols\n'.format(symbol_count) + pretty_trie + '\n' + pretty_map, stats diff --git a/documentation/doxygen.py b/documentation/doxygen.py index 014b6f58..388befb8 100755 --- a/documentation/doxygen.py +++ b/documentation/doxygen.py @@ -27,6 +27,7 @@ import xml.etree.ElementTree as ET import argparse import copy +import enum import sys import re import html @@ -54,6 +55,22 @@ import latex2svg import latex2svgextra import ansilexer +class EntryType(enum.Enum): + PAGE = 1 + NAMESPACE = 2 + GROUP = 3 + CLASS = 4 + STRUCT = 5 + UNION = 6 + TYPEDEF = 7 + DIR = 8 + FILE = 9 + FUNC = 10 + DEFINE = 11 + ENUM = 12 + ENUM_VALUE = 13 + VAR = 14 + xref_id_rx = re.compile(r"""(.*)_1(_[a-z-]+[0-9]+|@)$""") slugify_nonalnum_rx = re.compile(r"""[^\w\s-]""") slugify_hyphens_rx = re.compile(r"""[-\s]+""") @@ -1671,7 +1688,7 @@ def parse_enum(state: State, element: ET.Element): if value.brief or value.description: if enum.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']: result = Empty() - result.flags = ResultFlag.ENUM_VALUE|(ResultFlag.DEPRECATED if value.is_deprecated else ResultFlag(0)) + result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if value.is_deprecated else ResultFlag(0), EntryType.ENUM_VALUE) result.url = enum.base_url + '#' + value.id result.prefix = state.current_prefix + [enum.name] result.name = value.name @@ -1696,7 +1713,7 @@ def parse_enum(state: State, element: ET.Element): if enum.brief or enum.has_details or enum.has_value_details: if enum.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']: result = Empty() - result.flags = ResultFlag.ENUM|(ResultFlag.DEPRECATED if enum.is_deprecated else ResultFlag(0)) + result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if enum.is_deprecated else ResultFlag(0), EntryType.ENUM) result.url = enum.base_url + '#' + enum.id result.prefix = state.current_prefix result.name = enum.name @@ -1772,7 +1789,7 @@ def parse_typedef(state: State, element: ET.Element): # Avoid duplicates in search if typedef.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']: result = Empty() - result.flags = ResultFlag.TYPEDEF|(ResultFlag.DEPRECATED if typedef.is_deprecated else ResultFlag(0)) + result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if typedef.is_deprecated else ResultFlag(0), EntryType.TYPEDEF) result.url = typedef.base_url + '#' + typedef.id result.prefix = state.current_prefix result.name = typedef.name @@ -1917,7 +1934,7 @@ def parse_func(state: State, element: ET.Element): # Avoid duplicates in search if func.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']: result = Empty() - result.flags = ResultFlag.FUNC|(ResultFlag.DEPRECATED if func.is_deprecated else ResultFlag(0))|(ResultFlag.DELETED if func.is_deleted else ResultFlag(0)) + result.flags = ResultFlag.from_type((ResultFlag.DEPRECATED if func.is_deprecated else ResultFlag(0))|(ResultFlag.DELETED if func.is_deleted else ResultFlag(0)), EntryType.FUNC) result.url = func.base_url + '#' + func.id result.prefix = state.current_prefix result.name = func.name @@ -1953,7 +1970,7 @@ def parse_var(state: State, element: ET.Element): # Avoid duplicates in search if var.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']: result = Empty() - result.flags = ResultFlag.VAR|(ResultFlag.DEPRECATED if var.is_deprecated else ResultFlag(0)) + result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if var.is_deprecated else ResultFlag(0), EntryType.VAR) result.url = var.base_url + '#' + var.id result.prefix = state.current_prefix result.name = var.name @@ -1993,7 +2010,7 @@ def parse_define(state: State, element: ET.Element): # Avoid duplicates in search if define.base_url == state.current_compound_url and not state.doxyfile['M_SEARCH_DISABLED']: result = Empty() - result.flags = ResultFlag.DEFINE|(ResultFlag.DEPRECATED if define.is_deprecated else ResultFlag(0)) + result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if define.is_deprecated else ResultFlag(0), EntryType.DEFINE) result.url = define.base_url + '#' + define.id result.prefix = [] result.name = define.name @@ -2250,11 +2267,11 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers= # Decide on prefix joiner. Defines are among the :: ones as well, # because we need to add the function macros twice -- but they have no # prefix, so it's okay. - if result.flags & ResultFlag._TYPE in [ResultFlag.NAMESPACE, ResultFlag.CLASS, ResultFlag.STRUCT, ResultFlag.UNION, ResultFlag.TYPEDEF, ResultFlag.FUNC, ResultFlag.VAR, ResultFlag.ENUM, ResultFlag.ENUM_VALUE, ResultFlag.DEFINE]: + if EntryType(result.flags.type) in [EntryType.NAMESPACE, EntryType.CLASS, EntryType.STRUCT, EntryType.UNION, EntryType.TYPEDEF, EntryType.FUNC, EntryType.VAR, EntryType.ENUM, EntryType.ENUM_VALUE, EntryType.DEFINE]: joiner = result_joiner = '::' - elif result.flags & ResultFlag._TYPE in [ResultFlag.DIR, ResultFlag.FILE]: + elif EntryType(result.flags.type) in [EntryType.DIR, EntryType.FILE]: joiner = result_joiner = '/' - elif result.flags & ResultFlag._TYPE in [ResultFlag.PAGE, ResultFlag.GROUP]: + elif EntryType(result.flags.type) in [EntryType.PAGE, EntryType.GROUP]: joiner = '' result_joiner = ' » ' else: @@ -3078,25 +3095,25 @@ def parse_xml(state: State, xml: str): # TODO: add example sources there? how? if not state.doxyfile['M_SEARCH_DISABLED'] and not compound.kind == 'example' and (compound.kind == 'group' or compound.brief or compounddef.find('detaileddescription')): if compound.kind == 'namespace': - kind = ResultFlag.NAMESPACE + kind = EntryType.NAMESPACE elif compound.kind == 'struct': - kind = ResultFlag.STRUCT + kind = EntryType.STRUCT elif compound.kind == 'class': - kind = ResultFlag.CLASS + kind = EntryType.CLASS elif compound.kind == 'union': - kind = ResultFlag.UNION + kind = EntryType.UNION elif compound.kind == 'dir': - kind = ResultFlag.DIR + kind = EntryType.DIR elif compound.kind == 'file': - kind = ResultFlag.FILE + kind = EntryType.FILE elif compound.kind == 'page': - kind = ResultFlag.PAGE + kind = EntryType.PAGE elif compound.kind == 'group': - kind = ResultFlag.GROUP + kind = EntryType.GROUP else: assert False # pragma: no cover result = Empty() - result.flags = kind|(ResultFlag.DEPRECATED if compound.is_deprecated else ResultFlag(0)) + result.flags = ResultFlag.from_type(ResultFlag.DEPRECATED if compound.is_deprecated else ResultFlag(0), kind) result.url = compound.url result.prefix = state.current_prefix[:-1] result.name = state.current_prefix[-1] diff --git a/documentation/test_doxygen/populate-js-test-data.py b/documentation/test_doxygen/populate-js-test-data.py index 86fa998f..3d39bb32 100755 --- a/documentation/test_doxygen/populate-js-test-data.py +++ b/documentation/test_doxygen/populate-js-test-data.py @@ -30,6 +30,7 @@ import sys import pathlib sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')) +from doxygen import EntryType from _search import Trie, ResultMap, ResultFlag, serialize_search_data basedir = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))/'js-test-data' @@ -46,25 +47,25 @@ with open(basedir/'empty.bin', 'wb') as f: trie = Trie() map = ResultMap() -trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.NAMESPACE)) -index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8, flags=ResultFlag.FUNC) +trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))) +index = map.add("Math::min(int, int)", "namespaceMath.html#min", suffix_length=8, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)) trie.insert("math::min()", index, lookahead_barriers=[4]) trie.insert("min()", index) -index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.CLASS|ResultFlag.DEPRECATED) +index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.DEPRECATED, EntryType.CLASS)) trie.insert("math::vector", index) trie.insert("vector", index) -index = map.add("Math::Vector::min() const", "classMath_1_1Vector.html#min", suffix_length=6, flags=ResultFlag.FUNC) +index = map.add("Math::Vector::min() const", "classMath_1_1Vector.html#min", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)) trie.insert("math::vector::min()", index, lookahead_barriers=[4, 12]) trie.insert("vector::min()", index, lookahead_barriers=[6]) trie.insert("min()", index) -range_index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.CLASS) +range_index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)) trie.insert("math::range", range_index) trie.insert("range", range_index) -index = map.add("Math::Range::min() const", "classMath_1_1Range.html#min", suffix_length=6, flags=ResultFlag.FUNC|ResultFlag.DELETED) +index = map.add("Math::Range::min() const", "classMath_1_1Range.html#min", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.DELETED, EntryType.FUNC)) trie.insert("math::range::min()", index, lookahead_barriers=[4, 11]) trie.insert("range::min()", index, lookahead_barriers=[5]) trie.insert("min()", index) -trie.insert("subpage", map.add("Page » Subpage", "subpage.html", flags=ResultFlag.PAGE)) +trie.insert("subpage", map.add("Page » Subpage", "subpage.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE))) trie.insert("rectangle", map.add("Rectangle", "", alias=range_index)) trie.insert("rect", map.add("Rectangle::Rect()", "", suffix_length=2, alias=range_index)) @@ -77,18 +78,18 @@ with open(basedir/'searchdata.b85', 'wb') as f: trie = Trie() map = ResultMap() -trie.insert("hýždě", map.add("Hýždě", "#a", flags=ResultFlag.PAGE)) -trie.insert("hárá", map.add("Hárá", "#b", flags=ResultFlag.PAGE)) +trie.insert("hýždě", map.add("Hýždě", "#a", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE))) +trie.insert("hárá", map.add("Hárá", "#b", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE))) with open(basedir/'unicode.bin', 'wb') as f: f.write(serialize_search_data(trie, map, 2)) trie = Trie() map = ResultMap() -trie.insert("magnum", map.add("Magnum", "namespaceMagnum.html", flags=ResultFlag.NAMESPACE)) -trie.insert("math", map.add("Magnum::Math", "namespaceMagnum_1_1Math.html", flags=ResultFlag.NAMESPACE)) -trie.insert("geometry", map.add("Magnum::Math::Geometry", "namespaceMagnum_1_1Math_1_1Geometry.html", flags=ResultFlag.NAMESPACE)) -trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Range.html", flags=ResultFlag.CLASS)) +trie.insert("magnum", map.add("Magnum", "namespaceMagnum.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))) +trie.insert("math", map.add("Magnum::Math", "namespaceMagnum_1_1Math.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))) +trie.insert("geometry", map.add("Magnum::Math::Geometry", "namespaceMagnum_1_1Math_1_1Geometry.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))) +trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))) with open(basedir/'nested.bin', 'wb') as f: f.write(serialize_search_data(trie, map, 4)) diff --git a/documentation/test_doxygen/test_search.py b/documentation/test_doxygen/test_search.py index d9fa9103..37a9d5a8 100755 --- a/documentation/test_doxygen/test_search.py +++ b/documentation/test_doxygen/test_search.py @@ -30,138 +30,11 @@ import sys import unittest from types import SimpleNamespace as Empty -from _search import Trie, ResultMap, ResultFlag, serialize_search_data, search_data_header_struct +from doxygen import EntryType +from _search import Trie, ResultMap, ResultFlag, serialize_search_data, pretty_print_trie, pretty_print_map, pretty_print from test_doxygen import IntegrationTestCase -def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str: - # Visualize where the trees were merged - if show_merged and base_offset in hashtable: - return color_map['red'] + '#' + color_map['reset'] - - stats.node_count += 1 - - out = '' - result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset) - stats.max_node_results = max(result_count, stats.max_node_results) - stats.max_node_children = max(child_count, stats.max_node_children) - offset = base_offset + Trie.header_struct.size - - # print results, if any - if result_count: - out += color_map['blue'] + ' [' - for i in range(result_count): - if i: out += color_map['blue']+', ' - result = Trie.result_struct.unpack_from(serialized, offset)[0] - stats.max_node_result_index = max(result, stats.max_node_result_index) - out += color_map['cyan'] + str(result) - offset += Trie.result_struct.size - out += color_map['blue'] + ']' - - # print children, if any - for i in range(child_count): - if result_count or i: - out += color_map['reset'] + '\n' - out += color_map['blue'] + indent + color_map['white'] - char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0] - if char <= 127: - out += chr(char) - else: - out += color_map['reset'] + hex(char) - if (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000): - out += color_map['green'] + '$' - if char > 127 or (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000): - out += color_map['reset'] + '\n' + color_map['blue'] + indent + ' ' + color_map['white'] - child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff - stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset) - offset += Trie.child_struct.size - out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map) - child_count += 1 - - hashtable[base_offset] = True - return out - -color_map_colors = {'blue': '\033[0;34m', - 'white': '\033[1;39m', - 'red': '\033[1;31m', - 'green': '\033[1;32m', - 'cyan': '\033[1;36m', - 'yellow': '\033[1;33m', - 'reset': '\033[0m'} - -color_map_dummy = {'blue': '', - 'white': '', - 'red': '', - 'green': '', - 'cyan': '', - 'yellow': '', - 'reset': ''} - -def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barriers=True, colors=False): - color_map = color_map_colors if colors else color_map_dummy - - hashtable = {} - - stats = Empty() - stats.node_count = 0 - stats.max_node_results = 0 - stats.max_node_children = 0 - stats.max_node_result_index = 0 - stats.max_node_child_offset = 0 - - out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map) - if out: out = color_map['white'] + out - stats = """ -node count: {} -max node results: {} -max node children: {} -max node result index: {} -max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset) - return out, stats - -def pretty_print_map(serialized: bytes, colors=False): - color_map = color_map_colors if colors else color_map_dummy - - # The first item gives out offset of first value, which can be used to - # calculate total value count - offset = ResultMap.offset_struct.unpack_from(serialized, 0)[0] & 0x00ffffff - size = int(offset/4 - 1) - - out = '' - for i in range(size): - if i: out += '\n' - flags = ResultFlag(ResultMap.flags_struct.unpack_from(serialized, i*4 + 3)[0]) - extra = [] - if flags & ResultFlag._TYPE == ResultFlag.ALIAS: - extra += ['alias={}'.format(ResultMap.alias_struct.unpack_from(serialized, offset)[0])] - offset += ResultMap.alias_struct.size - if flags & ResultFlag.HAS_PREFIX: - extra += ['prefix={}[:{}]'.format(*ResultMap.prefix_struct.unpack_from(serialized, offset))] - offset += ResultMap.prefix_struct.size - if flags & ResultFlag.HAS_SUFFIX: - extra += ['suffix_length={}'.format(ResultMap.suffix_length_struct.unpack_from(serialized, offset)[0])] - offset += ResultMap.suffix_length_struct.size - if flags & ResultFlag.DEPRECATED: - extra += ['deprecated'] - if flags & ResultFlag.DELETED: - extra += ['deleted'] - if flags & ResultFlag._TYPE: - extra += ['type={}'.format((flags & ResultFlag._TYPE).name)] - next_offset = ResultMap.offset_struct.unpack_from(serialized, (i + 1)*4)[0] & 0x00ffffff - name, _, url = serialized[offset:next_offset].partition(b'\0') - out += color_map['cyan'] + str(i) + color_map['blue'] + ': ' + color_map['white'] + name.decode('utf-8') + color_map['blue'] + ' [' + color_map['yellow'] + (color_map['blue'] + ', ' + color_map['yellow']).join(extra) + color_map['blue'] + '] ->' + (' ' + color_map['reset'] + url.decode('utf-8') if url else '') - offset = next_offset - return out - -def pretty_print(serialized: bytes, show_merged=False, show_lookahead_barriers=True, colors=False): - magic, version, symbol_count, map_offset = search_data_header_struct.unpack_from(serialized) - assert magic == b'MCS' - assert version == 0 - - pretty_trie, stats = pretty_print_trie(serialized[search_data_header_struct.size:map_offset], show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, colors=colors) - pretty_map = pretty_print_map(serialized[map_offset:], colors=colors) - return '{} symbols\n'.format(symbol_count) + pretty_trie + '\n' + pretty_map, stats - class TrieSerialization(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -283,7 +156,7 @@ class MapSerialization(unittest.TestCase): self.maxDiff = None def compare(self, serialized: bytes, expected: str): - pretty = pretty_print_map(serialized) + pretty = pretty_print_map(serialized, entryTypeClass=EntryType) #print(pretty) self.assertEqual(pretty, expected.strip()) @@ -296,7 +169,7 @@ class MapSerialization(unittest.TestCase): def test_single(self): map = ResultMap() - self.assertEqual(map.add("Magnum", "namespaceMagnum.html", suffix_length=11, flags=ResultFlag.NAMESPACE), 0) + self.assertEqual(map.add("Magnum", "namespaceMagnum.html", suffix_length=11, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)), 0) serialized = map.serialize() self.compare(serialized, """ @@ -307,11 +180,11 @@ class MapSerialization(unittest.TestCase): def test_multiple(self): map = ResultMap() - self.assertEqual(map.add("Math", "namespaceMath.html", flags=ResultFlag.NAMESPACE), 0) - self.assertEqual(map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.CLASS), 1) - self.assertEqual(map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.CLASS), 2) - self.assertEqual(map.add("Math::min()", "namespaceMath.html#abcdef2875", flags=ResultFlag.FUNC), 3) - self.assertEqual(map.add("Math::max(int, int)", "namespaceMath.html#abcdef1234", suffix_length=8, flags=ResultFlag.FUNC|ResultFlag.DEPRECATED|ResultFlag.DELETED), 4) + self.assertEqual(map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)), 0) + self.assertEqual(map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)), 1) + self.assertEqual(map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)), 2) + self.assertEqual(map.add("Math::min()", "namespaceMath.html#abcdef2875", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)), 3) + self.assertEqual(map.add("Math::max(int, int)", "namespaceMath.html#abcdef1234", suffix_length=8, flags=ResultFlag.from_type(ResultFlag.DEPRECATED|ResultFlag.DELETED, EntryType.FUNC)), 4) self.assertEqual(map.add("Rectangle", "", alias=2), 5) self.assertEqual(map.add("Rectangle::Rect()", "", suffix_length=2, alias=2), 6) @@ -333,7 +206,7 @@ class Serialization(unittest.TestCase): self.maxDiff = None def compare(self, serialized: bytes, expected: str): - pretty = pretty_print(serialized)[0] + pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(pretty) self.assertEqual(pretty, expected.strip()) @@ -341,11 +214,11 @@ class Serialization(unittest.TestCase): trie = Trie() map = ResultMap() - trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.NAMESPACE)) - index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.CLASS) + trie.insert("math", map.add("Math", "namespaceMath.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE))) + index = map.add("Math::Vector", "classMath_1_1Vector.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)) trie.insert("math::vector", index) trie.insert("vector", index) - index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.CLASS) + index = map.add("Math::Range", "classMath_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)) trie.insert("math::range", index) trie.insert("range", index) @@ -372,7 +245,7 @@ class Search(IntegrationTestCase): with open(os.path.join(self.path, 'html', 'searchdata.bin'), 'rb') as f: serialized = f.read() - search_data_pretty = pretty_print(serialized)[0] + search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) self.assertEqual(len(serialized), 4695) self.assertEqual(search_data_pretty, """ @@ -549,7 +422,7 @@ class SearchLongSuffixLength(IntegrationTestCase): with open(os.path.join(self.path, 'html', 'searchdata.bin'), 'rb') as f: serialized = f.read() - search_data_pretty = pretty_print(serialized)[0] + search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) self.assertEqual(len(serialized), 382) # The parameters get cut off with an ellipsis @@ -578,6 +451,6 @@ if __name__ == '__main__': # pragma: no cover args = parser.parse_args() with open(args.file, 'rb') as f: - out, stats = pretty_print(f.read(), show_merged=args.show_merged, show_lookahead_barriers=args.show_lookahead_barriers, colors=args.colors) + out, stats = pretty_print(f.read(), entryTypeClass=EntryType, show_merged=args.show_merged, show_lookahead_barriers=args.show_lookahead_barriers, colors=args.colors) print(out) if args.show_stats: print(stats, file=sys.stderr)