From: Vladimír Vondruš Date: Sun, 4 Feb 2018 21:09:52 +0000 (+0100) Subject: doxygen: add compound to search data during the actual parse. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=402dd22ed458e61f03b03a2221cb5a94483b49fe;p=blog.git doxygen: add compound to search data during the actual parse. So it's prepared for adding extra search stuff that needs to be parsed, such as search keywords. I went through the test case and manually reordered by hand to ensure no functionality was lost while doing this. It was not, quite the opposite -- groups were not properly marked as deprecated. Now they are. The file size stays the same, as it should. --- diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py index 9d3c2ac1..92430de2 100755 --- a/doxygen/dox2html5.py +++ b/doxygen/dox2html5.py @@ -1659,120 +1659,80 @@ def postprocess_state(state: State): if state.doxyfile['M_FAVICON']: state.doxyfile['M_FAVICON'] = (state.doxyfile['M_FAVICON'], mimetypes.guess_type(state.doxyfile['M_FAVICON'])[0]) -def _build_search_data(state: State, prefix, id: str, trie: Trie, map: ResultMap, add_lookahead_barriers): - compound = state.compounds[id] - if not compound.brief and not compound.has_details: return 0 - - # Add current item name to prefix list - prefixed_name = prefix + [compound.leaf_name] - - if compound.kind == 'namespace': - kind = ResultFlag.NAMESPACE - elif compound.kind == 'struct': - kind = ResultFlag.STRUCT - elif compound.kind == 'class': - kind = ResultFlag.CLASS - elif compound.kind == 'union': - kind = ResultFlag.UNION - elif compound.kind == 'dir': - kind = ResultFlag.DIR - elif compound.kind == 'file': - kind = ResultFlag.FILE - elif compound.kind == 'page': - kind = ResultFlag.PAGE - elif compound.kind == 'group': - kind = ResultFlag.GROUP - else: assert False # pragma: no cover - - # Calculate fully-qualified name - if compound.kind in ['namespace', 'struct', 'class', 'union']: - joiner = result_joiner = '::' - elif compound.kind in ['file', 'dir']: - joiner = result_joiner = '/' - elif compound.kind in ['page', 'group']: - joiner = '' - result_joiner = ' » ' - else: assert False # pragma: no cover - - # If just a leaf name, add it once - if not joiner: - result_name = result_joiner.join(prefixed_name) - - # TODO: escape elsewhere so i don't have to unescape here - index = map.add(html.unescape(result_name), compound.url, flags=kind) - trie.insert(html.unescape(compound.leaf_name).lower(), index) - - # Otherwise add it multiple times with all possible prefixes - else: - # TODO: escape elsewhere so i don't have to unescape here - index = map.add(html.unescape(result_joiner.join(prefixed_name)), compound.url, flags=kind|(ResultFlag.DEPRECATED if compound.is_deprecated else ResultFlag(0))) - for i in range(len(prefixed_name)): - lookahead_barriers = [] - name = '' - for j in prefixed_name[i:]: - if name: - lookahead_barriers += [len(name)] - name += joiner - name += html.unescape(j) - trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else []) - - for i in compound.children: - if i in state.compounds: - _build_search_data(state, prefixed_name, i, trie, map, add_lookahead_barriers=add_lookahead_barriers) - def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, merge_prefixes=True) -> bytearray: trie = Trie() map = ResultMap() - for id, compound in state.compounds.items(): - if compound.parent: continue # start from the root - _build_search_data(state, [], id, trie, map, add_lookahead_barriers=add_lookahead_barriers) - - # TODO: examples? - strip_tags_re = re.compile('<.*?>') def strip_tags(text): return strip_tags_re.sub('', text) for result in state.search: - # Handle function arguments - name_with_args = result.name - name = result.name - suffix_length = 0 - if hasattr(result, 'params') and result.params is not None: - params = strip_tags(', '.join(result.params)) - name_with_args += '(' + params + ')' - suffix_length += len(html.unescape(params)) + 2 - if hasattr(result, 'suffix') and result.suffix: - name_with_args += result.suffix + # Decide on prefix joiner. Defines are among the :: ones as well, + # because we need to add the function macros twice -- but they have no + # prefix, so it's okay. + if result.flags & ResultFlag._TYPE in [ResultFlag.NAMESPACE, ResultFlag.CLASS, ResultFlag.STRUCT, ResultFlag.UNION, ResultFlag.TYPEDEF, ResultFlag.FUNC, ResultFlag.VAR, ResultFlag.ENUM, ResultFlag.ENUM_VALUE, ResultFlag.DEFINE]: + joiner = result_joiner = '::' + elif result.flags & ResultFlag._TYPE in [ResultFlag.DIR, ResultFlag.FILE]: + joiner = result_joiner = '/' + elif result.flags & ResultFlag._TYPE in [ResultFlag.PAGE, ResultFlag.GROUP]: + joiner = '' + result_joiner = ' » ' + else: + print(result.flags & ResultFlag._TYPE) + assert False # pragma: no cover + + # If just a leaf name, add it once + if not joiner: + assert result_joiner + result_name = result_joiner.join(result.prefix + [result.name]) + # TODO: escape elsewhere so i don't have to unescape here - suffix_length += len(html.unescape(result.suffix)) - - # TODO: escape elsewhere so i don't have to unescape here - index = map.add(html.unescape('::'.join(result.prefix + [name_with_args])), result.url, suffix_length=suffix_length, flags=result.flags) - - # Add functions and function macros the second time with () appended, - # everything is the same except for suffix length which is 2 chars - # shorter - if hasattr(result, 'params') and result.params is not None: - index_args = map.add(html.unescape('::'.join(result.prefix + [name_with_args])), result.url, - suffix_length=suffix_length - 2, flags=result.flags) - - prefixed_name = result.prefix + [name] - for i in range(len(prefixed_name)): - lookahead_barriers = [] - name = '' - for j in prefixed_name[i:]: - if name: - lookahead_barriers += [len(name)] - name += '::' - name += html.unescape(j) - trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else []) - - # Add functions and function macros the second time with () - # appended, referencing the other result that expects () appended + index = map.add(html.unescape(result_name), result.url, flags=result.flags) + trie.insert(html.unescape(result.name).lower(), index) + + # Otherwise add it multiple times with all possible prefixes + else: + # Handle function arguments + name_with_args = result.name + name = result.name + suffix_length = 0 if hasattr(result, 'params') and result.params is not None: - trie.insert(name.lower() + '()', index_args, lookahead_barriers=lookahead_barriers + [len(name)] if add_lookahead_barriers else []) + params = strip_tags(', '.join(result.params)) + name_with_args += '(' + params + ')' + suffix_length += len(html.unescape(params)) + 2 + if hasattr(result, 'suffix') and result.suffix: + name_with_args += result.suffix + # TODO: escape elsewhere so i don't have to unescape here + suffix_length += len(html.unescape(result.suffix)) + + # TODO: escape elsewhere so i don't have to unescape here + index = map.add(html.unescape(joiner.join(result.prefix + [name_with_args])), result.url, suffix_length=suffix_length, flags=result.flags) + + # Add functions and function macros the second time with () appended, + # everything is the same except for suffix length which is 2 chars + # shorter + if hasattr(result, 'params') and result.params is not None: + index_args = map.add(html.unescape(joiner.join(result.prefix + [name_with_args])), result.url, + suffix_length=suffix_length - 2, flags=result.flags) + + prefixed_name = result.prefix + [name] + for i in range(len(prefixed_name)): + lookahead_barriers = [] + name = '' + for j in prefixed_name[i:]: + if name: + lookahead_barriers += [len(name)] + name += joiner + name += html.unescape(j) + trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else []) + + # Add functions and function macros the second time with () + # appended, referencing the other result that expects () appended. + # The lookahead barrier is at the ( character to avoid the result + # being shown twice. + if hasattr(result, 'params') and result.params is not None: + trie.insert(name.lower() + '()', index_args, lookahead_barriers=lookahead_barriers + [len(name)] if add_lookahead_barriers else []) return serialize_search_data(trie, map, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes) @@ -2340,6 +2300,34 @@ def parse_xml(state: State, xml: str): else: compound.breadcrumb = [(compound.name, compound.id + '.html')] + # Add the compound to search data, if it's documented + # TODO: add example sources there? how? + if not state.doxyfile['M_SEARCH_DISABLED'] and not compound.kind == 'example' and (compound.kind == 'group' or compound.brief or compounddef.find('detaileddescription')): + if compound.kind == 'namespace': + kind = ResultFlag.NAMESPACE + elif compound.kind == 'struct': + kind = ResultFlag.STRUCT + elif compound.kind == 'class': + kind = ResultFlag.CLASS + elif compound.kind == 'union': + kind = ResultFlag.UNION + elif compound.kind == 'dir': + kind = ResultFlag.DIR + elif compound.kind == 'file': + kind = ResultFlag.FILE + elif compound.kind == 'page': + kind = ResultFlag.PAGE + elif compound.kind == 'group': + kind = ResultFlag.GROUP + else: assert False # pragma: no cover + + result = Empty() + result.flags = kind|(ResultFlag.DEPRECATED if compound.is_deprecated else ResultFlag(0)) + result.url = compound.url + result.prefix = state.current_prefix[:-1] + result.name = state.current_prefix[-1] + state.search += [result] + parsed = Empty() parsed.version = root.attrib['version'] parsed.compound = compound diff --git a/doxygen/test/test_search.py b/doxygen/test/test_search.py index 1724ef23..33428f5a 100755 --- a/doxygen/test/test_search.py +++ b/doxygen/test/test_search.py @@ -369,131 +369,131 @@ class Search(IntegrationTestCase): #print(search_data_pretty) self.assertEqual(len(serialized), 3712) self.assertEqual(search_data_pretty, """ -deprecated list [0] -|| dir [1] +deprecated_macro [0] +|| | ($ +|| | ) [1] +|| dir [19] || | /$ || | deprecatedfile.h [2] || file.h [2] -|| |oo [38] +|| |oo [29] || || ($ -|| || ) [39] -|| namespace [7] +|| || ) [30] +|| list [18] +|| namespace [31] || | :$ -|| | :deprecatedclass [8] -|| | | struct [9] -|| | | union [10] -|| | | enum [33] +|| | :deprecatedenum [24] || | | | :$ -|| | | | :value [32] -|| | | typedef [36] -|| | | variable [37] -|| | | foo [38] +|| | | | :value [23] +|| | | typedef [27] +|| | | variable [28] +|| | | foo [29] || | | | ($ -|| | | | ) [39] -|| | enum [35] +|| | | | ) [30] +|| | | class [38] +|| | | struct [39] +|| | | union [42] +|| | enum [26] || | | :$ -|| | | :deprecatedvalue [34] -|| class [8] -|| struct [9] -|| union [10] -|| _macro [17] -|| | ($ -|| | ) [18] -|| enum [33] +|| | | :deprecatedvalue [25] +|| enum [24] || | :$ -|| | :value [32] -|| value [34] -|| | riable [37] -|| typedef [36] -|ir [3] +|| | :value [23] +|| value [25] +|| | riable [28] +|| typedef [27] +|| class [38] +|| struct [39] +|| union [42] +|ir [20] || /$ -|| file.h [4] -file.h [4] -|oo [24, 26, 28, 30] +|| file.h [8] +macro [3] +| _function [4] +| ($ +| ) [5] +| _with_params [6] +| | ($ +| | ) [7] +file.h [8] +|oo [9, 11, 13, 15] || ($ -|| ) [25, 27, 29, 31] -a group [5, 6] -| page [15] -namespace [11] +|| ) [10, 12, 14, 16] +namespace [36] | :$ -| :class [12] +| :class [17] | | :$ -| | :foo [24, 26, 28, 30] +| | :foo [9, 11, 13, 15] | | ($ -| | ) [25, 27, 29, 31] -| struct [13] -| union [14] -| enum [41] +| | ) [10, 12, 14, 16] +| enum [33] | | :$ -| | :value [40] -| typedef [42] -| variable [43] -class [12] +| | :value [32] +| typedef [34] +| variable [35] +| struct [40] +| union [43] +class [17] | :$ -| :foo [24, 26, 28, 30] +| :foo [9, 11, 13, 15] | ($ -| ) [25, 27, 29, 31] -struct [13] -|ubpage [16] -union [14] -macro [19] -| _function [20] -| ($ -| ) [21] -| _with_params [22] -| | ($ -| | ) [23] -value [32, 40] -| riable [43] -enum [35, 41] +| ) [10, 12, 14, 16] +a group [21, 22] +| page [37] +value [23, 32] +| riable [35] +enum [26, 33] | :$ -| :deprecatedvalue [34] -| value [40] -typedef [42] -0: Deprecated List [type=PAGE] -> deprecated.html -1: DeprecatedDir [deprecated, type=DIR] -> dir_c6c97faf5a6cbd0f62c27843ce3af4d0.html -2: /DeprecatedFile.h [prefix=1[:0], deprecated, type=FILE] -> DeprecatedFile_8h.html -3: Dir [type=DIR] -> dir_da5033def2d0db76e9883b31b76b3d0c.html -4: /File.h [prefix=3[:0], type=FILE] -> File_8h.html -5: A group [type=GROUP] -> group__deprecated-group.html -6: A group [type=GROUP] -> group__group.html -7: DeprecatedNamespace [deprecated, type=NAMESPACE] -> namespaceDeprecatedNamespace.html -8: ::DeprecatedClass [prefix=7[:0], deprecated, type=STRUCT] -> structDeprecatedNamespace_1_1DeprecatedClass.html -9: ::DeprecatedStruct [prefix=7[:0], deprecated, type=STRUCT] -> structDeprecatedNamespace_1_1DeprecatedStruct.html -10: ::DeprecatedUnion [prefix=7[:0], deprecated, type=UNION] -> unionDeprecatedNamespace_1_1DeprecatedUnion.html -11: Namespace [type=NAMESPACE] -> namespaceNamespace.html -12: ::Class [prefix=11[:0], type=CLASS] -> classNamespace_1_1Class.html -13: ::Struct [prefix=11[:0], type=STRUCT] -> structNamespace_1_1Struct.html -14: ::Union [prefix=11[:0], type=UNION] -> unionNamespace_1_1Union.html -15: A page [type=PAGE] -> page.html -16: » Subpage [prefix=15[:0], type=PAGE] -> subpage.html -17: DEPRECATED_MACRO(a, b, c) [suffix_length=9, deprecated, type=DEFINE] -> DeprecatedFile_8h.html#a7f8376730349fef9ff7d103b0245a13e -18: [prefix=17[:56], suffix_length=7, deprecated, type=DEFINE] -> -19: MACRO [type=DEFINE] -> File_8h.html#a824c99cb152a3c2e9111a2cb9c34891e -20: _FUNCTION() [prefix=19[:14], suffix_length=2, type=DEFINE] -> 025158d6007b306645a8eb7c7a9237c1 -21: [prefix=20[:46], type=DEFINE] -> -22: _FUNCTION_WITH_PARAMS(params) [prefix=19[:15], suffix_length=8, type=DEFINE] -> 8602bba5a72becb4f2dc544ce12c420 -23: [prefix=22[:46], suffix_length=6, type=DEFINE] -> -24: ::foo() [prefix=12[:28], suffix_length=2, type=FUNC] -> #aaeba4096356215868370d6ea476bf5d9 -25: [prefix=24[:62], type=FUNC] -> -26: const [prefix=24[:30], suffix_length=8, type=FUNC] -> c03c5b93907dda16763eabd26b25500a -27: [prefix=26[:62], suffix_length=6, type=FUNC] -> -28: && [prefix=24[:30], suffix_length=5, deleted, type=FUNC] -> 77803233441965cad057a6619e9a75fd -29: [prefix=28[:62], suffix_length=3, deleted, type=FUNC] -> -30: ::foo(const Enum&, Typedef) [prefix=12[:28], suffix_length=22, type=FUNC] -> #aba8d57a830d4d79f86d58d92298677fa -31: [prefix=30[:62], suffix_length=20, type=FUNC] -> -32: ::Value [prefix=33[:67], type=ENUM_VALUE] -> a689202409e48743b914713f96d93947c -33: ::DeprecatedEnum [prefix=7[:33], deprecated, type=ENUM] -> #ab1e37ddc1d65765f2a48485df4af7b47 -34: ::DeprecatedValue [prefix=35[:67], deprecated, type=ENUM_VALUE] -> a4b5b0e9709902228c33df7e5e377e596 -35: ::Enum [prefix=7[:33], type=ENUM] -> #ac59010e983270c330b8625b5433961b9 -36: ::DeprecatedTypedef [prefix=7[:33], deprecated, type=TYPEDEF] -> #af503ad3ff194a4c2512aff16df771164 -37: ::DeprecatedVariable [prefix=7[:33], deprecated, type=VAR] -> #ae934297fc39624409333eefbfeabf5e5 -38: ::deprecatedFoo(int, bool, double) [prefix=7[:33], suffix_length=19, deprecated, type=FUNC] -> #a9a1b3fc71d294b548095985acc0d5092 -39: [prefix=38[:67], suffix_length=17, deprecated, type=FUNC] -> -40: ::Value [prefix=41[:57], type=ENUM_VALUE] -> a689202409e48743b914713f96d93947c -41: ::Enum [prefix=11[:23], type=ENUM] -> #add172b93283b1ab7612c3ca6cc5dcfea -42: ::Typedef [prefix=11[:23], type=TYPEDEF] -> #abe2a245304bc2234927ef33175646e08 -43: ::Variable [prefix=11[:23], type=VAR] -> #ad3121960d8665ab045ca1bfa1480a86d +| :deprecatedvalue [25] +| value [32] +typedef [34] +struct [40] +|ubpage [41] +union [43] +0: DEPRECATED_MACRO(a, b, c) [suffix_length=9, deprecated, type=DEFINE] -> DeprecatedFile_8h.html#a7f8376730349fef9ff7d103b0245a13e +1: [prefix=0[:56], suffix_length=7, deprecated, type=DEFINE] -> +2: /DeprecatedFile.h [prefix=19[:0], deprecated, type=FILE] -> DeprecatedFile_8h.html +3: MACRO [type=DEFINE] -> File_8h.html#a824c99cb152a3c2e9111a2cb9c34891e +4: _FUNCTION() [prefix=3[:14], suffix_length=2, type=DEFINE] -> 025158d6007b306645a8eb7c7a9237c1 +5: [prefix=4[:46], type=DEFINE] -> +6: _FUNCTION_WITH_PARAMS(params) [prefix=3[:15], suffix_length=8, type=DEFINE] -> 8602bba5a72becb4f2dc544ce12c420 +7: [prefix=6[:46], suffix_length=6, type=DEFINE] -> +8: /File.h [prefix=20[:0], type=FILE] -> File_8h.html +9: ::foo() [prefix=17[:28], suffix_length=2, type=FUNC] -> #aaeba4096356215868370d6ea476bf5d9 +10: [prefix=9[:62], type=FUNC] -> +11: const [prefix=9[:30], suffix_length=8, type=FUNC] -> c03c5b93907dda16763eabd26b25500a +12: [prefix=11[:62], suffix_length=6, type=FUNC] -> +13: && [prefix=9[:30], suffix_length=5, deleted, type=FUNC] -> 77803233441965cad057a6619e9a75fd +14: [prefix=13[:62], suffix_length=3, deleted, type=FUNC] -> +15: ::foo(const Enum&, Typedef) [prefix=17[:28], suffix_length=22, type=FUNC] -> #aba8d57a830d4d79f86d58d92298677fa +16: [prefix=15[:62], suffix_length=20, type=FUNC] -> +17: ::Class [prefix=36[:0], type=CLASS] -> classNamespace_1_1Class.html +18: Deprecated List [type=PAGE] -> deprecated.html +19: DeprecatedDir [deprecated, type=DIR] -> dir_c6c97faf5a6cbd0f62c27843ce3af4d0.html +20: Dir [type=DIR] -> dir_da5033def2d0db76e9883b31b76b3d0c.html +21: A group [deprecated, type=GROUP] -> group__deprecated-group.html +22: A group [type=GROUP] -> group__group.html +23: ::Value [prefix=24[:67], type=ENUM_VALUE] -> a689202409e48743b914713f96d93947c +24: ::DeprecatedEnum [prefix=31[:33], deprecated, type=ENUM] -> #ab1e37ddc1d65765f2a48485df4af7b47 +25: ::DeprecatedValue [prefix=26[:67], deprecated, type=ENUM_VALUE] -> a4b5b0e9709902228c33df7e5e377e596 +26: ::Enum [prefix=31[:33], type=ENUM] -> #ac59010e983270c330b8625b5433961b9 +27: ::DeprecatedTypedef [prefix=31[:33], deprecated, type=TYPEDEF] -> #af503ad3ff194a4c2512aff16df771164 +28: ::DeprecatedVariable [prefix=31[:33], deprecated, type=VAR] -> #ae934297fc39624409333eefbfeabf5e5 +29: ::deprecatedFoo(int, bool, double) [prefix=31[:33], suffix_length=19, deprecated, type=FUNC] -> #a9a1b3fc71d294b548095985acc0d5092 +30: [prefix=29[:67], suffix_length=17, deprecated, type=FUNC] -> +31: DeprecatedNamespace [deprecated, type=NAMESPACE] -> namespaceDeprecatedNamespace.html +32: ::Value [prefix=33[:57], type=ENUM_VALUE] -> a689202409e48743b914713f96d93947c +33: ::Enum [prefix=36[:23], type=ENUM] -> #add172b93283b1ab7612c3ca6cc5dcfea +34: ::Typedef [prefix=36[:23], type=TYPEDEF] -> #abe2a245304bc2234927ef33175646e08 +35: ::Variable [prefix=36[:23], type=VAR] -> #ad3121960d8665ab045ca1bfa1480a86d +36: Namespace [type=NAMESPACE] -> namespaceNamespace.html +37: A page [type=PAGE] -> page.html +38: ::DeprecatedClass [prefix=31[:0], deprecated, type=STRUCT] -> structDeprecatedNamespace_1_1DeprecatedClass.html +39: ::DeprecatedStruct [prefix=31[:0], deprecated, type=STRUCT] -> structDeprecatedNamespace_1_1DeprecatedStruct.html +40: ::Struct [prefix=36[:0], type=STRUCT] -> structNamespace_1_1Struct.html +41: » Subpage [prefix=37[:0], type=PAGE] -> subpage.html +42: ::DeprecatedUnion [prefix=31[:0], deprecated, type=UNION] -> unionDeprecatedNamespace_1_1DeprecatedUnion.html +43: ::Union [prefix=36[:0], type=UNION] -> unionNamespace_1_1Union.html """.strip()) if __name__ == '__main__': # pragma: no cover