From: Vladimír Vondruš Date: Thu, 18 Jul 2019 15:17:40 +0000 (+0200) Subject: documentation/doxygen: insert also pages with all prefixes. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=4e9cbda2f9e2dc72b918f44a552f3eb4265b65b2;p=blog.git documentation/doxygen: insert also pages with all prefixes. Not sure why pages and groups were an exception here: they were inserted into the search trie under their leaf name only, which made it practically impossible to search for subpages. --- diff --git a/documentation/doxygen.py b/documentation/doxygen.py index ff918cb9..b774e5bd 100755 --- a/documentation/doxygen.py +++ b/documentation/doxygen.py @@ -2288,78 +2288,67 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers= # because we need to add the function macros twice -- but they have no # prefix, so it's okay. if EntryType(result.flags.type) in [EntryType.NAMESPACE, EntryType.CLASS, EntryType.STRUCT, EntryType.UNION, EntryType.TYPEDEF, EntryType.FUNC, EntryType.VAR, EntryType.ENUM, EntryType.ENUM_VALUE, EntryType.DEFINE]: - joiner = result_joiner = '::' + joiner = '::' elif EntryType(result.flags.type) in [EntryType.DIR, EntryType.FILE]: - joiner = result_joiner = '/' + joiner = '/' elif EntryType(result.flags.type) in [EntryType.PAGE, EntryType.GROUP]: - joiner = '' - result_joiner = ' » ' + joiner = ' » ' else: assert False # pragma: no cover - # If just a leaf name, add it once - if not joiner: - assert result_joiner - result_name = result_joiner.join(result.prefix + [result.name]) - + # Handle function arguments + name_with_args = result.name + name = result.name + suffix_length = 0 + if hasattr(result, 'params') and result.params is not None: + # Some very heavily templated function parameters might cause the + # suffix_length to exceed 256, which won't fit into the serialized + # search data. However that *also* won't fit in the search result + # list so there's no point in storing so much. 
Truncate it to 48 + # chars which should fit the full function name in the list in most + # cases, yet be still long enough to be able to distinguish + # particular overloads. + # TODO: the suffix_length has to be calculated on UTF-8 and I + # am (un)escaping a lot back and forth here -- needs to be + # cleaned up + params = html.unescape(strip_tags(', '.join(result.params))) + if len(params) > 49: + params = params[:48] + '…' + name_with_args += '(' + html.escape(params) + ')' + suffix_length += len(params.encode('utf-8')) + 2 + if hasattr(result, 'suffix') and result.suffix: + name_with_args += result.suffix # TODO: escape elsewhere so i don't have to unescape here - index = map.add(html.unescape(result_name), result.url, flags=result.flags) - trie.insert(html.unescape(result.name).lower(), index) - - # Otherwise add it multiple times with all possible prefixes - else: - # Handle function arguments - name_with_args = result.name - name = result.name - suffix_length = 0 - if hasattr(result, 'params') and result.params is not None: - # Some very heavily templated function parameters might cause - # the suffix_length to exceed 256, which won't fit into the - # serialized search data. However that *also* won't fit in the - # search result list so there's no point in storing so much. - # Truncate it to 48 chars which should fit the full function - # name in the list in most cases, yet be still long enough to - # be able to distinguish particular overloads. 
- # TODO: the suffix_length has to be calculated on UTF-8 and I - # am (un)escaping a lot back and forth here -- needs to be - # cleaned up - params = html.unescape(strip_tags(', '.join(result.params))) - if len(params) > 49: - params = params[:48] + '…' - name_with_args += '(' + html.escape(params) + ')' - suffix_length += len(params.encode('utf-8')) + 2 - if hasattr(result, 'suffix') and result.suffix: - name_with_args += result.suffix - # TODO: escape elsewhere so i don't have to unescape here - suffix_length += len(html.unescape(result.suffix)) - - # TODO: escape elsewhere so i don't have to unescape here - index = map.add(html.unescape(joiner.join(result.prefix + [name_with_args])), result.url, suffix_length=suffix_length, flags=result.flags) - - # Add functions and function macros the second time with () appended, - # everything is the same except for suffix length which is 2 chars - # shorter + suffix_length += len(html.unescape(result.suffix)) + + # TODO: escape elsewhere so i don't have to unescape here + index = map.add(html.unescape(joiner.join(result.prefix + [name_with_args])), result.url, suffix_length=suffix_length, flags=result.flags) + + # Add functions and function macros the second time with () appended, + # everything is the same except for suffix length which is 2 chars + # shorter + if hasattr(result, 'params') and result.params is not None: + index_args = map.add(html.unescape(joiner.join(result.prefix + [name_with_args])), result.url, + suffix_length=suffix_length - 2, flags=result.flags) + + # Add the result multiple times with all possible prefixes + prefixed_name = result.prefix + [name] + for i in range(len(prefixed_name)): + lookahead_barriers = [] + name = '' + for j in prefixed_name[i:]: + if name: + lookahead_barriers += [len(name)] + name += joiner + name += html.unescape(j) + trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else []) + + # Add functions and function macros the second 
time with () + # appended, referencing the other result that expects () appended. + # The lookahead barrier is at the ( character to avoid the result + # being shown twice. if hasattr(result, 'params') and result.params is not None: - index_args = map.add(html.unescape(joiner.join(result.prefix + [name_with_args])), result.url, - suffix_length=suffix_length - 2, flags=result.flags) - - prefixed_name = result.prefix + [name] - for i in range(len(prefixed_name)): - lookahead_barriers = [] - name = '' - for j in prefixed_name[i:]: - if name: - lookahead_barriers += [len(name)] - name += joiner - name += html.unescape(j) - trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else []) - - # Add functions and function macros the second time with () - # appended, referencing the other result that expects () appended. - # The lookahead barrier is at the ( character to avoid the result - # being shown twice. - if hasattr(result, 'params') and result.params is not None: - trie.insert(name.lower() + '()', index_args, lookahead_barriers=lookahead_barriers + [len(name)] if add_lookahead_barriers else []) + trie.insert(name.lower() + '()', index_args, lookahead_barriers=lookahead_barriers + [len(name)] if add_lookahead_barriers else []) # Add keyword aliases for this symbol for search, title, suffix_length in result.keywords: diff --git a/documentation/test_doxygen/test_search.py b/documentation/test_doxygen/test_search.py index 3cb23713..8da0e12a 100755 --- a/documentation/test_doxygen/test_search.py +++ b/documentation/test_doxygen/test_search.py @@ -44,7 +44,7 @@ class Search(IntegrationTestCase): serialized = f.read() search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] #print(search_data_pretty) - self.assertEqual(len(serialized), 4802) + self.assertEqual(len(serialized), 4836) self.assertEqual(search_data_pretty, """ 53 symbols deprecated_macro [0] @@ -135,6 +135,10 @@ class [20] | ) [12, 15, 19, 17] a group 
[29, 28] | page [52] +| | $ +| | 0xc2 +| | 0xbb +| | subpage [57] value [41, 31] | riable [48] enum [44, 34]