chiark / gitweb /
doxygen: make search lookahead barriers and subtree merging configurable.
authorVladimír Vondruš <mosra@centrum.cz>
Tue, 30 Jan 2018 22:56:06 +0000 (23:56 +0100)
committerVladimír Vondruš <mosra@centrum.cz>
Sat, 3 Feb 2018 09:51:55 +0000 (10:51 +0100)
So I can see the difference: the new --search-no-subtree-merging and --search-no-lookahead-barriers options make it possible to generate the search data without these optimizations and compare the resulting size.

doxygen/dox2html5.py

index e01191680c854006c340e6fe4a87177da8eb07d5..2e2272126e88e5afa9aa452bd22dfcd2641d0102 100755 (executable)
@@ -85,11 +85,11 @@ class Trie:
         self._insert(path.encode('utf-8'), value, lookahead_barriers)
 
     # Returns offset of the serialized thing in `output`
-    def _serialize(self, hashtable, output: bytearray) -> int:
+    def _serialize(self, hashtable, output: bytearray, merge_subtrees) -> int:
         # Serialize all children first
         child_offsets = []
         for char, child in self.children.items():
-            offset = child[1]._serialize(hashtable, output)
+            offset = child[1]._serialize(hashtable, output, merge_subtrees=merge_subtrees)
             child_offsets += [(char, child[0], offset)]
 
         # Serialize this node
@@ -115,18 +115,18 @@ class Trie:
         # its offset. Otherwise add it and return the new offset.
         # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
         hashable = bytes(serialized)
-        if hashable in hashtable:
+        if merge_subtrees and hashable in hashtable:
             return hashtable[hashable]
         else:
             offset = len(output)
             output += serialized
-            hashtable[hashable] = offset
+            if merge_subtrees: hashtable[hashable] = offset
             return offset
 
-    def serialize(self) -> bytearray:
+    def serialize(self, merge_subtrees=True) -> bytearray:
         output = bytearray(b'\x00\x00\x00\x00')
         hashtable = {}
-        self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output))
+        self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output, merge_subtrees=merge_subtrees))
         return output
 
 class ResultFlag(Flag):
@@ -203,8 +203,8 @@ class ResultMap:
 
 search_data_header_struct = struct.Struct('<3sBI')
 
-def serialize_search_data(trie: Trie, map: ResultMap) -> bytearray:
-    serialized_trie = trie.serialize()
+def serialize_search_data(trie: Trie, map: ResultMap, merge_subtrees=True) -> bytearray:
+    serialized_trie = trie.serialize(merge_subtrees=merge_subtrees)
     serialized_map = map.serialize()
     # magic header, version, offset of result map
     return search_data_header_struct.pack(b'MCS', 0, len(serialized_trie) + 8) + serialized_trie + serialized_map
@@ -1519,7 +1519,7 @@ def postprocess_state(state: State):
     if state.doxyfile['M_FAVICON']:
         state.doxyfile['M_FAVICON'] = (state.doxyfile['M_FAVICON'], mimetypes.guess_type(state.doxyfile['M_FAVICON'])[0])
 
-def _build_search_data(state: State, prefix, id: str, trie: Trie, map: ResultMap):
+def _build_search_data(state: State, prefix, id: str, trie: Trie, map: ResultMap, add_lookahead_barriers):
     compound = state.compounds[id]
     if not compound.brief and not compound.has_details: return 0
 
@@ -1563,19 +1563,19 @@ def _build_search_data(state: State, prefix, id: str, trie: Trie, map: ResultMap
                     lookahead_barriers += [len(name)]
                     name += joiner
                 name += html.unescape(j)
-            trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers)
+            trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else [])
 
     for i in compound.children:
         if i in state.compounds:
-            _build_search_data(state, prefixed_name, i, trie, map)
+            _build_search_data(state, prefixed_name, i, trie, map, add_lookahead_barriers=add_lookahead_barriers)
 
-def build_search_data(state: State) -> bytearray:
+def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True) -> bytearray:
     trie = Trie()
     map = ResultMap()
 
     for id, compound in state.compounds.items():
         if compound.parent: continue # start from the root
-        _build_search_data(state, [], id, trie, map)
+        _build_search_data(state, [], id, trie, map, add_lookahead_barriers=add_lookahead_barriers)
 
     # TODO: examples?
 
@@ -1609,9 +1609,9 @@ def build_search_data(state: State) -> bytearray:
                     lookahead_barriers += [len(name)]
                     name += '::'
                 name += html.unescape(j)
-            trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers)
+            trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else [])
 
-    return serialize_search_data(trie, map)
+    return serialize_search_data(trie, map, merge_subtrees=merge_subtrees)
 
 def base85encode_search_data(data: bytearray) -> bytearray:
     return (b"/* Generated by http://mcss.mosra.cz/doxygen/. Do not edit. */\n" +
@@ -2430,7 +2430,7 @@ default_index_pages = ['pages', 'files', 'namespaces', 'modules', 'annotated']
 default_wildcard = '*.xml'
 default_templates = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'templates/')
 
-def run(doxyfile, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages):
+def run(doxyfile, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages, search_add_lookahead_barriers=True, search_merge_subtrees=True):
     state = State()
     state.basedir = os.path.dirname(doxyfile)
 
@@ -2514,7 +2514,7 @@ def run(doxyfile, templates=default_templates, wildcard=default_wildcard, index_
             f.write(rendered)
 
     if not state.doxyfile['M_SEARCH_DISABLED']:
-        data = build_search_data(state)
+        data = build_search_data(state, add_lookahead_barriers=search_add_lookahead_barriers, merge_subtrees=search_merge_subtrees)
 
         if state.doxyfile['M_SEARCH_DOWNLOAD_BINARY']:
             with open(os.path.join(html_output, "searchdata.bin"), 'wb') as f:
@@ -2546,6 +2546,8 @@ if __name__ == '__main__': # pragma: no cover
     parser.add_argument('--wildcard', help="only process files matching the wildcard", default=default_wildcard)
     parser.add_argument('--index-pages', nargs='+', help="index page templates", default=default_index_pages)
     parser.add_argument('--no-doxygen', help="don't run Doxygen before", action='store_true')
+    parser.add_argument('--search-no-subtree-merging', help="don't merge search data subtrees", action='store_true')
+    parser.add_argument('--search-no-lookahead-barriers', help="don't insert search lookahead barriers", action='store_true')
     parser.add_argument('--debug', help="verbose debug output", action='store_true')
     args = parser.parse_args()
 
@@ -2560,4 +2562,4 @@ if __name__ == '__main__': # pragma: no cover
     if not args.no_doxygen:
         subprocess.run(["doxygen", doxyfile], cwd=os.path.dirname(doxyfile))
 
-    run(doxyfile, os.path.abspath(args.templates), args.wildcard, args.index_pages)
+    run(doxyfile, os.path.abspath(args.templates), args.wildcard, args.index_pages, search_merge_subtrees=not args.search_no_subtree_merging, search_add_lookahead_barriers=not args.search_no_lookahead_barriers)