doxygen: save result count instead of node size to search binary.

author Vladimír Vondruš <mosra@centrum.cz>

Sat, 3 Feb 2018 11:53:58 +0000 (12:53 +0100)

committer Vladimír Vondruš <mosra@centrum.cz>

Sun, 4 Feb 2018 14:02:58 +0000 (15:02 +0100)
author Vladimír Vondruš <mosra@centrum.cz>
Sat, 3 Feb 2018 11:53:58 +0000 (12:53 +0100)
committer Vladimír Vondruš <mosra@centrum.cz>
Sun, 4 Feb 2018 14:02:58 +0000 (15:02 +0100)
diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py

index 5adbb1ee759953f8574c2a1a02113247b6c1f082..fbc0d23830ce3e63b722feffd0ae788e344013d6 100755 (executable)
--- a/doxygen/dox2html5.py
+++ b/doxygen/dox2html5.py
@@ -54,22 +54,22 @@ import m.math
  import ansilexer
  
  class Trie:
-    #  root  |     |     header       | values | child 1 | child 1 | child 1 |
-    # offset | ... | size/2 | value # |  ...   |   char  | barrier | offset  | ...
-    #  32b   |     |   8b   |    8b   | n*16b  |   8b    |    1b   |   23b   |
+    #  root  |     |     header         | results | child 1 | child 1 | child 1 |
+    # offset | ... | result # | value # |   ...   |  char   | barrier | offset  | ...
+    #  32b   |     |    8b    |   8b    |  n*16b  |   8b    |    1b   |   23b   |
      root_offset_struct = struct.Struct('<I')
      header_struct = struct.Struct('<BB')
-    value_struct = struct.Struct('<H')
+    result_struct = struct.Struct('<H')
      child_struct = struct.Struct('<I')
      child_char_struct = struct.Struct('<B')
  
      def __init__(self):
-        self.values = []
+        self.results = []
          self.children = {}
  
-    def _insert(self, path: bytes, value, lookahead_barriers):
+    def _insert(self, path: bytes, result, lookahead_barriers):
          if not path:
-            self.values += [value]
+            self.results += [result]
              return
  
          char = path[0]
@@ -78,11 +78,11 @@ class Trie:
          if lookahead_barriers and lookahead_barriers[0] == 0:
              lookahead_barriers = lookahead_barriers[1:]
              self.children[char] = (True, self.children[char][1])
-        self.children[char][1]._insert(path[1:], value, [b - 1 for b in lookahead_barriers])
+        self.children[char][1]._insert(path[1:], result, [b - 1 for b in lookahead_barriers])
  
-    def insert(self, path: str, value, lookahead_barriers=[]):
+    def insert(self, path: str, result, lookahead_barriers=[]):
          assert not path.isupper() # to avoid unnecessary duplicates
-        self._insert(path.encode('utf-8'), value, lookahead_barriers)
+        self._insert(path.encode('utf-8'), result, lookahead_barriers)
  
      # Returns offset of the serialized thing in `output`
      def _serialize(self, hashtable, output: bytearray, merge_subtrees) -> int:
@@ -93,11 +93,10 @@ class Trie:
              child_offsets += [(char, child[0], offset)]
  
          # Serialize this node
-        size = int(2 + 2*len(self.values) + 4*len(child_offsets))
          serialized = bytearray()
-        serialized += self.header_struct.pack(int(size/2), len(self.values))
-        for v in self.values:
-            serialized += self.value_struct.pack(v)
+        serialized += self.header_struct.pack(len(self.results), len(self.children))
+        for v in self.results:
+            serialized += self.result_struct.pack(v)
  
          # Serialize child offsets
          for char, lookahead_barrier, abs_offset in child_offsets:
@@ -109,8 +108,6 @@ class Trie:
              serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23))
              self.child_char_struct.pack_into(serialized, offset + 3, char)
  
-        assert size == len(serialized)
-
          # Subtree merging: if this exact tree is already in the table, return
          # its offset. Otherwise add it and return the new offset.
          # TODO: why hashable = bytes(output[base_offset:] + serialized) didn't work?
diff --git a/doxygen/search.js b/doxygen/search.js

index 0bac7a514785943980a2ef07e6d7d6d3fb6f5403..060626100029faab659f58cc534951e9120cf213 100644 (file)
--- a/doxygen/search.js
+++ b/doxygen/search.js
@@ -188,9 +188,8 @@ var Search = {
          for(; foundPrefix != searchString.length; ++foundPrefix) {
              /* Calculate offset and count of children */
              let offset = this.searchStack[this.searchStack.length - 1];
-            let nodeSize = this.trie.getUint8(offset)*2;
-            let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2;
-            let childCount = (nodeSize - relChildOffset)/4;
+            let relChildOffset = 2 + this.trie.getUint8(offset)*2;
+            let childCount = this.trie.getUint8(offset + 1);
  
              /* Go through all children and find the next offset */
              let childOffset = offset + relChildOffset;
@@ -230,10 +229,10 @@ var Search = {
      },
  
      gatherResults: function(offset, suffixLength, results) {
-        let valueCount = this.trie.getUint8(offset + 1);
+        let resultCount = this.trie.getUint8(offset);
  
          /* Populate the results with all values associated with this node */
-        for(let i = 0; i != valueCount; ++i) {
+        for(let i = 0; i != resultCount; ++i) {
              let index = this.trie.getUint16(offset + (i + 1)*2, true);
              let flags = this.map.getUint8(index*4 + 3);
              let resultOffset = this.map.getUint32(index*4, true) & 0x00ffffff;
@@ -279,9 +278,8 @@ var Search = {
  
          /* Dig deeper. If the child already has enough, return. */
          /* TODO: hmmm. this is helluvalot duplicated code. hmm. */
-        let nodeSize = this.trie.getUint8(offset)*2;
-        let relChildOffset = 2 + this.trie.getUint8(offset + 1)*2;
-        let childCount = (nodeSize - relChildOffset)/4;
+        let relChildOffset = 2 + this.trie.getUint8(offset)*2;
+        let childCount = this.trie.getUint8(offset + 1);
          let childOffset = offset + relChildOffset;
          for(let j = 0; j != childCount; ++j) {
              let offsetBarrier = this.trie.getUint32(childOffset + j*4, true);
diff --git a/doxygen/test/js-test-data/empty.bin b/doxygen/test/js-test-data/empty.bin

index 053f032fcdea537178c8a915da4a53c84b6d060e..6d194001ded57661326bd550013ee951d9ef184b 100644 (file)

Binary files a/doxygen/test/js-test-data/empty.bin and b/doxygen/test/js-test-data/empty.bin differ
diff --git a/doxygen/test/js-test-data/searchdata.b85 b/doxygen/test/js-test-data/searchdata.b85

index 2d1c16147fb9ae7a16c828c6357a3b93044c3d1d..a9f602a19b31905eb552383bd714b835448d6543 100644 (file)
--- a/doxygen/test/js-test-data/searchdata.b85
+++ b/doxygen/test/js-test-data/searchdata.b85
@@ -1 +1 @@
-O+!-vL;(N*Dggih0s#R40{{d704W0i2mk;m0{{*H0B!>S6aWBe0s#X60{|cZ04W0iBme*?0{|)j0B!>SFaQ8)0{}Jv0Br*RJOBVX1OWm7LI8j|0{}<>0CEEWPyhgL0{~V40CWQYTmS%L0{~(G0A&IJ1pos8ZU6u&0|0UW04M_hcmM!y0|0&i0BHjNga80-0|1Hu06GK#1OSi#fI0&JmH+@{0|1@?0A~XLqyPYJ0|2T30AU9J8UO%oXaE3qumAvZ0|2%F06GK#006`QfI0&J$^Zap0|3$h0CWTc0RRI41pos8-T(k80|4d#04M_h>;M361pwFp0Aca~0BHgN1^@#90s#PJ0{{jA0A~XL3;_UP0{{{M0B{2U7y$rc0{|WY0Cfof_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl
-\ No newline at end of file
+O+!-vL;(N*Dggih0RRC2009I504V?g2mk;m009mF0B!&Q6aWBe0RRI400AHX04V?gBme*?00Alh0B!&QFaQ8)00A}t0BryPJOBVX0RaL4LI8j|00Bq<0CE5UPyhgL00CA20CWHWTmS%L00CkE0A&FH1poj6ZU6u&00D9U04M+fcmM!y00Djg0BHaLga80-00D{s06GBy1OSi#fI0vHmH+@{00Eu=0A~OJqyPYJ00F810AT<F8UO%oXaE3qumAvZ00FiD06GBy006`QfI0vH$^Zap00Ghf0CWQY0RRI41poj6-T(k800HIz04M+f>;M3600P(m0Aca~0BHdL1^@s70s#PJ009O80A~OJ3;_UP009yK0B`^S7y$rc00ABW0CfNa_y7QHXaE3qumAvZBmn?(AOHXmHvj-(VgLXlhX4R!z5oCq;Q#<-76AajG64VpO<{Cs0B&JzWpi+0V`WWYbZ9PUbZu-1O<{CsIy!A>ZYXJPbSxlgZgeRCZeeX@b8ul}WldppXf9}UZEPcLX>LtnbZ9y{R%K&!Z*l-*Y+-YAO<{CsUol@XR%K&!Z*neZbZu+~O<{CsIyzQmV{~tFIy!A>ZYU`rV{dMAbO2*)VRLg$VRUF;F<&uOWn*-2axQ3eZEPcLX>LtnbZ9y{QekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYz9qXbZ9y{QekdqWjZ=-X>KSfAY*TCb94Y>Y+-YAO<{CsUol@XQekdqWiDuRZEPcLX>L$qXJsJ5yC73_VsK$+WdL(^VsK$+WiDuRZEOGl
+\ No newline at end of file
diff --git a/doxygen/test/js-test-data/searchdata.bin b/doxygen/test/js-test-data/searchdata.bin

index 12fe6ea17cdd32c9f0e4fd08ada7e5617a356704..3c44891c33b1fa3edb935a8e0ed2e00e8ffac0d3 100644 (file)

Binary files a/doxygen/test/js-test-data/searchdata.bin and b/doxygen/test/js-test-data/searchdata.bin differ
diff --git a/doxygen/test/js-test-data/unicode.bin b/doxygen/test/js-test-data/unicode.bin

index ed2bd5051a6b5edf84a1ef12a6f27dede4a1c005..23aa5fc9c091ceca3a549c5e28df3abb58344b97 100644 (file)

Binary files a/doxygen/test/js-test-data/unicode.bin and b/doxygen/test/js-test-data/unicode.bin differ
diff --git a/doxygen/test/test_search.py b/doxygen/test/test_search.py

index 4c1a98dde737e3d1ba6c9ee10220f52d6aea9b00..e915e13591dcbef2d1d03115a8603153fd971a30 100755 (executable)
--- a/doxygen/test/test_search.py
+++ b/doxygen/test/test_search.py
@@ -34,7 +34,7 @@ from dox2html5 import Trie, ResultMap, ResultFlag, serialize_search_data, search
  
  from test import IntegrationTestCase
  
-def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, draw_pipe, show_merged, show_lookahead_barriers, color_map) -> str:
+def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, show_merged, show_lookahead_barriers, color_map) -> str:
      # Visualize where the trees were merged
      if show_merged and base_offset in hashtable:
          return color_map['red'] + '#' + color_map['reset']
@@ -42,27 +42,25 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind
      stats.node_count += 1
  
      out = ''
-    size, value_count = Trie.header_struct.unpack_from(serialized, base_offset)
-    stats.max_node_size = max(size, stats.max_node_size)
-    stats.max_node_values = max(value_count, stats.max_node_values)
+    result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset)
+    stats.max_node_results = max(result_count, stats.max_node_results)
+    stats.max_node_children = max(child_count, stats.max_node_children)
      offset = base_offset + Trie.header_struct.size
  
-    # print values, if any
-    if value_count:
+    # print results, if any
+    if result_count:
          out += color_map['blue'] + ' ['
-        for i in range(value_count):
+        for i in range(result_count):
              if i: out += color_map['blue']+', '
-            value = Trie.value_struct.unpack_from(serialized, offset)[0]
-            stats.max_node_value_index = max(value, stats.max_node_value_index)
-            out += color_map['cyan'] + str(value)
-            offset += Trie.value_struct.size
+            result = Trie.result_struct.unpack_from(serialized, offset)[0]
+            stats.max_node_result_index = max(result, stats.max_node_result_index)
+            out += color_map['cyan'] + str(result)
+            offset += Trie.result_struct.size
          out += color_map['blue'] + ']'
  
-    # print children
-    if base_offset + size*2 - offset > 4: draw_pipe = True
-    child_count = 0
-    while offset < base_offset + size*2:
-        if child_count or value_count:
+    # print children, if any
+    for i in range(child_count):
+        if result_count or i:
              out += color_map['reset'] + '\n'
              out += color_map['blue'] + indent + color_map['white']
          char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
@@ -77,11 +75,9 @@ def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, ind
          child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
          stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
          offset += Trie.child_struct.size
-        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if draw_pipe else ' '), draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+        out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
          child_count += 1
  
-    stats.max_node_children = max(child_count, stats.max_node_children)
-
      hashtable[base_offset] = True
      return out
  
@@ -108,21 +104,19 @@ def pretty_print_trie(serialized: bytes, show_merged=False, show_lookahead_barri
  
      stats = Empty()
      stats.node_count = 0
-    stats.max_node_size = 0
-    stats.max_node_values = 0
+    stats.max_node_results = 0
      stats.max_node_children = 0
-    stats.max_node_value_index = 0
+    stats.max_node_result_index = 0
      stats.max_node_child_offset = 0
  
-    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', draw_pipe=False, show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+    out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
      if out: out = color_map['white'] + out
      stats = """
  node count:             {}
-max node size:          {} bytes
-max node values:        {}
+max node results:       {}
  max node children:      {}
-max node value index:   {}
-max node child offset:  {}""".lstrip().format(stats.node_count, stats.max_node_size*2, stats.max_node_values, stats.max_node_children, stats.max_node_value_index, stats.max_node_child_offset)
+max node result index:  {}
+max node child offset:  {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
      return out, stats
  
  def pretty_print_map(serialized: bytes, colors=False):
author	Vladimír Vondruš <mosra@centrum.cz>
	Sat, 3 Feb 2018 11:53:58 +0000 (12:53 +0100)
committer	Vladimír Vondruš <mosra@centrum.cz>
	Sun, 4 Feb 2018 14:02:58 +0000 (15:02 +0100)
doxygen/dox2html5.py		patch \| blob \| history
doxygen/search.js		patch \| blob \| history
doxygen/test/js-test-data/empty.bin		patch \| blob \| history
doxygen/test/js-test-data/searchdata.b85		patch \| blob \| history
doxygen/test/js-test-data/searchdata.bin		patch \| blob \| history
doxygen/test/js-test-data/unicode.bin		patch \| blob \| history
doxygen/test/test_search.py		patch \| blob \| history