documentation: make it possible to have more than 128 results for a node.

author Vladimír Vondruš <mosra@centrum.cz>

Thu, 18 Jul 2019 10:55:59 +0000 (12:55 +0200)

committer Vladimír Vondruš <mosra@centrum.cz>

Thu, 18 Jul 2019 15:41:49 +0000 (17:41 +0200)
author Vladimír Vondruš <mosra@centrum.cz>
Thu, 18 Jul 2019 10:55:59 +0000 (12:55 +0200)
committer Vladimír Vondruš <mosra@centrum.cz>
Thu, 18 Jul 2019 15:41:49 +0000 (17:41 +0200)
diff --git a/documentation/_search.py b/documentation/_search.py

index 25c0be1ba7b0011c0d28fdbb885830a0cc2d4b8f..7d6e26562b5484d6d1fe09258b900cde180386fd 100644 (file)
--- a/documentation/_search.py
+++ b/documentation/_search.py
@@ -271,9 +271,16 @@ class ResultMap:
          return output
  
  class Trie:
-    #  root  |     |     header         | results | child 1 | child 1 | child 1 |
-    # offset | ... | result # | value # |   ...   |  char   | barrier | offset  | ...
-    #  32b   |     |    8b    |   8b    |  n*16b  |   8b    |    1b   |   23b   |
+    #  root  |     |       header         | results | child 1 | child 1 | child 1 |
+    # offset | ... | | result # | child # |   ...   |  char   | barrier | offset  | ...
+    #  32b   |     |0|    7b    |   8b    |  n*16b  |   8b    |    1b   |   23b   |
+    #
+    # if result count > 127, it's instead (result # bits 7-10 are stored in the upper half of the second header byte):
+    #
+    #  root  |     |      header          | results | child 1 | child 1 | child 1 |
+    # offset | ... | | result # | child # |   ...   |  char   | barrier | offset  | ...
+    #  32b   |     |1|   11b    |   4b    |  n*16b  |   8b    |    1b   |   23b   |
+
      root_offset_struct = struct.Struct('<I')
      header_struct = struct.Struct('<BB')
      result_struct = struct.Struct('<H')
@@ -337,9 +344,21 @@ class Trie:
              offset = child[1]._serialize(hashtable, output, merge_subtrees=merge_subtrees)
              child_offsets += [(char, child[0], offset)]
  
-        # Serialize this node
+        # Serialize this node. Sometimes we'd have an insane amount of results
+        # (such as Python's __init__), but very few children to go with that.
+        # Then we can make the result count storage larger (11 bits, up to
+        # 2047 results) and the child count storage smaller (4 bits, up to 15
+        # children). Hopefully that's enough. The remaining leftmost bit is
+        # used as an indicator of this shifted state.
          serialized = bytearray()
-        serialized += self.header_struct.pack(len(self.results), len(self.children))
+        if len(self.results) > 127:
+            assert len(self.children) < 16 and len(self.results) < 2048
+            result_count = (len(self.results) & 0x7f) | 0x80
+            children_count = ((len(self.results) & 0xf80) >> 3) | len(self.children)
+        else:
+            result_count = len(self.results)
+            children_count = len(self.children)
+        serialized += self.header_struct.pack(result_count, children_count)
          for v in self.results:
              serialized += self.result_struct.pack(v)
  
diff --git a/documentation/search.js b/documentation/search.js

index cbd6ed0313991a8699712782550a4b13c7a02487..8b0c9a5b2d75fac60ea6e4b294235cfda9778af0 100644 (file)
--- a/documentation/search.js
+++ b/documentation/search.js
@@ -256,7 +256,15 @@ var Search = {
              /* Calculate offset and count of children */
              let offset = this.searchStack[this.searchStack.length - 1];
-            let relChildOffset = 2 + this.trie.getUint8(offset)*2;
+
+            /* Decode result and child count. If the top bit is set, the upper
+               result count bits "leak over" to the child count storage. */
+            let resultCount = this.trie.getUint8(offset);
              let childCount = this.trie.getUint8(offset + 1);
+            if(resultCount & 0x80) {
+                resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3);
+                childCount = childCount & 0x0f;
+            }
+            let relChildOffset = 2 + resultCount*2; /* decoded count, the raw byte is wrong for 256+ results */
  
              /* Go through all children and find the next offset */
              let childOffset = offset + relChildOffset;
@@ -299,8 +307,17 @@ var Search = {
              let offset = current[0];
              let suffixLength = current[1];
  
-            /* Populate the results with all values associated with this node */
+            /* Calculate child count. If there's a lot of results, the count
+               "leaks over" to the child count storage. */
+            /* TODO: hmmm. this is helluvalot duplicated code. hmm. */
              let resultCount = this.trie.getUint8(offset);
+            let childCount = this.trie.getUint8(offset + 1);
+            if(resultCount & 0x80) {
+                resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3);
+                childCount = childCount & 0x0f;
+            }
+
+            /* Populate the results with all values associated with this node */
              for(let i = 0; i != resultCount; ++i) {
                  let index = this.trie.getUint16(offset + (i + 1)*2, true);
                  results.push(this.gatherResult(index, suffixLength, 0xffffff)); /* should be enough haha */
@@ -313,7 +330,6 @@ var Search = {
              /* Dig deeper */
              /* TODO: hmmm. this is helluvalot duplicated code. hmm. */
-            let relChildOffset = 2 + this.trie.getUint8(offset)*2;
-            let childCount = this.trie.getUint8(offset + 1);
+            let relChildOffset = 2 + resultCount*2; /* decoded count, the raw byte is wrong for 256+ results */
              let childOffset = offset + relChildOffset;
              for(let j = 0; j != childCount; ++j) {
                  let offsetBarrier = this.trie.getUint32(childOffset + j*4, true);
diff --git a/documentation/test/js-test-data/manyresults.bin b/documentation/test/js-test-data/manyresults.bin

new file mode 100644 (file)

index 0000000..4d3eb35

Binary files /dev/null and b/documentation/test/js-test-data/manyresults.bin differ
diff --git a/documentation/test/populate-js-test-data.py b/documentation/test/populate-js-test-data.py

index 998beff10a7f0401e39e1f6236637152b03b764d..9903ee84b83669b1e7edfbe4d56b96e25b5dd3a2 100755 (executable)
--- a/documentation/test/populate-js-test-data.py
+++ b/documentation/test/populate-js-test-data.py
@@ -103,3 +103,19 @@ trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Rang
  
  with open(basedir/'nested.bin', 'wb') as f:
      f.write(serialize_search_data(trie, map, search_type_map, 4))
+
+# Extreme amount of search results (Python's __init__, usually)
+
+trie = Trie()
+map = ResultMap()
+
+for i in range(128):
+    trie.insert("__init__", map.add(f"Foo{i}.__init__(self)", f"Foo{i}.html#__init__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)))
+
+# It's __init_subclass__, but here I want to trigger the case of both a high
+# amount of results and some children as well.
+for i in [3, 15, 67]:
+    trie.insert("__init__subclass__", map.add(f"Foo{i}.__init__subclass__(self)", f"Foo{i}.html#__init__subclass__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)))
+
+with open(basedir/'manyresults.bin', 'wb') as f:
+    f.write(serialize_search_data(trie, map, search_type_map, 128 + 3))
diff --git a/documentation/test/test-search.js b/documentation/test/test-search.js

index 2bf9cc7afab9e3d17789909a442b9d8899e6b071..d99550ba2ed5417e20fc4ab75517ea57c3d2d3b2 100644 (file)
--- a/documentation/test/test-search.js
+++ b/documentation/test/test-search.js
@@ -336,4 +336,66 @@ const { StringDecoder } = require('string_decoder');
            suffixLength: 3 }], 'nge']);
  }
  
+/* Extreme amount of search results */
+{
+    let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/manyresults.bin"));
+    assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength), 10000));
+    assert.equal(Search.dataSize, 6415);
+    assert.equal(Search.symbolCount, 128 + 3);
+    assert.equal(Search.maxResults, 10000);
+    assert.deepEqual(Search.search('__init__')[0].length, 128 + 3);
+    assert.deepEqual(Search.search('__init__')[1], '');
+    assert.deepEqual(Search.search('__init__')[0][0],
+        { name: 'Foo0.__init__(self)',
+          url: 'Foo0.html#__init__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 6 });
+    /* The 127 other results in between are similar. It should also print
+       results from the children: */
+    assert.deepEqual(Search.search('__init__')[0][128],
+        { name: 'Foo3.__init__subclass__(self)',
+          url: 'Foo3.html#__init__subclass__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 16 });
+    assert.deepEqual(Search.search('__init__')[0][129],
+        { name: 'Foo15.__init__subclass__(self)',
+          url: 'Foo15.html#__init__subclass__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 16 });
+    assert.deepEqual(Search.search('__init__')[0][130],
+        { name: 'Foo67.__init__subclass__(self)',
+          url: 'Foo67.html#__init__subclass__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 16 });
+
+    /* Searching for nested results should work as well */
+    assert.deepEqual(Search.search('__init__s'), [[
+        { name: 'Foo3.__init__subclass__(self)',
+          url: 'Foo3.html#__init__subclass__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 15 },
+        { name: 'Foo15.__init__subclass__(self)',
+          url: 'Foo15.html#__init__subclass__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 15 },
+        { name: 'Foo67.__init__subclass__(self)',
+          url: 'Foo67.html#__init__subclass__',
+          flags: 1, /* has suffix */
+          cssClass: 'm-info',
+          typeName: 'func',
+          suffixLength: 15 }], 'ubclass__']);
+}
+
  /* Not testing Search.download() because the xmlhttprequest npm package is *crap* */
author	Vladimír Vondruš <mosra@centrum.cz>
	Thu, 18 Jul 2019 10:55:59 +0000 (12:55 +0200)
committer	Vladimír Vondruš <mosra@centrum.cz>
	Thu, 18 Jul 2019 15:41:49 +0000 (17:41 +0200)
documentation/_search.py		patch \| blob \| history
documentation/search.js		patch \| blob \| history
documentation/test/js-test-data/manyresults.bin	[new file with mode: 0644]	patch \| blob
documentation/test/populate-js-test-data.py		patch \| blob \| history
documentation/test/test-search.js		patch \| blob \| history