return output
class Trie:
- # root | | header | results | child 1 | child 1 | child 1 |
- # offset | ... | result # | value # | ... | char | barrier | offset | ...
- # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b |
+ # root | | header | results | child 1 | child 1 | child 1 |
+ # offset | ... | | result # | child # | ... | char | barrier | offset | ...
+ # 32b | |0| 7b | 8b | n*16b | 8b | 1b | 23b |
+ #
+ # if result count > 127, it's instead:
+ #
+ # root | | header | results | child 1 | child 1 | child 1 |
+ # offset | ... | | result # | child # | ... | char | barrier | offset | ...
+ # 32b | |1| 11b | 4b | n*16b | 8b | 1b | 23b |
+
root_offset_struct = struct.Struct('<I')
header_struct = struct.Struct('<BB')
result_struct = struct.Struct('<H')
offset = child[1]._serialize(hashtable, output, merge_subtrees=merge_subtrees)
child_offsets += [(char, child[0], offset)]
- # Serialize this node
+ # Serialize this node. Sometimes we'd have an extreme number of results
+ # (such as Python's __init__), but very few children to go with that.
+ # Then we can make the result count storage larger (11 bits, up to
+ # 2047 results) and the child count storage smaller (4 bits, up to 15
+ # children). Hopefully that's enough. The remaining leftmost bit of
+ # the first header byte is used as an indicator of this shifted state.
serialized = bytearray()
- serialized += self.header_struct.pack(len(self.results), len(self.children))
+ if len(self.results) > 127:
+ assert len(self.children) < 16 and len(self.results) < 2048
+ result_count = (len(self.results) & 0x7f) | 0x80
+ children_count = ((len(self.results) & 0xf80) >> 3) | len(self.children)
+ else:
+ result_count = len(self.results)
+ children_count = len(self.children)
+ serialized += self.header_struct.pack(result_count, children_count)
for v in self.results:
serialized += self.result_struct.pack(v)
/* Calculate offset and count of children */
let offset = this.searchStack[this.searchStack.length - 1];
- let relChildOffset = 2 + this.trie.getUint8(offset)*2;
+
+ /* Calculate result and child count. If there's a lot of results, the
+ result count "leaks over" to the child count storage: the top bit of
+ the first header byte flags that, its low 7 bits are the low bits of
+ the result count and the high nibble of the second byte holds the
+ remaining 4 bits. */
+ let resultCount = this.trie.getUint8(offset);
let childCount = this.trie.getUint8(offset + 1);
+ if(resultCount & 0x80) {
+ resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3);
+ childCount = childCount & 0x0f;
+ }
+
+ /* Children follow the 2-byte header and the 16-bit results. This has to
+ use the decoded result count -- the raw header byte is equal to it only
+ for less than 256 results. */
+ let relChildOffset = 2 + resultCount*2;
/* Go through all children and find the next offset */
let childOffset = offset + relChildOffset;
let offset = current[0];
let suffixLength = current[1];
- /* Populate the results with all values associated with this node */
+ /* Calculate result and child count. If there's a lot of results, the
+ result count "leaks over" to the child count storage. */
+ /* TODO: hmmm. this is helluvalot duplicated code. hmm. */
let resultCount = this.trie.getUint8(offset);
+ let childCount = this.trie.getUint8(offset + 1);
+ if(resultCount & 0x80) {
+ resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3);
+ childCount = childCount & 0x0f;
+ }
+
+ /* Populate the results with all values associated with this node */
for(let i = 0; i != resultCount; ++i) {
let index = this.trie.getUint16(offset + (i + 1)*2, true);
results.push(this.gatherResult(index, suffixLength, 0xffffff)); /* should be enough haha */
/* Dig deeper */
/* TODO: hmmm. this is helluvalot duplicated code. hmm. */
- let relChildOffset = 2 + this.trie.getUint8(offset)*2;
- let childCount = this.trie.getUint8(offset + 1);
+ /* Use the result count decoded above, the raw header byte is wrong for
+ 256 and more results */
+ let relChildOffset = 2 + resultCount*2;
let childOffset = offset + relChildOffset;
for(let j = 0; j != childCount; ++j) {
let offsetBarrier = this.trie.getUint32(childOffset + j*4, true);
with open(basedir/'nested.bin', 'wb') as f:
f.write(serialize_search_data(trie, map, search_type_map, 4))
+
+# Extreme amount of search results (Python's __init__, usually)
+
+trie = Trie()
+map = ResultMap()
+
+for i in range(128):
+ trie.insert("__init__", map.add(f"Foo{i}.__init__(self)", f"Foo{i}.html#__init__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)))
+
+# The real name is __init_subclass__, but it's deliberately spelled as
+# __init__subclass__ here so it has __init__ as a prefix. That triggers the
+# case of a node having both a high amount of results and some children.
+for i in [3, 15, 67]:
+ trie.insert("__init__subclass__", map.add(f"Foo{i}.__init__subclass__(self)", f"Foo{i}.html#__init__subclass__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)))
+
+with open(basedir/'manyresults.bin', 'wb') as f:
+ f.write(serialize_search_data(trie, map, search_type_map, 128 + 3))
suffixLength: 3 }], 'nge']);
}
+/* Extreme amount of search results */
+{
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/manyresults.bin"));
+ assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength), 10000));
+ assert.equal(Search.dataSize, 6415);
+ assert.equal(Search.symbolCount, 128 + 3);
+ assert.equal(Search.maxResults, 10000);
+ assert.deepEqual(Search.search('__init__')[0].length, 128 + 3);
+ assert.deepEqual(Search.search('__init__')[1], '');
+ assert.deepEqual(Search.search('__init__')[0][0],
+ { name: 'Foo0.__init__(self)',
+ url: 'Foo0.html#__init__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 6 });
+ /* The 127 other results in between are similar. It should also print
+ results from the children: */
+ assert.deepEqual(Search.search('__init__')[0][128],
+ { name: 'Foo3.__init__subclass__(self)',
+ url: 'Foo3.html#__init__subclass__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 16 });
+ assert.deepEqual(Search.search('__init__')[0][129],
+ { name: 'Foo15.__init__subclass__(self)',
+ url: 'Foo15.html#__init__subclass__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 16 });
+ assert.deepEqual(Search.search('__init__')[0][130],
+ { name: 'Foo67.__init__subclass__(self)',
+ url: 'Foo67.html#__init__subclass__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 16 });
+
+ /* Searching for nested results should work as well */
+ assert.deepEqual(Search.search('__init__s'), [[
+ { name: 'Foo3.__init__subclass__(self)',
+ url: 'Foo3.html#__init__subclass__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 15 },
+ { name: 'Foo15.__init__subclass__(self)',
+ url: 'Foo15.html#__init__subclass__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 15 },
+ { name: 'Foo67.__init__subclass__(self)',
+ url: 'Foo67.html#__init__subclass__',
+ flags: 1, /* has suffix */
+ cssClass: 'm-info',
+ typeName: 'func',
+ suffixLength: 15 }], 'ubclass__']);
+}
+
/* Not testing Search.download() because the xmlhttprequest npm package is *crap* */