From: Vladimír Vondruš Date: Thu, 18 Jul 2019 10:55:59 +0000 (+0200) Subject: documentation: make it possible to have more than 128 results for a node. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=63e8d474e84de6be95ed9b2fe8ee7da348eeff49;p=blog.git documentation: make it possible to have more than 128 results for a node. Python's __init__ is the main offender: the (currently very bare-bones) Magnum Python bindings have 340 results for __init__. This change is based on the assumption that nodes with an extreme number of results, on the other hand, don't have many children, so we can steal some bits from the child count instead. Now it's either up to 127 results and up to 127 children or up to 2047 results and up to 15 children. --- diff --git a/documentation/_search.py b/documentation/_search.py index 25c0be1b..7d6e2656 100644 --- a/documentation/_search.py +++ b/documentation/_search.py @@ -271,9 +271,16 @@ class ResultMap: return output class Trie: - # root | | header | results | child 1 | child 1 | child 1 | - # offset | ... | result # | value # | ... | char | barrier | offset | ... - # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b | + # root | | header | results | child 1 | child 1 | child 1 | + # offset | ... | | result # | child # | ... | char | barrier | offset | ... + # 32b | |0| 7b | 8b | n*16b | 8b | 1b | 23b | + # + # if result count > 127, it's instead: + # + # root | | header | results | child 1 | child 1 | child 1 | + # offset | ... | | result # | child # | ... | char | barrier | offset | ... 
+ # 32b | |1| 11b | 4b | n*16b | 8b | 1b | 23b | + root_offset_struct = struct.Struct(' 127: + assert len(self.children) < 16 and len(self.results) < 2048 + result_count = (len(self.results) & 0x7f) | 0x80 + children_count = ((len(self.results) & 0xf80) >> 3) | len(self.children) + else: + result_count = len(self.results) + children_count = len(self.children) + serialized += self.header_struct.pack(result_count, children_count) for v in self.results: serialized += self.result_struct.pack(v) diff --git a/documentation/search.js b/documentation/search.js index cbd6ed03..8b0c9a5b 100644 --- a/documentation/search.js +++ b/documentation/search.js @@ -256,7 +256,15 @@ var Search = { /* Calculate offset and count of children */ let offset = this.searchStack[this.searchStack.length - 1]; let relChildOffset = 2 + this.trie.getUint8(offset)*2; + + /* Calculate child count. If there's a lot of results, the count + "leaks over" to the child count storage. */ + let resultCount = this.trie.getUint8(offset); let childCount = this.trie.getUint8(offset + 1); + if(resultCount & 0x80) { + resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3); + childCount = childCount & 0x0f; + } /* Go through all children and find the next offset */ let childOffset = offset + relChildOffset; @@ -299,8 +307,17 @@ var Search = { let offset = current[0]; let suffixLength = current[1]; - /* Populate the results with all values associated with this node */ + /* Calculate child count. If there's a lot of results, the count + "leaks over" to the child count storage. */ + /* TODO: hmmm. this is helluvalot duplicated code. hmm. 
*/ let resultCount = this.trie.getUint8(offset); + let childCount = this.trie.getUint8(offset + 1); + if(resultCount & 0x80) { + resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3); + childCount = childCount & 0x0f; + } + + /* Populate the results with all values associated with this node */ for(let i = 0; i != resultCount; ++i) { let index = this.trie.getUint16(offset + (i + 1)*2, true); results.push(this.gatherResult(index, suffixLength, 0xffffff)); /* should be enough haha */ @@ -313,7 +330,6 @@ var Search = { /* Dig deeper */ /* TODO: hmmm. this is helluvalot duplicated code. hmm. */ let relChildOffset = 2 + this.trie.getUint8(offset)*2; - let childCount = this.trie.getUint8(offset + 1); let childOffset = offset + relChildOffset; for(let j = 0; j != childCount; ++j) { let offsetBarrier = this.trie.getUint32(childOffset + j*4, true); diff --git a/documentation/test/js-test-data/manyresults.bin b/documentation/test/js-test-data/manyresults.bin new file mode 100644 index 00000000..4d3eb35f Binary files /dev/null and b/documentation/test/js-test-data/manyresults.bin differ diff --git a/documentation/test/populate-js-test-data.py b/documentation/test/populate-js-test-data.py index 998beff1..9903ee84 100755 --- a/documentation/test/populate-js-test-data.py +++ b/documentation/test/populate-js-test-data.py @@ -103,3 +103,19 @@ trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Rang with open(basedir/'nested.bin', 'wb') as f: f.write(serialize_search_data(trie, map, search_type_map, 4)) + +# Extreme amount of search results (Python's __init__, usually) + +trie = Trie() +map = ResultMap() + +for i in range(128): + trie.insert("__init__", map.add(f"Foo{i}.__init__(self)", f"Foo{i}.html#__init__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))) + +# It's __init_subclass__, but here I want to trigger the case of both a high +# amount of results and some children as well. 
+for i in [3, 15, 67]: + trie.insert("__init__subclass__", map.add(f"Foo{i}.__init__subclass__(self)", f"Foo{i}.html#__init__subclass__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))) + +with open(basedir/'manyresults.bin', 'wb') as f: + f.write(serialize_search_data(trie, map, search_type_map, 128 + 3)) diff --git a/documentation/test/test-search.js b/documentation/test/test-search.js index 2bf9cc7a..d99550ba 100644 --- a/documentation/test/test-search.js +++ b/documentation/test/test-search.js @@ -336,4 +336,66 @@ const { StringDecoder } = require('string_decoder'); suffixLength: 3 }], 'nge']); } +/* Extreme amount of search results */ +{ + let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/manyresults.bin")); + assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength), 10000)); + assert.equal(Search.dataSize, 6415); + assert.equal(Search.symbolCount, 128 + 3); + assert.equal(Search.maxResults, 10000); + assert.deepEqual(Search.search('__init__')[0].length, 128 + 3); + assert.deepEqual(Search.search('__init__')[1], ''); + assert.deepEqual(Search.search('__init__')[0][0], + { name: 'Foo0.__init__(self)', + url: 'Foo0.html#__init__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 6 }); + /* The 127 other results in between are similar. 
It should also print + results from the children: */ + assert.deepEqual(Search.search('__init__')[0][128], + { name: 'Foo3.__init__subclass__(self)', + url: 'Foo3.html#__init__subclass__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 16 }); + assert.deepEqual(Search.search('__init__')[0][129], + { name: 'Foo15.__init__subclass__(self)', + url: 'Foo15.html#__init__subclass__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 16 }); + assert.deepEqual(Search.search('__init__')[0][130], + { name: 'Foo67.__init__subclass__(self)', + url: 'Foo67.html#__init__subclass__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 16 }); + + /* Searching for nested results should work as well */ + assert.deepEqual(Search.search('__init__s'), [[ + { name: 'Foo3.__init__subclass__(self)', + url: 'Foo3.html#__init__subclass__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 15 }, + { name: 'Foo15.__init__subclass__(self)', + url: 'Foo15.html#__init__subclass__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 15 }, + { name: 'Foo67.__init__subclass__(self)', + url: 'Foo67.html#__init__subclass__', + flags: 1, /* has suffix */ + cssClass: 'm-info', + typeName: 'func', + suffixLength: 15 }], 'ubclass__']); +} + /* Not testing Search.download() because the xmlhttprequest npm package is *crap* */