From 63e8d474e84de6be95ed9b2fe8ee7da348eeff49 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Thu, 18 Jul 2019 12:55:59 +0200 Subject: [PATCH] documentation: make it possible to have more than 128 results for a node. Python's __init__ is the main offender, the (currently very barebone) Magnum Python bindings have 340 results for __init__. This change is based on the assumption that nodes with extreme amount of results on the other hand don't have many children, so we can steal some bits from the child count instead. Now it's either up to 127 results and up to 127 children or up to 2048 results and 16 children. --- documentation/_search.py | 29 ++++++-- documentation/search.js | 20 +++++- .../test/js-test-data/manyresults.bin | Bin 0 -> 6415 bytes documentation/test/populate-js-test-data.py | 16 +++++ documentation/test/test-search.js | 62 ++++++++++++++++++ 5 files changed, 120 insertions(+), 7 deletions(-) create mode 100644 documentation/test/js-test-data/manyresults.bin diff --git a/documentation/_search.py b/documentation/_search.py index 25c0be1b..7d6e2656 100644 --- a/documentation/_search.py +++ b/documentation/_search.py @@ -271,9 +271,16 @@ class ResultMap: return output class Trie: - # root | | header | results | child 1 | child 1 | child 1 | - # offset | ... | result # | value # | ... | char | barrier | offset | ... - # 32b | | 8b | 8b | n*16b | 8b | 1b | 23b | + # root | | header | results | child 1 | child 1 | child 1 | + # offset | ... | | result # | child # | ... | char | barrier | offset | ... + # 32b | |0| 7b | 8b | n*16b | 8b | 1b | 23b | + # + # if result count > 127, it's instead: + # + # root | | header | results | child 1 | child 1 | child 1 | + # offset | ... | | result # | child # | ... | char | barrier | offset | ... + # 32b | |1| 11b | 4b | n*16b | 8b | 1b | 23b | + root_offset_struct = struct.Struct(' 127: + assert len(self.children) < 16 and len(self.results) < 2048 + result_count = (len(self.results) & 0x7f) | 0x80 + children_count = ((len(self.results) & 0xf80) >> 3) | len(self.children) + else: + result_count = len(self.results) + children_count = len(self.children) + serialized += self.header_struct.pack(result_count, children_count) for v in self.results: serialized += self.result_struct.pack(v) diff --git a/documentation/search.js b/documentation/search.js index cbd6ed03..8b0c9a5b 100644 --- a/documentation/search.js +++ b/documentation/search.js @@ -256,7 +256,15 @@ var Search = { /* Calculate offset and count of children */ let offset = this.searchStack[this.searchStack.length - 1]; let relChildOffset = 2 + this.trie.getUint8(offset)*2; + + /* Calculate child count. If there's a lot of results, the count + "leaks over" to the child count storage. */ + let resultCount = this.trie.getUint8(offset); let childCount = this.trie.getUint8(offset + 1); + if(resultCount & 0x80) { + resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3); + childCount = childCount & 0x0f; + } /* Go through all children and find the next offset */ let childOffset = offset + relChildOffset; @@ -299,8 +307,17 @@ var Search = { let offset = current[0]; let suffixLength = current[1]; - /* Populate the results with all values associated with this node */ + /* Calculate child count. If there's a lot of results, the count + "leaks over" to the child count storage. */ + /* TODO: hmmm. this is helluvalot duplicated code. hmm. */ let resultCount = this.trie.getUint8(offset); + let childCount = this.trie.getUint8(offset + 1); + if(resultCount & 0x80) { + resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3); + childCount = childCount & 0x0f; + } + + /* Populate the results with all values associated with this node */ for(let i = 0; i != resultCount; ++i) { let index = this.trie.getUint16(offset + (i + 1)*2, true); results.push(this.gatherResult(index, suffixLength, 0xffffff)); /* should be enough haha */ @@ -313,7 +330,6 @@ var Search = { /* Dig deeper */ /* TODO: hmmm. this is helluvalot duplicated code. hmm. */ let relChildOffset = 2 + this.trie.getUint8(offset)*2; - let childCount = this.trie.getUint8(offset + 1); let childOffset = offset + relChildOffset; for(let j = 0; j != childCount; ++j) { let offsetBarrier = this.trie.getUint32(childOffset + j*4, true); diff --git a/documentation/test/js-test-data/manyresults.bin b/documentation/test/js-test-data/manyresults.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d3eb35f13ea8547586e8fac5a707c7b80de6bff GIT binary patch literal 6415 zcmZvgNo-Yh7{$*mZ7Br`ZGoZ!f*|v7|98F&>IE4bm;@0DQc7u|TG~pf0!~1!#>8Rg zuvC{KF~(`yHeCFC{z&f@p_a_)Ce-@U*4l9zA$raj&Wp7wyN zvw>6mI|1i$0T%(U7U+OCrSjSW48h|FU*`dx@D>4m@RkEz@O$H!_fJtb?WK6+SG+`R1qZut|#SF~EEVN-ZuEX`X0dp`H^Kc{PV*zf$LM+0~Sd1lD zie*@i6#!aha0_n5Z78r2n{YceV+*!o8}7iJxC?h< zJ9c0vc40T}!5-|zz1WBQupbA|j)ORa`*D~Te1sQwloxdj-RQw_oIo!gz=OQlljz3) z2JsM1@iv^sFdoJuID@k|hez=k9>)`SlAoizM;j}5ZJSr#!5toly}cfbympTz-jT}t zp32cazD}<}k&?q?lw2ZTlE2A<2`ILa0rC?0i2OpDYf-EvUF13P9{GVxtV7|G1LPuk zlYB+~A&cu#>?CK%YveQX2bnVw#b(k^UM3%tUr9>?ign}|d7ivaek2W(P&m>~M#v@d zHTjn;X+*J$oF%W5&&i);?qn2O$N+hTd_sOBty55}C*9-)@&WmYOqz-!AP31){2MX`$PCnMxC`IgkqK(U6jlTq?E`Hs}j}@;zyo#s5nV zlV{1h0oFZl@KS4IHiQ4ZB$a6u;P>!hPF|Oal%ZX%rG>KfilC;4k|N&FcT;<3~i$_ z69_YbGQ-d|Dl>sF6DTtbZKE<12s42)!_YK_$_zt0sLX`IOsLE-w2jJ4D9nV)3`5(f z%!I;BsLU|5jmk_Y%!JAeL))m#gu+au%rG>Kkut;34k|N|FcT>=3~i$_6A3esGQ-d| zDl?HV6DczcZKE<12{Vy0!_YLw$_zt0sLaH|Osvc>w2jJ4EX>5p3`5(f%*4V>tjsX9 zjmk_c%*4tJL))m##KKIX%rG>Ki88~`4k|N=Fq0@V3~i$_lL#}3GQ-d|Dl>^NlPEI` zZKE=i2s4Q?!_YLQ$_zt0sLZ6oOsdQKnKHxB4k|O5Fq0`W3~i$_lL<4KGQ-d|Dl?fdlPNO{ZKE=i2{V~8 z!_YM5$_zt0sLbTTOs>o@w2jJ4F3jZ03`5(f%;dsMuFNpBjmk_e%;d@pL))m#e zm6|d&oz7Qs%2bRhJ=+6RjG2maOHd@ zt4wXHUJU5XfV9PhfoPSsVnA0{eWH&$PEFv0jgEm(n|ulCE!LtwJKYB zQ6M)3+zO~xWlJs#oZ1#Rxh^PEYm2)9@d4Dvz{!