import enum
import struct
from types import SimpleNamespace as Empty
-from typing import List, Tuple
+from typing import List, Tuple, Union
# Version 0 was without the type map
-searchdata_format_version = 1
+searchdata_format_version = 2
search_filename = f'search-v{searchdata_format_version}.js'
searchdata_filename = f'{{search_filename_prefix}}-v{searchdata_format_version}.bin'
searchdata_filename_b85 = f'{{search_filename_prefix}}-v{searchdata_format_version}.js'
+# In order to be both space-efficient and flexible enough to accommodate
+# larger projects, the bit counts for particular data types can vary in each
+# file. There are the following categories:
+#
+# - NAME_SIZE_BITS, how many bits are needed to store name lengths (such as
+# prefix length). Can be either 8 or 16.
+# - RESULT_ID_BITS, how many bits are needed for IDs pointing into the result
+# map. Can be either 16, 24 or 32.
+# - FILE_OFFSET_BITS, how many bits are needed to store general offsets into
+# the file. Can be either 24 or 32.
+#
+# Whole file encoding
+# ===================
+#
+# magic | version | type | not | symbol | result | type | trie | result | type
+# 'MCS' | (0x02) | data | used | count | map | map | data | map | map
+# | | | | | offset | offset | | data | data
+# 24b | 8b | 8b | 24b | 32b | 32b | 32b | … | … | …
+#
+# The type data byte encodes the NAME_SIZE_BITS, RESULT_ID_BITS and
+# FILE_OFFSET_BITS:
+#
+# not | NAME_SIZE_BITS | RESULT_ID_BITS | FILE_OFFSET_BITS
+# used | 0b0 = 8b | 0b00 = 16b | 0b0 = 24b
+# | 0b1 = 16b | 0b01 = 24b | 0b1 = 32b
+# | | 0b10 = 32b |
+# 4b | 1b | 2b | 1b
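+#
+# For illustration: with NAME_SIZE_BITS = 8, RESULT_ID_BITS = 24 and
+# FILE_OFFSET_BITS = 32 the type data byte is 0b00000011, i.e. 0x03. The
+# smallest possible file (no symbols, no types, 8/16/24-bit sizes) is 20
+# header bytes + 6 trie bytes + 3 result map bytes + 2 type map bytes, i.e.
+# 31 bytes.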
+#
+# Trie encoding
+# =============
+#
+# Because child tries are serialized first, the root trie (the one containing
+# the initial characters) is never at the start of the data; the root offset
+# points to it instead. If the result count is < 128:
+#
+# root | | header | results | children
+# offset | … | | result # | child # | … | data
+# 32b | |0| 7b | 8b | n*RESULT_ID_BITS | …
+#
+# If the result count is > 127, it's encoded like this instead -- since
+# entries with a very large number of results (such as Python's __init__())
+# are rather rare, it doesn't make sense to make this globally configurable
+# and then waste 8 bits in the majority of cases. Note that the 15-bit value
+# is stored as Big-Endian, otherwise the leftmost bit couldn't be used to
+# denote the size.
+#
+# root | | header | results | children
+# offset | … | | result # | child # | … | data
+# 32b | |1| 15b (BE) | 8b | n*RESULT_ID_BITS | …
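+#
+# For illustration, a node with 300 results stores the count as the two bytes
+# 0x81 0x2C (300 | 0x8000, encoded as Big-Endian).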
+#
+# Trie children data encoding; the barrier is stored in the topmost offset bit:
+#
+# child 1 | child 2 | | child 1 | child 2 |
+# char | char | … | barrier + offset | barrier + offset | …
+# 8b | 8b | | FILE_OFFSET_BITS | FILE_OFFSET_BITS |
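+#
+# For illustration, with RESULT_ID_BITS = 16 and FILE_OFFSET_BITS = 24 a node
+# with one result and two children takes 1 header byte + 1 child count byte +
+# 2 result ID bytes + 2 child char bytes + 2*3 child offset bytes = 12 bytes.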
+#
+# Result map encoding
+# ===================
+#
+# First all the offsets (plus the file size), then all the flags, so we don't
+# need to have weird paddings or alignments. The "file size" is there so the
+# size of item N can always be retrieved as `offsets[N + 1] - offsets[N]`.
+#
+# item | file | item | item 1 | item 2 |
+# offsets | size | flags | data | data | …
+# n*FILE_OFFSET_BITS | FILE_OFFSET_BITS | n*8b | | |
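+#
+# For illustration, with FILE_OFFSET_BITS = 24 and two items of 10 and 7 data
+# bytes the offset array is [11, 21, 28] -- the first item starts right after
+# the three 3-byte offsets and the two flag bytes, and item sizes come out as
+# 21 - 11 = 10 and 28 - 21 = 7.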
+#
+# Basic item data (flags & 0b11 == 0b00):
+#
+# name | \0 | URL
+# | |
+# | 8b |
+#
+# Suffixed item data (flags & 0b11 == 0b01):
+#
+# suffix | name | \0 | URL
+# length | | |
+# NAME_SIZE_BITS | | 8b |
+#
+# Prefixed item data (flags & 0b11 == 0b10):
+#
+# prefix | prefix | name | \0 | URL
+# id | length | suffix | | suffix
+# RESULT_ID_BITS | NAME_SIZE_BITS | | 8b |
+#
+# Prefixed & suffixed item (flags & 0b11 == 0b11):
+#
+# prefix | prefix | suffix | name | \0 | URL
+# id | length | length | suffix | |
+# RESULT_ID_BITS | NAME_SIZE_BITS | NAME_SIZE_BITS | | 8b |
+#
+# Alias item (flags & 0xf0 == 0x00), flags & 0b11 then denotes what's in the
+# `…` portion. Aliases have no URL, so the alias name is in its place:
+#
+# alias | | alias
+# id | … | name
+# RESULT_ID_BITS | |
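+#
+# For illustration, with prefix merging enabled an entry like
+# Rectangle::Rect() can be stored as a prefixed item -- only the ::Rect()
+# suffix is kept, together with a prefix ID pointing at the Rectangle entry
+# and the length of the URL prefix to reuse from it.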
+#
+# Type map encoding
+# =================
+#
+# Again the "end offset" is here so the size of type N can always be retrieved
+# as `offsets[N + 1] - offsets[N]`. Type names are not expected to have more
+# than 255 chars, so NAME_SIZE_BITS is not used here.
+#
+# type 1 | type 2 | | | | type 1 |
+# class | name | class | name | … | padding | end | name | …
+# ID | offset | ID | offset | | | offset | data |
+# 8b | 8b | 8b | 8b | | 8b | 8b | |
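+#
+# For illustration, a type map with two types named 'class' and 'func'
+# serializes into three 2-byte entries (the last one just carrying the end
+# offset) followed by the name data -- name offsets 6 and 11, final end
+# offset 15.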
+
+class Serializer:
+ # This is currently hardcoded
+ result_map_flag_bytes = 1
+
+ header_struct = struct.Struct('<3sBBxxxIII')
+ result_map_flags_struct = struct.Struct('<B')
+ trie_root_offset_struct = struct.Struct('<I')
+ type_map_entry_struct = struct.Struct('<BB')
+
+ def __init__(self, *, file_offset_bytes, result_id_bytes, name_size_bytes):
+ assert file_offset_bytes in [3, 4]
+ self.file_offset_bytes = file_offset_bytes
+
+ assert result_id_bytes in [2, 3, 4]
+ self.result_id_bytes = result_id_bytes
+
+ assert name_size_bytes in [1, 2]
+ self.name_size_bytes = name_size_bytes
+
+ def pack_header(self, symbol_count, trie_size, result_map_size):
+ return self.header_struct.pack(b'MCS', searchdata_format_version,
+ (self.file_offset_bytes - 3) << 0 |
+ (self.result_id_bytes - 2) << 1 |
+ (self.name_size_bytes - 1) << 3,
+ symbol_count,
+ self.header_struct.size + trie_size,
+ self.header_struct.size + trie_size + result_map_size)
+
+ def pack_result_map_flags(self, flags: int):
+ return self.result_map_flags_struct.pack(flags)
+ def pack_result_map_offset(self, offset: int):
+ return offset.to_bytes(self.file_offset_bytes, byteorder='little')
+ def pack_result_map_prefix(self, id: int, length: int):
+ return id.to_bytes(self.result_id_bytes, byteorder='little') + \
+ length.to_bytes(self.name_size_bytes, byteorder='little')
+ def pack_result_map_suffix_length(self, length: int):
+ return length.to_bytes(self.name_size_bytes, byteorder='little')
+ def pack_result_map_alias(self, id: int):
+ return id.to_bytes(self.result_id_bytes, byteorder='little')
+
+ def pack_trie_root_offset(self, offset: int):
+ return self.trie_root_offset_struct.pack(offset)
+ def pack_trie_node(self, result_ids: List[int], child_chars_offsets_barriers: List[Tuple[int, int, bool]]):
+ out = bytearray()
+ # If the result count fits into 7 bits, pack it into a single byte
+ if len(result_ids) < 128:
+ out += len(result_ids).to_bytes(1, byteorder='little')
+ # Otherwise use the leftmost bit to denote it's two-byte, keep the high
+ # 7 bits in the first byte and store the low 8 bits in a second byte --
+ # which is the same as storing the value as Big-Endian.
+ else:
+ assert len(result_ids) < 32768
+ out += (len(result_ids) | 0x8000).to_bytes(2, byteorder='big')
+ out += len(child_chars_offsets_barriers).to_bytes(1, byteorder='little')
+ for id in result_ids:
+ out += id.to_bytes(self.result_id_bytes, byteorder='little')
+ out += bytes([char for char, offset, barrier in child_chars_offsets_barriers])
+ child_barrier_mask = 1 << (self.file_offset_bytes*8 - 1)
+ for char, offset, barrier in child_chars_offsets_barriers:
+ if offset >= child_barrier_mask: raise OverflowError
+ out += (offset | (barrier*child_barrier_mask)).to_bytes(self.file_offset_bytes, byteorder='little')
+ return out
+
+ def pack_type_map_entry(self, class_: int, offset: int):
+ return self.type_map_entry_struct.pack(class_, offset)
+
+class Deserializer:
+ def __init__(self, *, file_offset_bytes, result_id_bytes, name_size_bytes):
+ assert file_offset_bytes in [3, 4]
+ self.file_offset_bytes = file_offset_bytes
+
+ assert result_id_bytes in [2, 3, 4]
+ self.result_id_bytes = result_id_bytes
+
+ assert name_size_bytes in [1, 2]
+ self.name_size_bytes = name_size_bytes
+
+ @classmethod
+ def from_serialized(cls, serialized: bytes):
+ magic, version, type_data, symbol_count, map_offset, type_map_offset = Serializer.header_struct.unpack_from(serialized)
+ assert magic == b'MCS'
+ assert version == searchdata_format_version
+ out = Deserializer(
+ file_offset_bytes=[3, 4][(type_data & 0b0001) >> 0],
+ result_id_bytes=[2, 3, 4][(type_data & 0b0110) >> 1],
+ name_size_bytes=[1, 2][(type_data & 0b1000) >> 3])
+ out.symbol_count = symbol_count
+ out.map_offset = map_offset
+ out.type_map_offset = type_map_offset
+ return out
+
+ # The last tuple item is the number of bytes extracted
+ def unpack_result_map_flags(self, serialized: bytes, offset: int) -> Tuple[int, int]:
+ return Serializer.result_map_flags_struct.unpack_from(serialized, offset) + (Serializer.result_map_flags_struct.size, )
+ def unpack_result_map_offset(self, serialized: bytes, offset: int) -> Tuple[int, int]:
+ return int.from_bytes(serialized[offset:offset + self.file_offset_bytes], byteorder='little'), self.file_offset_bytes
+ def unpack_result_map_prefix(self, serialized: bytes, offset: int) -> Tuple[int, int, int]:
+ return int.from_bytes(serialized[offset:offset + self.result_id_bytes], byteorder='little'), int.from_bytes(serialized[offset + self.result_id_bytes:offset + self.result_id_bytes + self.name_size_bytes], byteorder='little'), self.result_id_bytes + self.name_size_bytes
+ def unpack_result_map_suffix_length(self, serialized: bytes, offset: int) -> Tuple[int, int]:
+ return int.from_bytes(serialized[offset:offset + self.name_size_bytes], byteorder='little'), self.name_size_bytes
+ def unpack_result_map_alias(self, serialized: bytes, offset: int) -> Tuple[int, int]:
+ return int.from_bytes(serialized[offset:offset + self.result_id_bytes], byteorder='little'), self.result_id_bytes
+
+ def unpack_trie_root_offset(self, serialized: bytes, offset: int) -> Tuple[int, int]:
+ return Serializer.trie_root_offset_struct.unpack_from(serialized, offset) + (Serializer.trie_root_offset_struct.size, )
+ def unpack_trie_node(self, serialized: bytes, offset: int) -> Tuple[List[int], List[Tuple[int, int, bool]], int]:
+ prev_offset = offset
+ # Result count, first try 8-bit, if it has the highest bit set, extract
+ # two bytes (as a BE) and then remove the highest bit
+ result_count = int.from_bytes(serialized[offset:offset + 1], byteorder='little')
+ if result_count & 0x80:
+ result_count = int.from_bytes(serialized[offset:offset + 2], byteorder='big') & ~0x8000
+ offset += 1
+ offset += 1
+ child_count = int.from_bytes(serialized[offset:offset + 1], byteorder='little')
+ offset += 1
+
+ # Unpack all result IDs
+ result_ids = []
+ for i in range(result_count):
+ result_ids += [int.from_bytes(serialized[offset:offset + self.result_id_bytes], byteorder='little')]
+ offset += self.result_id_bytes
+
+ # Unpack all child chars
+ child_chars = list(serialized[offset:offset + child_count])
+ offset += child_count
+
+ # Unpack all children offsets and lookahead barriers
+ child_chars_offsets_barriers = []
+ child_barrier_mask = 1 << (self.file_offset_bytes*8 - 1)
+ for i in range(child_count):
+ child_offset_barrier = int.from_bytes(serialized[offset:offset + self.file_offset_bytes], byteorder='little')
+ child_chars_offsets_barriers += [(child_chars[i], child_offset_barrier & ~child_barrier_mask, bool(child_offset_barrier & child_barrier_mask))]
+ offset += self.file_offset_bytes
+
+ return result_ids, child_chars_offsets_barriers, offset - prev_offset
+
+ def unpack_type_map_entry(self, serialized: bytes, offset: int) -> Tuple[int, int, int]:
+ return Serializer.type_map_entry_struct.unpack_from(serialized, offset) + (Serializer.type_map_entry_struct.size, )
+
class CssClass(enum.Enum):
DEFAULT = 0
PRIMARY = 1
_TYPE14 = 14 << 4
_TYPE15 = 15 << 4
-# Result map encoding -- the "file size" is there so size of item N can be
-# always retrieved as `offsets[N + 1] - offsets[N]`
-#
-# item 1 flags | item 2 flags | | item N flags | file | item 1 |
-# + offset | + offset | … | + offset | size | data | …
-# 8 + 24b | 8 + 24b | | 8 + 24b | 32b | |
-#
-# basic item (flags & 0b11 == 0b00):
-#
-# name | \0 | URL
-# | |
-# | 8b |
-#
-# suffixed item (flags & 0b11 == 0b01):
-#
-# suffix | name | \0 | URL
-# length | | |
-# 8b | | 8b |
-#
-# prefixed item (flags & 0xb11 == 0b10):
-#
-# prefix | name | \0 | URL
-# id + len | suffix | | suffix
-# 16b + 8b | | 8b |
-#
-# prefixed & suffixed item (flags & 0xb11 == 0b11):
-#
-# prefix | suffix | name | \0 | URL
-# id + len | length | suffix | |
-# 16b + 8b | 8b | | 8b |
-#
-# alias item (flags & 0xf0 == 0x00), flags & 0xb11 then denote what's in the
-# `…` portion, alias have no URL so the alias name is in place of it:
-#
-# alias | | alias
-# id | … | name
-# 16b | |
class ResultMap:
- offset_struct = struct.Struct('<I')
- flags_struct = struct.Struct('<B')
- prefix_struct = struct.Struct('<HB')
- suffix_length_struct = struct.Struct('<B')
- alias_struct = struct.Struct('<H')
-
def __init__(self):
self.entries = []
self.entries += [entry]
return len(self.entries) - 1
- def serialize(self, merge_prefixes=True) -> bytearray:
- output = bytearray()
-
+ def serialize(self, serializer: Serializer, merge_prefixes=True) -> bytearray:
if merge_prefixes:
# Put all entry names into a trie to discover common prefixes
trie = Trie()
# Everything merged, replace the original list
self.entries = merged
- # Write the offset array. Starting offset for items is after the offset
- # array and the file size
- offset = (len(self.entries) + 1)*4
+ # Write the offset array. Starting offset for items is after the
+ # (aligned) flag array and (aligned) offset + file size array.
+ output = bytearray()
+ offset = len(self.entries)*serializer.result_map_flag_bytes + (len(self.entries) + 1)*serializer.file_offset_bytes
for e in self.entries:
- assert offset < 2**24
- output += self.offset_struct.pack(offset)
- self.flags_struct.pack_into(output, len(output) - 1, e.flags.value)
+ output += serializer.pack_result_map_offset(offset)
# The entry is an alias, extra field for alias index
if e.flags & ResultFlag._TYPE == ResultFlag.ALIAS:
- offset += self.alias_struct.size
+ offset += serializer.result_id_bytes
# Extra field for prefix index and length
if e.flags & ResultFlag.HAS_PREFIX:
- offset += self.prefix_struct.size
+ offset += serializer.result_id_bytes + serializer.name_size_bytes
# Extra field for suffix length
if e.flags & ResultFlag.HAS_SUFFIX:
- offset += self.suffix_length_struct.size
+ offset += serializer.name_size_bytes
# Length of the name
offset += len(e.name.encode('utf-8'))
offset += len(e.url.encode('utf-8')) + 1
# Write file size
- output += self.offset_struct.pack(offset)
+ output += serializer.pack_result_map_offset(offset)
+
+ # Write the flag array
+ for e in self.entries:
+ output += serializer.pack_result_map_flags(e.flags.value)
# Write the entries themselves
for e in self.entries:
if e.flags & ResultFlag._TYPE == ResultFlag.ALIAS:
assert not e.alias is None
assert not e.url
- output += self.alias_struct.pack(e.alias)
+ output += serializer.pack_result_map_alias(e.alias)
if e.flags & ResultFlag.HAS_PREFIX:
- output += self.prefix_struct.pack(e.prefix, e.prefix_length)
+ output += serializer.pack_result_map_prefix(e.prefix, e.prefix_length)
if e.flags & ResultFlag.HAS_SUFFIX:
- output += self.suffix_length_struct.pack(e.suffix_length)
+ output += serializer.pack_result_map_suffix_length(e.suffix_length)
output += e.name.encode('utf-8')
if e.url:
output += b'\0'
assert len(output) == offset
return output
-# Trie encoding:
-#
-# root | | header | results | child 1 | child 1 | child 1 |
-# offset | … | | result # | child # | … | char | barrier | offset | …
-# 32b | |0| 7b | 8b | n*16b | 8b | 1b | 23b |
-#
-# if result count > 127, it's instead:
-#
-# root | | header | results | child 1 | child 1 | child 1 |
-# offset | … | | result # | child # | … | char | barrier | offset | …
-# 32b | |1| 11b | 4b | n*16b | 8b | 1b | 23b |
class Trie:
- root_offset_struct = struct.Struct('<I')
- header_struct = struct.Struct('<BB')
- result_struct = struct.Struct('<H')
- child_struct = struct.Struct('<I')
- child_char_struct = struct.Struct('<B')
-
def __init__(self):
self.results = []
self.children = {}
- def _insert(self, path: bytes, result, lookahead_barriers):
+ def _insert(self, path: bytes, result: Union[int, List[int]], lookahead_barriers):
if not path:
- self.results += [result]
+ # Inserting a list is mainly used by the
+ # TrieSerialization.test_23bit_file_offset_too_small() test, as
+ # otherwise it'd be WAY too slow.
+ # TODO this whole thing needs optimizing with less recursion
+ if type(result) is list:
+ self.results += result
+ else:
+ self.results += [result]
return
char = path[0]
self.children[char] = (True, self.children[char][1])
self.children[char][1]._insert(path[1:], result, [b - 1 for b in lookahead_barriers])
- def insert(self, path: str, result, lookahead_barriers=[]):
+ def insert(self, path: str, result: Union[int, List[int]], lookahead_barriers=[]):
self._insert(path.encode('utf-8'), result, lookahead_barriers)
def _sort(self, key):
self._sort(key)
# Returns offset of the serialized thing in `output`
- def _serialize(self, hashtable, output: bytearray, merge_subtrees) -> int:
+ def _serialize(self, serializer: Serializer, hashtable, output: bytearray, merge_subtrees) -> int:
# Serialize all children first
- child_offsets = []
+ child_chars_offsets_barriers = []
for char, child in self.children.items():
- offset = child[1]._serialize(hashtable, output, merge_subtrees=merge_subtrees)
- child_offsets += [(char, child[0], offset)]
-
- # Serialize this node. Sometimes we'd have an insane amount of results
- # (such as Python's __init__), but very little children to go with
- # that. Then we can make the result count storage larger (11 bits,
- # 2048 results) and the child count storage smaller (4 bits, 16
- # children). Hopefully that's enough. The remaining leftmost bit is
- # used as an indicator of this shifted state.
- serialized = bytearray()
- if len(self.results) > 127:
- assert len(self.children) < 16 and len(self.results) < 2048
- result_count = (len(self.results) & 0x7f) | 0x80
- children_count = ((len(self.results) & 0xf80) >> 3) | len(self.children)
- else:
- result_count = len(self.results)
- children_count = len(self.children)
- serialized += self.header_struct.pack(result_count, children_count)
- for v in self.results:
- serialized += self.result_struct.pack(v)
-
- # Serialize child offsets
- for char, lookahead_barrier, abs_offset in child_offsets:
- assert abs_offset < 2**23
-
- # write them over each other because that's the only way to pack
- # a 24 bit field
- offset = len(serialized)
- serialized += self.child_struct.pack(abs_offset | ((1 if lookahead_barrier else 0) << 23))
- self.child_char_struct.pack_into(serialized, offset + 3, char)
+ offset = child[1]._serialize(serializer, hashtable, output, merge_subtrees=merge_subtrees)
+ child_chars_offsets_barriers += [(char, offset, child[0])]
+
+ # Serialize this node
+ serialized = serializer.pack_trie_node(self.results, child_chars_offsets_barriers)
# Subtree merging: if this exact tree is already in the table, return
# its offset. Otherwise add it and return the new offset.
if merge_subtrees: hashtable[hashable] = offset
return offset
- def serialize(self, merge_subtrees=True) -> bytearray:
+ def serialize(self, serializer: Serializer, merge_subtrees=True) -> bytearray:
output = bytearray(b'\x00\x00\x00\x00')
hashtable = {}
- self.root_offset_struct.pack_into(output, 0, self._serialize(hashtable, output, merge_subtrees=merge_subtrees))
+ output[0:4] = serializer.pack_trie_root_offset(self._serialize(serializer, hashtable, output, merge_subtrees=merge_subtrees))
return output
-# Type map encoding:
-#
-# type 1 | type 2 | | | | type 1 |
-# class | name | class | name | … | padding | end | name | …
-# ID | offset | ID | offset | | | offset | data |
-# 8b | 8b | 8b | 8b | | 8b | 8b | |
-type_map_entry_struct = struct.Struct('<BB')
-
-def serialize_type_map(map: List[Tuple[CssClass, str]]) -> bytearray:
+def serialize_type_map(serializer: Serializer, map: List[Tuple[CssClass, str]]) -> bytearray:
serialized = bytearray()
names = bytearray()
assert len(map) <= 15
# Initial name offset is after all the offset entries plus the final one
- initial_name_offset = (len(map) + 1)*type_map_entry_struct.size
+ initial_name_offset = (len(map) + 1)*serializer.type_map_entry_struct.size
# Add all entries (and the final offset), encode the names separately,
# concatenate at the end
for css_class, name in map:
- serialized += type_map_entry_struct.pack(css_class.value, initial_name_offset + len(names))
+ serialized += serializer.pack_type_map_entry(css_class.value, initial_name_offset + len(names))
names += name.encode('utf-8')
- serialized += type_map_entry_struct.pack(0, initial_name_offset + len(names))
+ serialized += serializer.pack_type_map_entry(0, initial_name_offset + len(names))
assert len(serialized) == initial_name_offset
return serialized + names
-# Whole file encoding:
-#
-# magic | version | symbol | result | type | trie | result | type
-# header | | count | map | map | data | map | map
-# | | | offset | offset | | data | data
-# 24b | 8b | 16b | 32b | 32b | … | … | …
-search_data_header_struct = struct.Struct('<3sBHII')
-
-def serialize_search_data(trie: Trie, map: ResultMap, type_map: List[Tuple[CssClass, str]], symbol_count, *, merge_subtrees=True, merge_prefixes=True) -> bytearray:
- serialized_trie = trie.serialize(merge_subtrees=merge_subtrees)
- serialized_map = map.serialize(merge_prefixes=merge_prefixes)
- serialized_type_map = serialize_type_map(type_map)
+def serialize_search_data(serializer: Serializer, trie: Trie, map: ResultMap, type_map: List[Tuple[CssClass, str]], symbol_count, *, merge_subtrees=True, merge_prefixes=True) -> bytearray:
+ serialized_trie = trie.serialize(serializer, merge_subtrees=merge_subtrees)
+ serialized_map = map.serialize(serializer, merge_prefixes=merge_prefixes)
+ serialized_type_map = serialize_type_map(serializer, type_map)
- preamble = search_data_header_struct.pack(b'MCS',
- searchdata_format_version, symbol_count,
- search_data_header_struct.size + len(serialized_trie),
- search_data_header_struct.size + len(serialized_trie) + len(serialized_map))
+ preamble = serializer.pack_header(symbol_count, len(serialized_trie), len(serialized_map))
return preamble + serialized_trie + serialized_map + serialized_type_map
def base85encode_search_data(data: bytearray) -> bytearray:
return (b"/* Generated by https://mcss.mosra.cz/documentation/doxygen/. Do not edit. */\n" +
b"Search.load('" + base64.b85encode(data, True) + b"');\n")
-def _pretty_print_trie(serialized: bytearray, hashtable, stats, base_offset, indent, *, show_merged, show_lookahead_barriers, color_map) -> str:
+def _pretty_print_trie(deserializer: Deserializer, serialized: bytearray, hashtable, stats, base_offset, indent, *, show_merged, show_lookahead_barriers, color_map) -> str:
# Visualize where the trees were merged
if show_merged and base_offset in hashtable:
return color_map['red'] + '#' + color_map['reset']
stats.node_count += 1
out = ''
- result_count, child_count = Trie.header_struct.unpack_from(serialized, base_offset)
- # If result count has the high bit set, it's stored in 11 bits and child
- # count in 4 bits instead of 7 + 8
- if result_count & 0x80:
- result_count = (result_count & 0x7f) | ((child_count & 0xf0) << 3)
- child_count = child_count & 0x0f
- stats.max_node_results = max(result_count, stats.max_node_results)
- stats.max_node_children = max(child_count, stats.max_node_children)
- offset = base_offset + Trie.header_struct.size
+ result_ids, child_chars_offsets_barriers, offset = deserializer.unpack_trie_node(serialized, base_offset)
+
+ stats.max_node_results = max(len(result_ids), stats.max_node_results)
+ stats.max_node_children = max(len(child_chars_offsets_barriers), stats.max_node_children)
# print results, if any
- if result_count:
+ if result_ids:
out += color_map['blue'] + ' ['
- for i in range(result_count):
+ for i, result in enumerate(result_ids):
if i: out += color_map['blue']+', '
- result = Trie.result_struct.unpack_from(serialized, offset)[0]
stats.max_node_result_index = max(result, stats.max_node_result_index)
out += color_map['cyan'] + str(result)
- offset += Trie.result_struct.size
out += color_map['blue'] + ']'
# print children, if any
- for i in range(child_count):
- if result_count or i:
+ for i, (char, offset, barrier) in enumerate(child_chars_offsets_barriers):
+ if len(result_ids) or i:
out += color_map['reset'] + '\n'
out += color_map['blue'] + indent + color_map['white']
- char = Trie.child_char_struct.unpack_from(serialized, offset + 3)[0]
if char <= 127:
out += chr(char)
else:
out += color_map['reset'] + hex(char)
- if (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000):
+ if (show_lookahead_barriers and barrier):
out += color_map['green'] + '$'
- if char > 127 or (show_lookahead_barriers and Trie.child_struct.unpack_from(serialized, offset)[0] & 0x00800000):
+ if char > 127 or (show_lookahead_barriers and barrier):
out += color_map['reset'] + '\n' + color_map['blue'] + indent + ' ' + color_map['white']
- child_offset = Trie.child_struct.unpack_from(serialized, offset)[0] & 0x007fffff
- stats.max_node_child_offset = max(child_offset, stats.max_node_child_offset)
- offset += Trie.child_struct.size
- out += _pretty_print_trie(serialized, hashtable, stats, child_offset, indent + ('|' if child_count > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
- child_count += 1
+ stats.max_node_child_offset = max(offset, stats.max_node_child_offset)
+ out += _pretty_print_trie(deserializer, serialized, hashtable, stats, offset, indent + ('|' if len(child_chars_offsets_barriers) > 1 else ' '), show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
hashtable[base_offset] = True
return out
'yellow': '',
'reset': ''}
-def pretty_print_trie(serialized: bytes, *, show_merged=False, show_lookahead_barriers=True, colors=False):
+def pretty_print_trie(deserializer: Deserializer, serialized: bytes, *, show_merged=False, show_lookahead_barriers=True, colors=False):
color_map = color_map_colors if colors else color_map_dummy
hashtable = {}
stats.max_node_result_index = 0
stats.max_node_child_offset = 0
- out = _pretty_print_trie(serialized, hashtable, stats, Trie.root_offset_struct.unpack_from(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
+ out = _pretty_print_trie(deserializer, serialized, hashtable, stats, deserializer.unpack_trie_root_offset(serialized, 0)[0], '', show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, color_map=color_map)
if out: out = color_map['white'] + out
stats = """
node count: {}
max node child offset: {}""".lstrip().format(stats.node_count, stats.max_node_results, stats.max_node_children, stats.max_node_result_index, stats.max_node_child_offset)
return out, stats
-def pretty_print_map(serialized: bytes, *, entryTypeClass, colors=False):
+def pretty_print_map(deserializer: Deserializer, serialized: bytes, *, entryTypeClass, colors=False):
color_map = color_map_colors if colors else color_map_dummy
# The first item gives out offset of first value, which can be used to
# calculate total value count
- offset = ResultMap.offset_struct.unpack_from(serialized, 0)[0] & 0x00ffffff
- size = int(offset/4 - 1)
+ offset, offset_size = deserializer.unpack_result_map_offset(serialized, 0)
+ size = int((offset - offset_size)/(offset_size + Serializer.result_map_flag_bytes))
+ flags_offset = (size + 1)*offset_size
out = ''
for i in range(size):
if i: out += '\n'
- flags = ResultFlag(ResultMap.flags_struct.unpack_from(serialized, i*4 + 3)[0])
+ flags = ResultFlag(deserializer.unpack_result_map_flags(serialized, flags_offset + i*Serializer.result_map_flag_bytes)[0])
extra = []
if flags & ResultFlag._TYPE == ResultFlag.ALIAS:
- extra += ['alias={}'.format(ResultMap.alias_struct.unpack_from(serialized, offset)[0])]
- offset += ResultMap.alias_struct.size
+ alias, alias_bytes = deserializer.unpack_result_map_alias(serialized, offset)
+ extra += ['alias={}'.format(alias)]
+ offset += alias_bytes
if flags & ResultFlag.HAS_PREFIX:
- extra += ['prefix={}[:{}]'.format(*ResultMap.prefix_struct.unpack_from(serialized, offset))]
- offset += ResultMap.prefix_struct.size
+ prefix_id, prefix_length, prefix_bytes = deserializer.unpack_result_map_prefix(serialized, offset)
+ extra += ['prefix={}[:{}]'.format(prefix_id, prefix_length)]
+ offset += prefix_bytes
if flags & ResultFlag.HAS_SUFFIX:
- extra += ['suffix_length={}'.format(ResultMap.suffix_length_struct.unpack_from(serialized, offset)[0])]
- offset += ResultMap.suffix_length_struct.size
+ suffix_length, suffix_bytes = deserializer.unpack_result_map_suffix_length(serialized, offset)
+ extra += ['suffix_length={}'.format(suffix_length)]
+ offset += suffix_bytes
if flags & ResultFlag.DEPRECATED:
extra += ['deprecated']
if flags & ResultFlag.DELETED:
extra += ['deleted']
if flags & ResultFlag._TYPE:
extra += ['type={}'.format(entryTypeClass(flags.type).name)]
- next_offset = ResultMap.offset_struct.unpack_from(serialized, (i + 1)*4)[0] & 0x00ffffff
+ next_offset = deserializer.unpack_result_map_offset(serialized, (i + 1)*offset_size)[0]
name, _, url = serialized[offset:next_offset].partition(b'\0')
out += color_map['cyan'] + str(i) + color_map['blue'] + ': ' + color_map['white'] + name.decode('utf-8') + color_map['blue'] + ' [' + color_map['yellow'] + (color_map['blue'] + ', ' + color_map['yellow']).join(extra) + color_map['blue'] + '] ->' + (' ' + color_map['reset'] + url.decode('utf-8') if url else '')
offset = next_offset
return out
-def pretty_print_type_map(serialized: bytes, *, entryTypeClass):
+def pretty_print_type_map(deserializer: Deserializer, serialized: bytes, *, entryTypeClass):
# Unpack until we aren't at EOF
i = 0
out = ''
- class_id, offset = type_map_entry_struct.unpack_from(serialized, 0)
- while offset < len(serialized):
+ class_id, name_offset, type_map_bytes = deserializer.unpack_type_map_entry(serialized, 0)
+ while name_offset < len(serialized):
if i: out += ',\n'
- next_class_id, next_offset = type_map_entry_struct.unpack_from(serialized, (i + 1)*type_map_entry_struct.size)
- out += "({}, {}, '{}')".format(entryTypeClass(i + 1), CssClass(class_id), serialized[offset:next_offset].decode('utf-8'))
+ next_class_id, next_name_offset = deserializer.unpack_type_map_entry(serialized, (i + 1)*type_map_bytes)[:2]
+ out += "({}, {}, '{}')".format(entryTypeClass(i + 1), CssClass(class_id), serialized[name_offset:next_name_offset].decode('utf-8'))
i += 1
- class_id, offset = next_class_id, next_offset
+ class_id, name_offset = next_class_id, next_name_offset
return out
def pretty_print(serialized: bytes, *, entryTypeClass, show_merged=False, show_lookahead_barriers=True, colors=False):
- magic, version, symbol_count, map_offset, type_map_offset = search_data_header_struct.unpack_from(serialized)
- assert magic == b'MCS'
- assert version == searchdata_format_version
-
- pretty_trie, stats = pretty_print_trie(serialized[search_data_header_struct.size:map_offset], show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, colors=colors)
- pretty_map = pretty_print_map(serialized[map_offset:type_map_offset], entryTypeClass=entryTypeClass, colors=colors)
- pretty_type_map = pretty_print_type_map(serialized[type_map_offset:], entryTypeClass=entryTypeClass)
- return '{} symbols\n'.format(symbol_count) + pretty_trie + '\n' + pretty_map + '\n' + pretty_type_map, stats
+ deserializer = Deserializer.from_serialized(serialized)
+
+ pretty_trie, stats = pretty_print_trie(deserializer, serialized[Serializer.header_struct.size:deserializer.map_offset], show_merged=show_merged, show_lookahead_barriers=show_lookahead_barriers, colors=colors)
+ pretty_map = pretty_print_map(deserializer, serialized[deserializer.map_offset:deserializer.type_map_offset], entryTypeClass=entryTypeClass, colors=colors)
+ pretty_type_map = pretty_print_type_map(deserializer, serialized[deserializer.type_map_offset:], entryTypeClass=entryTypeClass)
+ return '{} symbols\n'.format(deserializer.symbol_count) + pretty_trie + '\n' + pretty_map + '\n' + pretty_type_map, stats
from pygments.formatters import HtmlFormatter
from pygments.lexers import TextLexer, BashSessionLexer, get_lexer_by_name, find_lexer_class_for_filename
-from _search import CssClass, ResultFlag, ResultMap, Trie, serialize_search_data, base85encode_search_data, search_filename, searchdata_filename, searchdata_filename_b85, searchdata_format_version
+from _search import CssClass, ResultFlag, ResultMap, Trie, Serializer, serialize_search_data, base85encode_search_data, search_filename, searchdata_filename, searchdata_filename_b85, searchdata_format_version
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../plugins'))
import dot2svg
# order by default
trie.sort(map)
- return serialize_search_data(trie, map, search_type_map, symbol_count, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes)
+ return serialize_search_data(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1), trie, map, search_type_map, symbol_count, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes)
def parse_xml(state: State, xml: str):
# Reset counter for unique math formulas
import jinja2
-from _search import CssClass, ResultFlag, ResultMap, Trie, serialize_search_data, base85encode_search_data, searchdata_format_version, search_filename, searchdata_filename, searchdata_filename_b85
+from _search import CssClass, ResultFlag, ResultMap, Trie, Serializer, serialize_search_data, base85encode_search_data, searchdata_format_version, search_filename, searchdata_filename, searchdata_filename_b85
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../plugins'))
import m.htmlsanity
# order by default
trie.sort(map)
- return serialize_search_data(trie, map, search_type_map, symbol_count, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes)
+ return serialize_search_data(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1), trie, map, search_type_map, symbol_count, merge_subtrees=merge_subtrees, merge_prefixes=merge_prefixes)
def run(basedir, config, *, templates=default_templates, search_add_lookahead_barriers=True, search_merge_subtrees=True, search_merge_prefixes=True):
# Populate the INPUT, if not specified, make it absolute
"use strict"; /* it summons the Cthulhu in a proper way, they say */
var Search = {
- formatVersion: 1, /* the data filename contains this number too */
+ formatVersion: 2, /* the data filename contains this number too */
dataSize: 0, /* used mainly by tests, not here */
symbolCount: '…',
trie: null,
map: null,
+ mapFlagsOffset: null,
typeMap: null,
maxResults: 0,
+ /* Type sizes and masks. The data is always fetched as a 16/32bit number
+ and then masked to 1, 2, 3 or 4 bytes. Fortunately on LE a mask is
+ enough; on BE we'd have to start reading N bytes earlier and then
+ mask. */
+ nameSizeBytes: null,
+ nameSizeMask: null,
+ resultIdBytes: null,
+ resultIdMask: null,
+ fileOffsetBytes: null,
+ fileOffsetMask: null,
+ lookaheadBarrierMask: null,
+
/* Always contains at least the root node offset and then one node offset
per entered character */
searchString: '',
/* The file is too short to contain at least the headers and empty
sections */
- if(view.byteLength < 26) {
+ if(view.byteLength < 31) {
console.error("Search data too short");
return false;
}
return false;
}
- /* Separate the data into the trie and the result / type map */
- let mapOffset = view.getUint32(6, true);
- let typeMapOffset = view.getUint32(10, true);
- this.trie = new DataView(buffer, 14, mapOffset - 14);
- this.map = new DataView(buffer, mapOffset, typeMapOffset - mapOffset);
+ /* Fetch type sizes. The only value that can fail is result ID byte
+ count, where a value of 3 has no assigned meaning. */
+ let typeSizes = view.getUint8(4);
+ if((typeSizes & 0x01) >> 0 == 0) {
+ this.fileOffsetBytes = 3;
+ this.fileOffsetMask = 0x00ffffff;
+ this.lookaheadBarrierMask = 0x00800000;
+ } else /* (typeSizes & 0x01) >> 0 == 1 */ {
+ this.fileOffsetBytes = 4;
+ this.fileOffsetMask = 0xffffffff;
+ this.lookaheadBarrierMask = 0x80000000;
+ }
+ if((typeSizes & 0x06) >> 1 == 0) {
+ this.resultIdBytes = 2;
+ this.resultIdMask = 0x0000ffff;
+ } else if((typeSizes & 0x06) >> 1 == 1) {
+ this.resultIdBytes = 3;
+ this.resultIdMask = 0x00ffffff;
+ } else if((typeSizes & 0x06) >> 1 == 2) {
+ this.resultIdBytes = 4;
+ this.resultIdMask = 0xffffffff;
+ } else /* (typeSizes & 0x06) >> 1 == 3 */ {
+ console.error("Invalid search data result ID byte value");
+ return false;
+ }
+ if((typeSizes & 0x08) >> 3 == 0) {
+ this.nameSizeBytes = 1;
+ this.nameSizeMask = 0x00ff;
+ } else /* (typeSizes & 0x08) >> 3 == 1 */ {
+ this.nameSizeBytes = 2;
+ this.nameSizeMask = 0xffff;
+ }
+
+ /* Separate the data into the trie and the result / type map. Because
+ we're reading larger values than there might be and then masking out
+ the high bytes, keep an extra byte or two of padding at the end to
+ avoid OOB errors. */
+ let mapOffset = view.getUint32(12, true);
+ let typeMapOffset = view.getUint32(16, true);
+ /* There may be a 3-byte file offset at the end of the trie which we'll
+ read as 32-bit, add one safety byte in that case */
+ this.trie = new DataView(buffer, 20, mapOffset - 20 + (4 - this.fileOffsetBytes));
+ /* There may be a 3-byte file size (for zero results) which we'll read
+ as 32-bit, add one safety byte in that case */
+ this.map = new DataView(buffer, mapOffset, typeMapOffset - mapOffset + (4 - this.fileOffsetBytes));
+ /* No variable-size types in the type map at the moment */
this.typeMap = new DataView(buffer, typeMapOffset);
+ /* Offset of the first result map item is after N + 1 offsets and N
+ flags, calculate flag offset from that */
+ this.mapFlagsOffset = this.fileOffsetBytes*(((this.map.getUint32(0, true) & this.fileOffsetMask) - this.fileOffsetBytes)/(this.fileOffsetBytes + 1) + 1);
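+ /* E.g. with 100 entries and 3-byte file offsets the first item offset
+ is 100 + 101*3 = 403, so the flags start at 3*((403 - 3)/4 + 1) = 303,
+ i.e. right after the 101 offsets. */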
+
/* Set initial properties */
this.dataSize = buffer.byteLength;
- this.symbolCount = view.getUint16(4, true) + " symbols (" + Math.round(this.dataSize/102.4)/10 + " kB)";
+ this.symbolCount = view.getUint32(8, true) + " symbols (" + Math.round(this.dataSize/102.4)/10 + " kB)";
this.maxResults = maxResults ? maxResults : 100;
this.searchString = '';
this.searchStack = [this.trie.getUint32(0, true)];
/* Calculate offset and count of children */
let offset = this.searchStack[this.searchStack.length - 1];
- /* Calculate child count. If there's a lot of results, the count
- "leaks over" to the child count storage. */
+ /* If there's a lot of results, the result count is a 16bit BE value
+ instead */
let resultCount = this.trie.getUint8(offset);
- let childCount = this.trie.getUint8(offset + 1);
+ let resultCountSize = 1;
if(resultCount & 0x80) {
- resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3);
- childCount = childCount & 0x0f;
+ resultCount = this.trie.getUint16(offset, false) & ~0x8000;
+ ++resultCountSize;
}
+ let childCount = this.trie.getUint8(offset + resultCountSize);
+
/* Go through all children and find the next offset */
- let childOffset = offset + 2 + resultCount*2;
+ let childOffset = offset + resultCountSize + 1 + resultCount*this.resultIdBytes;
let found = false;
for(let j = 0; j != childCount; ++j) {
- if(String.fromCharCode(this.trie.getUint8(childOffset + j*4 + 3)) != searchString[foundPrefix])
+ if(String.fromCharCode(this.trie.getUint8(childOffset + j)) != searchString[foundPrefix])
continue;
- this.searchStack.push(this.trie.getUint32(childOffset + j*4, true) & 0x007fffff);
+ this.searchStack.push(this.trie.getUint32(childOffset + childCount + j*this.fileOffsetBytes, true) & this.fileOffsetMask & ~this.lookaheadBarrierMask);
found = true;
break;
}
"leaks over" to the child count storage. */
/* TODO: hmmm. this is helluvalot duplicated code. hmm. */
let resultCount = this.trie.getUint8(offset);
- let childCount = this.trie.getUint8(offset + 1);
+ let resultCountSize = 1;
if(resultCount & 0x80) {
- resultCount = (resultCount & 0x7f) | ((childCount & 0xf0) << 3);
- childCount = childCount & 0x0f;
+ resultCount = this.trie.getUint16(offset, false) & ~0x8000;
+ ++resultCountSize;
}
+ let childCount = this.trie.getUint8(offset + resultCountSize);
+
/* Populate the results with all values associated with this node */
for(let i = 0; i != resultCount; ++i) {
- let index = this.trie.getUint16(offset + 2 + i*2, true);
+ let index = this.trie.getUint32(offset + resultCountSize + 1 + i*this.resultIdBytes, true) & this.resultIdMask;
results.push(this.gatherResult(index, suffixLength, 0xffffff)); /* should be enough haha */
/* 'nuff said. */
}
/* Dig deeper */
- let childOffset = offset + 2 + resultCount*2;
+ let childOffset = offset + resultCountSize + 1 + resultCount*this.resultIdBytes;
for(let j = 0; j != childCount; ++j) {
- let offsetBarrier = this.trie.getUint32(childOffset + j*4, true);
+ let offsetBarrier = this.trie.getUint32(childOffset + childCount + j*this.fileOffsetBytes, true) & this.fileOffsetMask;
/* Lookahead barrier, don't dig deeper */
- if(offsetBarrier & 0x00800000) continue;
+ if(offsetBarrier & this.lookaheadBarrierMask) continue;
/* Append to the queue */
- leaves.push([offsetBarrier & 0x007fffff, suffixLength + 1]);
+ leaves.push([offsetBarrier & ~this.lookaheadBarrierMask, suffixLength + 1]);
/* We don't have anything yet and this is the only path
forward, add the char to suggested Tab autocompletion. Can't
absolutely unwanted when all I want is check for truncated
UTF-8. */
if(!results.length && leaves.length == 1 && childCount == 1)
- suggestedTabAutocompletionChars.push(this.trie.getUint8(childOffset + j*4 + 3));
+ suggestedTabAutocompletionChars.push(this.trie.getUint8(childOffset + j));
}
}
},
gatherResult: function(index, suffixLength, maxUrlPrefix) {
- let flags = this.map.getUint8(index*4 + 3);
- let resultOffset = this.map.getUint32(index*4, true) & 0x00ffffff;
+ let flags = this.map.getUint8(this.mapFlagsOffset + index);
+ let resultOffset = this.map.getUint32(index*this.fileOffsetBytes, true) & this.fileOffsetMask;
/* The result is an alias, parse the aliased prefix */
let aliasedIndex = null;
if((flags & 0xf0) == 0x00) {
- aliasedIndex = this.map.getUint16(resultOffset, true);
- resultOffset += 2;
+ aliasedIndex = this.map.getUint32(resultOffset, true) & this.resultIdMask;
+ resultOffset += this.resultIdBytes;
}
/* The result has a prefix, parse that first, recursively */
let name = '';
let url = '';
if(flags & (1 << 3)) {
- let prefixIndex = this.map.getUint16(resultOffset, true);
- let prefixUrlPrefixLength = Math.min(this.map.getUint8(resultOffset + 2), maxUrlPrefix);
+ let prefixIndex = this.map.getUint32(resultOffset, true) & this.resultIdMask;
+ let prefixUrlPrefixLength = Math.min(this.map.getUint16(resultOffset + this.resultIdBytes, true) & this.nameSizeMask, maxUrlPrefix);
let prefix = this.gatherResult(prefixIndex, 0 /*ignored*/, prefixUrlPrefixLength);
name = prefix.name;
url = prefix.url;
- resultOffset += 3;
+ resultOffset += this.resultIdBytes + this.nameSizeBytes;
}
/* The result has a suffix, extract its length */
let resultSuffixLength = 0;
if(flags & (1 << 0)) {
- resultSuffixLength = this.map.getUint8(resultOffset);
- ++resultOffset;
+ resultSuffixLength = this.map.getUint16(resultOffset, true) & this.nameSizeMask;
+ resultOffset += this.nameSizeBytes;
}
- let nextResultOffset = this.map.getUint32((index + 1)*4, true) & 0x00ffffff;
+ let nextResultOffset = this.map.getUint32((index + 1)*this.fileOffsetBytes, true) & this.fileOffsetMask;
/* Extract name */
let j = resultOffset;
(CssClass.PRIMARY, "class"),
(CssClass.INFO, "func")
]
+
+# Tries don't store any strings, so name_size_bytes can be whatever
+trie_type_sizes = [
+ {'file_offset_bytes': 3,
+ 'result_id_bytes': 2,
+ 'name_size_bytes': 1},
+ {'file_offset_bytes': 3,
+ 'result_id_bytes': 3,
+ 'name_size_bytes': 1},
+ {'file_offset_bytes': 3,
+ 'result_id_bytes': 4,
+ 'name_size_bytes': 1},
+
+ {'file_offset_bytes': 4,
+ 'result_id_bytes': 2,
+ 'name_size_bytes': 1},
+ {'file_offset_bytes': 4,
+ 'result_id_bytes': 3,
+ 'name_size_bytes': 1},
+ {'file_offset_bytes': 4,
+ 'result_id_bytes': 4,
+ 'name_size_bytes': 1},
+]
+
+type_sizes = trie_type_sizes + [
+ {'file_offset_bytes': 3,
+ 'result_id_bytes': 2,
+ 'name_size_bytes': 2},
+ {'file_offset_bytes': 3,
+ 'result_id_bytes': 3,
+ 'name_size_bytes': 2},
+ {'file_offset_bytes': 3,
+ 'result_id_bytes': 4,
+ 'name_size_bytes': 2},
+
+ {'file_offset_bytes': 4,
+ 'result_id_bytes': 2,
+ 'name_size_bytes': 2},
+ {'file_offset_bytes': 4,
+ 'result_id_bytes': 3,
+ 'name_size_bytes': 2},
+ {'file_offset_bytes': 4,
+ 'result_id_bytes': 4,
+ 'name_size_bytes': 2},
+]
--- /dev/null
+O+!-x000002LJ#7zySaN&H?}cmH_|&0RRC200Aik00001C<p)m00C|e00001X%qke0RRI400Ai=00001C?o&?00C|)00001X)pi)00C__00001Iy?XX0RaL4Izj+|00DAH00001Z%_aL00DGX00001V_X0L00CuU000311poj6DQ*A&00Ag+00001Zg>Cy00C)!00001ZG->-00BCR000321OPga0Du4iWtIQ{00C#700001ZlnMJ00CjD00003ZFX`R003wJ0I&c600BC-000320026~0Du4iXvzQp00DH;000930RRI41poj6Dc%4800AiG00001ZtMU600LoY*Z=_X000312mk;9WdZ>J00C?U0RR92XAA)V00C|i0RRC32>@Xj0RR92bRGc!00Cnr0RR93VP&cS04xCj0RRU800Ct@0RR92XFve}00Ch}0RR92a7+OJ00ClB0RR92byxua00DDe0RR92AY=gm00Fyd0RR92!f*iq0RaX8Aa((O00Cuu0RR92XM_O&00Ci&0RR96ZFX{SbNB!NXaE2*0RWHz0A2wAD*ymO003G50Db@flmGy>007JY0NMZm^#A|>0RRpG03b;^NjOaq7yt=PVRUE!ZeeX@b8ul}WldppXf9}UZEOGl5(qjvZE0>OX>N2ZAZc!NDF7pFX>I@j06IEWWn*-2asXp&VRLg$VRUF;F<&uOWn*-2axQ3eZEOMn7zR2zZE0>ODIjBSZgX@1BW-DJ0000wI#OY7XJr6mY+-YAO<{CsUol@XQekdqWiDuRZEOSp7X~^yZE0>ODIjBSZgX@1BW-DJP+@0f0B~VvWiDuRZEOYr03gD<AX9Z>aA9X<0CRO>aA9X<E@*UZYy<#OWn*+<Zf9&|1ONyC00KHXQe|UwC@BI80S*Bd1snh%aA9X<ZeeX@b8ul}Wn*k%b8}{OZesud
\ No newline at end of file
+++ /dev/null
-O+!-w2LQSO007AX005Q&000310RR921ONaj009U904M+f4gdgd009&L0BHdL0{{R4AOHX<00ATb04M+fDgXd(00A%n0BHaLHUI!^00BGz06GBy0suk)fI0vHNB{tG00B?{0B-;RRsaBW00CS80Am0FVgLYT0RRO600C|Q04V?gasU7*00DRa0B!&QegFVz00D#m0BryPiU0sQ0RaR6kN|)>00EW&0A&CHo&W%600E=`0B!&QssI3C00SBT0BvXh0Cund0CE5Uwg3P+0RaF2!~lRg00GJX0B8UK(f|N-0{{U40{{g800G_r04V?g<^TXF00Ha(0B!&R*Z=@w@&Ev70RRX9009C40A&CH1_1zU009gE0A~OJ5&-~i0RagB7y$rb00ABW0CWHWCIJ9r00OE20AVZv0A&FH2LJ#8JOKb@00BS&0A~OJMgag}00B$^0B`^SQUL&B00CG50CfNXUI74e00CqH03ZMXY5@Sd00D3T0Kx$Q1^{*efFJ+?d;tJu00D#n0A~OJiU9y&00sB}0BvXh0Cq9~0CJE40B~Lb0COw=03bsE07+W_06KpF07;bq064b*08PyR01(>%02uZF0003200|EP002#4bZ7u>VQpn|aA9L*O<{CsE@*UZYybcf2s%1#X>KTKZgealX>N2W03&T_ZU6uPIyzQmV{~tF0Ap-nb8}5$bZB2OUolo?V{~tFE@*UZYyton20A)zX>KSfAY*TCb94YBZE0=*0025VQekdqWdLJrVRLg$VRUF;F<&uKVQyz-E@*UZYy<!o20A)zX>KSfAY*TCb94YBZE0>$VP|CkaA9X<E@*UZYz6=TAi}#KQ*~l+VP|Ckb9G{HVP|D7Xmo9C1OQTHV{~C|XKZBz00;m80y;WUWn*+GDFO-s4gnVh8~`A2VP|D-VQpn|aA9L*V{Bn_b7pmJV*mgE
\ No newline at end of file
-#############
\ No newline at end of file
+##############################
\ No newline at end of file
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__))))
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
-from _search_test_metadata import EntryType, search_type_map
-from _search import Trie, ResultMap, ResultFlag, serialize_search_data, search_data_header_struct
+from _search_test_metadata import EntryType, search_type_map, type_sizes
+from _search import Trie, ResultMap, ResultFlag, serialize_search_data, Serializer
basedir = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))/'js-test-data'
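+# Produces e.g. 'ns1-ri2-fo3' for the smallest type sizes; used in the data
+# file names checked by the JS tests.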
+def type_size_suffix(*, name_size_bytes, result_id_bytes, file_offset_bytes):
+ return f'ns{name_size_bytes}-ri{result_id_bytes}-fo{file_offset_bytes}'
+
# Basic error handling
+min_size = len(serialize_search_data(Serializer(name_size_bytes=1, result_id_bytes=2, file_offset_bytes=3), Trie(), ResultMap(), [], 0))
+
with open(basedir/'short.bin', 'wb') as f:
- f.write(b'#'*(search_data_header_struct.size - 1))
+ f.write(b'#'*(min_size - 1))
with open(basedir/'wrong-magic.bin', 'wb') as f:
- f.write(b'MOS\1 ')
+ f.write(b'MOS\2')
+ f.write(b'\0'*(min_size - 4))
with open(basedir/'wrong-version.bin', 'wb') as f:
- f.write(b'MCS\0 ')
-with open(basedir/'empty.bin', 'wb') as f:
- f.write(serialize_search_data(Trie(), ResultMap(), [], 0))
+ f.write(b'MCS\1')
+ f.write(b'\0'*(min_size - 4))
+with open(basedir/'wrong-result-id-bytes.bin', 'wb') as f:
+ f.write(Serializer.header_struct.pack(b'MCS', 2, 3 << 1, 0, 0, 0))
+ f.write(b'\0'*(min_size - Serializer.header_struct.size))
+
+# Empty file, in all possible type size combinations
-# General test
+for i in type_sizes:
+ with open(basedir/'empty-{}.bin'.format(type_size_suffix(**i)), 'wb') as f:
+ f.write(serialize_search_data(Serializer(**i), Trie(), ResultMap(), [], 0))
+
+# General test, in all possible type size combinations
trie = Trie()
map = ResultMap()
trie.insert("rectangle", map.add("Rectangle", "", alias=range_index))
trie.insert("rect", map.add("Rectangle::Rect()", "", suffix_length=2, alias=range_index))
-with open(basedir/'searchdata.bin', 'wb') as f:
- f.write(serialize_search_data(trie, map, search_type_map, 7))
-with open(basedir/'searchdata.b85', 'wb') as f:
- f.write(base64.b85encode(serialize_search_data(trie, map, search_type_map, 7), True))
+for i in type_sizes:
+ with open(basedir/'searchdata-{}.bin'.format(type_size_suffix(**i)), 'wb') as f:
+ f.write(serialize_search_data(Serializer(**i), trie, map, search_type_map, 7))
+
+# The Base-85 file however doesn't need to have all type size variants as it's
+# just used to verify it decodes to the right binary variant
+with open(basedir/'searchdata-{}.b85'.format(type_size_suffix(**type_sizes[0])), 'wb') as f:
+ f.write(base64.b85encode(serialize_search_data(Serializer(**type_sizes[0]), trie, map, search_type_map, 7), True))
-# UTF-8 names
+# UTF-8 names, nothing size-dependent here so just one variant
trie = Trie()
map = ResultMap()
trie.insert("hárá", map.add("Hárá", "#b", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.PAGE)))
with open(basedir/'unicode.bin', 'wb') as f:
- f.write(serialize_search_data(trie, map, search_type_map, 2))
+ f.write(serialize_search_data(Serializer(**type_sizes[0]), trie, map, search_type_map, 2))
-# Heavy prefix nesting
+# Heavy prefix nesting, nothing size-dependent here so just one variant
trie = Trie()
map = ResultMap()
trie.insert("range", map.add("Magnum::Math::Range", "classMagnum_1_1Math_1_1Range.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)))
with open(basedir/'nested.bin', 'wb') as f:
- f.write(serialize_search_data(trie, map, search_type_map, 4))
+ f.write(serialize_search_data(Serializer(**type_sizes[0]), trie, map, search_type_map, 4))
-# Extreme amount of search results (Python's __init__, usually)
+# Extreme amount of search results (Python's __init__, usually), in all
+# possible type size combinations
trie = Trie()
map = ResultMap()
for i in [3, 15, 67]:
trie.insert("__init__subclass__", map.add(f"Foo{i}.__init__subclass__(self)", f"Foo{i}.html#__init__subclass__", suffix_length=6, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC)))
-with open(basedir/'manyresults.bin', 'wb') as f:
- f.write(serialize_search_data(trie, map, search_type_map, 128 + 3))
+for i in type_sizes:
+ with open(basedir/'manyresults-{}.bin'.format(type_size_suffix(**i)), 'wb') as f:
+ f.write(serialize_search_data(Serializer(**i), trie, map, search_type_map, 128 + 3))
assert.deepEqual(Buffer.from(buf), Buffer.from([0, 0, 0, 0]));
}
-/* Verify that base85-decoded file is equivalent to the binary */
+let type_size_suffixes = [
+ 'ns1-ri2-fo3',
+ 'ns1-ri2-fo4',
+ 'ns1-ri3-fo3',
+ 'ns1-ri3-fo4',
+ 'ns1-ri4-fo3',
+ 'ns1-ri4-fo4',
+
+ 'ns2-ri2-fo3',
+ 'ns2-ri2-fo4',
+ 'ns2-ri3-fo3',
+ 'ns2-ri3-fo4',
+ 'ns2-ri4-fo3',
+ 'ns2-ri4-fo4',
+]
+
+/* Verify that the base85-decoded file is equivalent to the binary. Nothing
+ type-size-dependent in the decoder, so test just on the first variant. */
{
- let binary = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata.bin"));
- assert.equal(binary.byteLength, 745);
- let b85 = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata.b85"), {encoding: 'utf-8'});
+ let binary = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata-" + type_size_suffixes[0] + ".bin"));
+ assert.equal(binary.byteLength, 750);
+ let b85 = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata-" + type_size_suffixes[0] + ".b85"), {encoding: 'utf-8'});
assert.deepEqual(new DataView(binary.buffer.slice(binary.byteOffset, binary.byteOffset + binary.byteLength)), new DataView(Search.base85decode(b85), 0, binary.byteLength));
}
assert.ok(!Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
}
-/* Search with empty data */
+/* Opening file with wrong result id byte count */
{
- let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/empty.bin"));
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/wrong-result-id-bytes.bin"));
+ assert.ok(!Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
+}
+
+/* Search with empty data, in all type size variants */
+for(let i = 0; i != type_size_suffixes.length; ++i) {
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/empty-" + type_size_suffixes[i] + ".bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
- assert.equal(Search.dataSize, 26);
+
+ /* Test just the smallest and largest size, everything else should be in
+ between */
+ if(i == 0)
+ assert.equal(Search.dataSize, 31);
+ else if(i == type_size_suffixes.length - 1)
+ assert.equal(Search.dataSize, 32);
+ else {
+ assert.ok(Search.dataSize >= 31 && Search.dataSize <= 32);
+ }
+
assert.equal(Search.symbolCount, "0 symbols (0 kB)");
assert.deepEqual(Search.search(''), [[], '']);
}
-/* Search */
-{
- let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata.bin"));
+/* Search, in all type size variants */
+for(let i = 0; i != type_size_suffixes.length; ++i) {
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata-" + type_size_suffixes[i] + ".bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
- assert.equal(Search.dataSize, 745);
- assert.equal(Search.symbolCount, "7 symbols (0.7 kB)");
+
+ /* Test just the smallest and largest size, everything else should be in
+ between */
+ if(i == 0) {
+ assert.equal(Search.dataSize, 750);
+ assert.equal(Search.symbolCount, "7 symbols (0.7 kB)");
+ } else if(i == type_size_suffixes.length - 1) {
+ assert.equal(Search.dataSize, 883);
+ assert.equal(Search.symbolCount, "7 symbols (0.9 kB)");
+ } else {
+ assert.ok(Search.dataSize > 750 && Search.dataSize < 883);
+ }
+
assert.equal(Search.maxResults, 100);
/* Blow up */
suffixLength: 8 }], '']);
}
-/* Search with spaces */
+/* Search with spaces. Nothing type-size-dependent here, so test just on the
+ first variant. */
{
- let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata.bin"));
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata-" + type_size_suffixes[0] + ".bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
- assert.equal(Search.dataSize, 745);
+ assert.equal(Search.dataSize, 750);
assert.equal(Search.symbolCount, "7 symbols (0.7 kB)");
assert.equal(Search.maxResults, 100);
suffixLength: 10 }], Search.toUtf8('» subpage')]);
}
-/* Search, limiting the results to 3 */
+/* Search, limiting the results to 3. Nothing type-size-dependent here, so test
+ just on the first variant. */
{
- let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata.bin"));
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata-" + type_size_suffixes[0] + ".bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength), 3));
- assert.equal(Search.dataSize, 745);
+ assert.equal(Search.dataSize, 750);
assert.equal(Search.symbolCount, "7 symbols (0.7 kB)");
assert.equal(Search.maxResults, 3);
assert.deepEqual(Search.search('m'), [[
suffixLength: 10 }], '']);
}
-/* Search loaded from a base85-encoded file should work properly */
+/* Search loaded from a base85-encoded file should work properly. Nothing
+ type-size-dependent here, so test just on the first variant. */
{
- let b85 = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata.b85"), {encoding: 'utf-8'});
+ let b85 = fs.readFileSync(path.join(__dirname, "js-test-data/searchdata-" + type_size_suffixes[0] + ".b85"), {encoding: 'utf-8'});
assert.ok(Search.load(b85));
- assert.equal(Search.dataSize, 748); /* some padding on the end, that's okay */
+ assert.equal(Search.dataSize, 752); /* some padding on the end, that's okay */
assert.equal(Search.symbolCount, "7 symbols (0.7 kB)");
assert.equal(Search.maxResults, 100);
assert.deepEqual(Search.search('min'), [[
suffixLength: 8 }], '()']);
}
-/* Search, Unicode */
+/* Search, Unicode. Nothing type-size-dependent here. */
{
let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/unicode.bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
- assert.equal(Search.dataSize, 160);
+ assert.equal(Search.dataSize, 165);
assert.equal(Search.symbolCount, "2 symbols (0.2 kB)");
/* Both "Hýždě" and "Hárá" have common autocompletion to "h\xA1", which is
not valid UTF-8, so it has to get truncated */
suffixLength: 3 }], Search.toUtf8('rá')]);
}
-/* Properly combine heavily nested URLs */
+/* Properly combine heavily nested URLs. Nothing type-size-dependent here. */
{
let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/nested.bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)));
- assert.equal(Search.dataSize, 331);
+ assert.equal(Search.dataSize, 336);
assert.equal(Search.symbolCount, "4 symbols (0.3 kB)");
assert.deepEqual(Search.search('geo'), [[
{ name: 'Magnum::Math::Geometry',
suffixLength: 3 }], 'nge']);
}
-/* Extreme amount of search results */
-{
- let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/manyresults.bin"));
+/* Extreme amount of search results, in all type size variants to ensure no
+ size assumptions were left there */
+for(let i = 0; i != type_size_suffixes.length; ++i) {
+ let buffer = fs.readFileSync(path.join(__dirname, "js-test-data/manyresults-" + type_size_suffixes[i] + ".bin"));
assert.ok(Search.init(buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength), 10000));
- assert.equal(Search.dataSize, 6415);
- assert.equal(Search.symbolCount, "131 symbols (6.3 kB)");
+
+ /* Test just the smallest and largest size, everything else should be in
+ between */
+ if(i == 0) {
+ assert.equal(Search.dataSize, 6421);
+ assert.equal(Search.symbolCount, "131 symbols (6.3 kB)");
+ } else if(i == type_size_suffixes.length - 1) {
+ assert.equal(Search.dataSize, 6964);
+ assert.equal(Search.symbolCount, "131 symbols (6.8 kB)");
+ } else {
+ assert.ok(Search.dataSize > 6421 && Search.dataSize < 6964);
+ }
+
assert.equal(Search.maxResults, 10000);
assert.deepEqual(Search.search('__init__')[0].length, 128 + 3);
assert.deepEqual(Search.search('__init__')[1], '');
import unittest
from types import SimpleNamespace as Empty
-from ._search_test_metadata import EntryType, search_type_map
-from _search import Trie, ResultMap, ResultFlag, serialize_search_data, pretty_print_trie, pretty_print_map, pretty_print
+from ._search_test_metadata import EntryType, search_type_map, trie_type_sizes, type_sizes
+from _search import Trie, ResultMap, ResultFlag, Serializer, Deserializer, serialize_search_data, pretty_print_trie, pretty_print_map, pretty_print
from test_doxygen import IntegrationTestCase
super().__init__(*args, **kwargs)
self.maxDiff = None
- def compare(self, serialized: bytes, expected: str):
- pretty = pretty_print_trie(serialized)[0]
+ def compare(self, deserializer: Deserializer, serialized: bytes, expected: str):
+ pretty = pretty_print_trie(deserializer, serialized)[0]
#print(pretty)
self.assertEqual(pretty, expected.strip())
def test_empty(self):
trie = Trie()
- serialized = trie.serialize()
- self.compare(serialized, "")
- self.assertEqual(len(serialized), 6)
+ for i in trie_type_sizes:
+ with self.subTest(**i):
+ serialized = trie.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, "")
+ self.assertEqual(len(serialized), 6)
def test_single(self):
trie = Trie()
trie.insert("magnum", 1337)
trie.insert("magnum", 21)
- serialized = trie.serialize()
- self.compare(serialized, """
+ for i in trie_type_sizes:
+ with self.subTest(**i):
+ serialized = trie.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, """
magnum [1337, 21]
""")
- self.assertEqual(len(serialized), 46)
+ # Verify just the smallest and largest size, everything else
+ # should fit in between
+ if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
+ self.assertEqual(len(serialized), 46)
+ elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
+ self.assertEqual(len(serialized), 56)
+ else:
+ self.assertGreater(len(serialized), 46)
+ self.assertLess(len(serialized), 56)
def test_multiple(self):
trie = Trie()
trie.insert("range::max", 10)
trie.insert("max", 10)
- serialized = trie.serialize()
- self.compare(serialized, """
+ for i in trie_type_sizes:
+ with self.subTest(**i):
+ serialized = trie.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, """
math [0]
||| :$
||| :vector [1]
| :min [9]
| ax [10]
""")
- self.assertEqual(len(serialized), 340)
+ # Verify just the smallest and largest size, everything else
+ # should fit in between
+ if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
+ self.assertEqual(len(serialized), 340)
+ elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
+ self.assertEqual(len(serialized), 428)
+ else:
+ self.assertGreater(len(serialized), 340)
+ self.assertLess(len(serialized), 428)
def test_unicode(self):
trie = Trie()
trie.insert("hýždě", 0)
trie.insert("hárá", 1)
- serialized = trie.serialize()
- self.compare(serialized, """
+ serialized = trie.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+ self.compare(Deserializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1), serialized, """
h0xc3
0xbd
0xc5
""")
self.assertEqual(len(serialized), 82)
- def test_many_results(self):
+ def test_16bit_result_count(self):
trie = Trie()
for i in range(128):
for i in [203, 215, 267]:
trie.insert("__init__subclass__", i)
- serialized = trie.serialize()
- self.compare(serialized, """
+ for i in trie_type_sizes:
+ with self.subTest(**i):
+ serialized = trie.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, """
__init__ [{}]
subclass__ [203, 215, 267]
""".format(', '.join([str(i) for i in range(128)])))
- self.assertEqual(len(serialized), 376)
+ # Verify just the smallest and largest size, everything else
+ # should fit in between
+ if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
+ self.assertEqual(len(serialized), 377)
+ elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
+ self.assertEqual(len(serialized), 657)
+ else:
+ self.assertGreater(len(serialized), 377)
+ self.assertLess(len(serialized), 657)
+
+ def test_16bit_result_id_too_small(self):
+ trie = Trie()
+ trie.insert("a", 65536)
+ with self.assertRaises(OverflowError):
+ trie.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+
+ # This should work
+ trie.serialize(Serializer(file_offset_bytes=3, result_id_bytes=3, name_size_bytes=1))
+
+ def test_24bit_result_id_too_small(self):
+ trie = Trie()
+ trie.insert("a", 16*1024*1024)
+ with self.assertRaises(OverflowError):
+ trie.serialize(Serializer(file_offset_bytes=3, result_id_bytes=3, name_size_bytes=1))
+
+ # This should work
+ trie.serialize(Serializer(file_offset_bytes=3, result_id_bytes=4, name_size_bytes=1))
+
+ def test_23bit_file_offset_too_small(self):
+ trie = Trie()
+
+        # The high bit of the child offset stores a lookahead barrier, so
+        # usable offsets are only 23 bits -- the file has to be smaller than
+        # 8M, not 16. Python has a recursion limit of 1000, so we can't
+        # really insert an 8M-character-long string. Instead, insert 130
+        # successively longer prefixes of an 'aaa...' string, each with 32k
+        # 16bit result IDs. 129 isn't enough to overflow the offsets.
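+        # As a rough sanity check (an estimate, not an exact formula): each
+        # node's result array alone is 32767*2 ~= 64 kB, so 130 nodes are
+        # ~8.5 MB of data and the child offsets stored near the end of the
+        # file no longer fit into 23 bits (2^23 = 8388608 bytes).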
+ results_32k = [j for j in range(32767)]
+ for i in range(130):
+ trie.insert('a'*i, results_32k)
+
+ with self.assertRaises(OverflowError):
+ trie.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+
+ # This should work
+ trie.serialize(Serializer(file_offset_bytes=4, result_id_bytes=2, name_size_bytes=1))
class MapSerialization(unittest.TestCase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.maxDiff = None
- def compare(self, serialized: bytes, expected: str):
- pretty = pretty_print_map(serialized, entryTypeClass=EntryType)
+ def compare(self, deserializer: Deserializer, serialized: bytes, expected: str):
+ pretty = pretty_print_map(deserializer, serialized, entryTypeClass=EntryType)
#print(pretty)
self.assertEqual(pretty, expected.strip())
def test_empty(self):
map = ResultMap()
- serialized = map.serialize()
- self.compare(serialized, "")
- self.assertEqual(len(serialized), 4)
+ for i in type_sizes:
+ with self.subTest(**i):
+ serialized = map.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, "")
+ self.assertEqual(len(serialized), i['file_offset_bytes'])
def test_single(self):
map = ResultMap()
+
self.assertEqual(map.add("Magnum", "namespaceMagnum.html", suffix_length=11, flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.NAMESPACE)), 0)
- serialized = map.serialize()
- self.compare(serialized, """
+ for i in type_sizes:
+ with self.subTest(**i):
+ serialized = map.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, """
0: Magnum [suffix_length=11, type=NAMESPACE] -> namespaceMagnum.html
""")
- self.assertEqual(len(serialized), 36)
+ # Verify just the smallest and largest size, everything else
+ # should fit in between. The `result_id_bytes` don't affect
+ # this case.
+ if i['file_offset_bytes'] == 3 and i['name_size_bytes'] == 1:
+ self.assertEqual(len(serialized), 35)
+ elif i['file_offset_bytes'] == 4 and i['name_size_bytes'] == 2:
+ self.assertEqual(len(serialized), 38)
+ else:
+ self.assertGreater(len(serialized), 35)
+ self.assertLess(len(serialized), 38)
def test_multiple(self):
map = ResultMap()
self.assertEqual(map.add("Rectangle", "", alias=2), 5)
self.assertEqual(map.add("Rectangle::Rect()", "", suffix_length=2, alias=2), 6)
- serialized = map.serialize()
- self.compare(serialized, """
+ for i in type_sizes:
+ with self.subTest(**i):
+ serialized = map.serialize(Serializer(**i))
+ self.compare(Deserializer(**i), serialized, """
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
5: Rectangle [alias=2] ->
6: ::Rect() [alias=2, prefix=5[:0], suffix_length=2] ->
""")
- self.assertEqual(len(serialized), 203)
+ # Verify just the smallest and largest size, everything else
+ # should fit in between
+ if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
+ self.assertEqual(len(serialized), 202)
+ elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
+ self.assertEqual(len(serialized), 231)
+ else:
+ self.assertGreater(len(serialized), 202)
+ self.assertLess(len(serialized), 231)
+
+ def test_24bit_file_offset_too_small(self):
+ map = ResultMap()
+ # 3 bytes for the initial offset, 3 bytes for file size, 1 byte for the
+ # flags, 1 byte for the null terminator, 6 bytes for the URL
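+        # That's 3 + 3 + 1 + 1 + 6 = 14 bytes of fixed overhead, so a name of
+        # 16M - 14 characters makes the file exactly 2^24 bytes long, which
+        # no longer fits into a 24bit offset / file size field.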
+ map.add('F'*(16*1024*1024 - 14), 'f.html', flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
+
+ with self.assertRaises(OverflowError):
+            # Disabling prefix merging, otherwise memory usage goes to hell
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1), merge_prefixes=False)
+
+        # This should work. Disabling prefix merging, otherwise memory usage
+        # goes to hell.
+ map.serialize(Serializer(file_offset_bytes=4, result_id_bytes=2, name_size_bytes=1), merge_prefixes=False)
+
+ def test_8bit_suffix_length_too_small(self):
+ map = ResultMap()
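+        # A suffix_length of 256 doesn't fit into an 8bit name size field
+        # (max 255), so serializing with name_size_bytes=1 is expected to
+        # overflow, while name_size_bytes=2 is fine.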
+ map.add("F()" + ';'*256, "f.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC), suffix_length=256)
+
+ with self.assertRaises(OverflowError):
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+
+ # This should work
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=2))
+
+ def test_8bit_prefix_length_too_small(self):
+ map = ResultMap()
+ map.add("A", 'a'*251 + ".html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
+ map.add("A::foo()", 'a'*251 + ".html#foo", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))
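+        # The two URLs share a common prefix of 'a'*251 + '.html', i.e. 256
+        # characters, which presumably no longer fits into an 8bit prefix
+        # length (max 255), hence the expected overflow with
+        # name_size_bytes=1 below.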
+
+ with self.assertRaises(OverflowError):
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+
+ # This should work
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=2))
+
+ def test_16bit_prefix_id_too_small(self):
+ map = ResultMap()
+
+        # Adding A0 to A65535 would be too slow due to the recursive Trie
+        # population during prefix merging (SIGH), so trying this instead.
+        # It's still hella slow, but at least not TWO MINUTES.
+ for i in range(128):
+ for j in range(128):
+ for k in range(4):
+ map.add(bytes([i, j, k]).decode('utf-8'), "a.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
+
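+        # The loop above added 128*128*4 = 65536 entries, so "B" below gets
+        # ID 65536, which no longer fits into a 16bit prefix ID.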
+ self.assertEqual(map.add("B", "b.html", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS)), 65536)
+ map.add("B::foo()", "b.html#foo", flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.FUNC))
+
+ with self.assertRaises(OverflowError):
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+
+ # This should work
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=3, name_size_bytes=1))
+
+        # Testing this error for a 24bit prefix ID seems infeasibly slow, so
+        # not doing that here
+
+ def test_16bit_alias_id_too_small(self):
+ map = ResultMap()
+
+        # The alias doesn't exist, of course; hopefully that's fine in this case
+ map.add("B", "", alias=65536)
+
+ with self.assertRaises(OverflowError):
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1))
+
+ # This should work
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=3, name_size_bytes=1))
+
+ def test_24bit_alias_id_too_small(self):
+ map = ResultMap()
+
+        # The alias doesn't exist, of course; hopefully that's fine in this case
+ map.add("B", "", alias=16*1024*1024)
+
+ with self.assertRaises(OverflowError):
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=3, name_size_bytes=1))
+
+ # This should work
+ map.serialize(Serializer(file_offset_bytes=3, result_id_bytes=4, name_size_bytes=1))
class Serialization(unittest.TestCase):
def __init__(self, *args, **kwargs):
trie.insert("math::range", index)
trie.insert("range", index)
- serialized = serialize_search_data(trie, map, search_type_map, 3)
- self.compare(serialized, """
+ for i in type_sizes:
+ with self.subTest(**i):
+ serialized = serialize_search_data(Serializer(**i), trie, map, search_type_map, 3)
+ self.compare(serialized, """
3 symbols
math [0]
| ::vector [1]
(EntryType.CLASS, CssClass.PRIMARY, 'class'),
(EntryType.FUNC, CssClass.INFO, 'func')
""")
- self.assertEqual(len(serialized), 277)
+ # Verify just the smallest and largest size, everything else
+ # should fit in between
+ if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
+ self.assertEqual(len(serialized), 282)
+ elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
+ self.assertEqual(len(serialized), 317)
+ else:
+ self.assertGreater(len(serialized), 282)
+ self.assertLess(len(serialized), 317)
</div>
</div>
</div>
-<script src="search-v1.js"></script>
-<script src="searchdata-v1.js" async="async"></script>
+<script src="search-v2.js"></script>
+<script src="searchdata-v2.js" async="async"></script>
<footer><nav>
<div class="m-container">
<div class="m-row">
</div>
</div>
</div>
-<script src="search-v1.js"></script>
-<script src="searchdata-v1.js" async="async"></script>
+<script src="search-v2.js"></script>
+<script src="searchdata-v2.js" async="async"></script>
<footer><nav>
<div class="m-container">
<div class="m-row">
</div>
</div>
</div>
-<script src="search-v1.js"></script>
-<script src="searchdata-v1.js" async="async"></script>
+<script src="search-v2.js"></script>
+<script src="searchdata-v2.js" async="async"></script>
<footer><nav>
<div class="m-container">
<div class="m-row">
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
-<script src="searchdata-v1.js" async="async"></script>
+<script src="search-v2.js"></script>
+<script src="searchdata-v2.js" async="async"></script>
</body>
</html>
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
- self.assertEqual(len(serialized), 4836)
+ self.assertEqual(len(serialized), 4841)
self.assertEqual(search_data_pretty, """
53 symbols
deprecated_macro [0]
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
- self.assertEqual(len(serialized), 473)
+ self.assertEqual(len(serialized), 478)
# The parameters get cut off with an ellipsis
self.assertEqual(search_data_pretty, """
2 symbols
import os
-from _search import search_data_header_struct, searchdata_filename
+from _search import Serializer, searchdata_filename
from . import IntegrationTestCase
# TODO: reuse the search data deserialization API once done
with open(os.path.join(self.path, 'html', searchdata_filename.format(search_filename_prefix='searchdata')), 'rb') as f:
serialized = f.read()
- magic, version, symbol_count, map_offset, type_map_offset = search_data_header_struct.unpack_from(serialized)
+ magic, version, type_data, symbol_count, map_offset, type_map_offset = Serializer.header_struct.unpack_from(serialized)
self.assertEqual(symbol_count, 44)
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
-<script src="searchdata-v1.js" async="async"></script>
+<script src="search-v2.js"></script>
+<script src="searchdata-v2.js" async="async"></script>
<footer><nav>
<div class="m-container">
<div class="m-row">
</div>
</div>
</div>
-<script src="search-v1.js"></script>
+<script src="search-v2.js"></script>
<script>
- Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v1.bin');
+ Search.download(window.location.pathname.substr(0, window.location.pathname.lastIndexOf('/') + 1) + 'searchdata-v2.bin');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="search-v1.js"></script>
-<script src="searchdata-v1.js" async="async"></script>
+<script src="search-v2.js"></script>
+<script src="searchdata-v2.js" async="async"></script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
</div>
</div>
</div>
-<script src="t.search-v1.js#this-is-an-url"></script>
+<script src="t.search-v2.js#this-is-an-url"></script>
<script>
- Search.download('t.absolutesearchdata-v1.bin#this-is-an-url');
+ Search.download('t.absolutesearchdata-v2.bin#this-is-an-url');
</script>
</body>
</html>
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
- self.assertEqual(len(serialized), 2269)
+ self.assertEqual(len(serialized), 2274)
self.assertEqual(search_data_pretty, """
21 symbols
search [14]
serialized = f.read()
search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0]
#print(search_data_pretty)
- self.assertEqual(len(serialized), 633)
+ self.assertEqual(len(serialized), 638)
# The parameters get cut off with an ellipsis
self.assertEqual(search_data_pretty, """
3 symbols