chiark - git - mdw - mLib/blob - struct/sym.3.in

   1 .\" -*-nroff-*-
   2 .\"
   3 .\" Manual for symbol table
   4 .\"
   5 .\" (c) 1999, 2001, 2003, 2005, 2007, 2009, 2023, 2024 Straylight/Edgeware
   6 .\"
   7 .
   8 .\"----- Licensing notice ---------------------------------------------------
   9 .\"
  10 .\" This file is part of the mLib utilities library.
  11 .\"
  12 .\" mLib is free software: you can redistribute it and/or modify it under
  13 .\" the terms of the GNU Library General Public License as published by
  14 .\" the Free Software Foundation; either version 2 of the License, or (at
  15 .\" your option) any later version.
  16 .\"
  17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
  18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19 .\" FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  20 .\" License for more details.
  21 .\"
  22 .\" You should have received a copy of the GNU Library General Public
  23 .\" License along with mLib.  If not, write to the Free Software
  24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  25 .\" USA.
  26 .
  27 .\"--------------------------------------------------------------------------
  28 .so ../defs.man \" @@@PRE@@@
  29 .
  30 .\"--------------------------------------------------------------------------
  31 .TH sym 3mLib "8 May 1999" "Straylight/Edgeware" "mLib utilities library"
  32 .\" @sym_create
  33 .\" @sym_destroy
  34 .\" @sym_find
  35 .\" @sym_remove
  36 .\" @sym_mkiter
  37 .\" @sym_next
  38 .
  39 .\" @SYM_NAME
  40 .\" @SYM_LEN
  41 .\" @SYM_HASH
  42 .
  43 .\"--------------------------------------------------------------------------
  44 .SH NAME
  45 sym \- symbol table manager
  46 .
  47 .\"--------------------------------------------------------------------------
  48 .SH SYNOPSIS
  49 .
  50 .nf
  51 .B "#include <mLib/sym.h>"
  52 .PP
  53 .B "type struct { ...\& } sym_table;"
  54 .B "type struct { ...\& } sym_base;"
  55 .B "type struct { ...\& } sym_iter;"
  56 .PP
  57 .BI "void sym_create(sym_table *" t );
  58 .BI "void sym_destroy(sym_table *" t );
  59 .PP
  60 .ta \w'\fBvoid *sym_find('u
  61 .BI "void *sym_find(sym_table *" t ,
  62 .BI "   const char *" n ", long " l ,
  63 .BI "   size_t " sz ", unsigned *" f );
  64 .BI "void sym_remove(sym_table *" t ", void *" b );
  65 .PP
  66 .BI "const char *SYM_NAME(const void *" p );
  67 .BI "size_t SYM_LEN(const void *" p );
  68 .BI "uint32 SYM_HASH(const void *" p );
  69 .PP
  70 .BI "void sym_mkiter(sym_iter *" i ", sym_table *" t );
  71 .BI "void *sym_next(sym_iter *" i );
  72 .fi
  73 .
  74 .\"--------------------------------------------------------------------------
  75 .SH "DESCRIPTION"
  76 .
  77 The
  78 .B sym
  79 functions implement a data structure often described as a dictionary, a
  80 finite map, an associative array, or a symbol table.  It associates
  81 .I values
  82 with
  83 .I keys
  84 such that the value corresponding to a given key can be found quickly.
  85 Additionally, all stored associations can be enumerated.
  86 .PP
  87 The interface provides an
  88 .I intrusive
  89 symbol table.  The data objects stored in the table must include a small
  90 header used by the symbol table manager.  This reduces the amount of
  91 pointer fiddling that needs to be done, and in practice doesn't seem to
  92 be much of a problem.  It's also fairly easy to construct a
  93 non-intrusive interface if you really want one.
  94 .PP
  95 There are three main data structures involved in the interface:
  96 .TP
  97 .B sym_table
  98 Keeps track of the information associated with a particular table.
  99 .TP
 100 .B sym_base
 101 The header which must be attached to the front of all the value
 102 objects.
 103 .TP
 104 .B sym_iter
 105 An iterator object, used for enumerating all of the associations stored
 106 in a symbol table.
 107 .PP
 108 All of the above data structures should be considered
 109 .IR opaque :
 110 don't try looking inside.  Representations have changed in the past, and
 111 they may change again in the future.
 112 .
 113 .SS "Creation and destruction"
 114 The
 115 .B sym_table
 116 object itself needs to be allocated by the caller.  It is initialized by
 117 passing it to the function
 118 .BR sym_create .
 119 After initialization, the table contains no entries.
 120 .PP
 121 Initializing a symbol table involves allocating some memory.  If this
 122 allocation fails, an
 123 .B EXC_NOMEM
 124 exception is raised.
 125 .PP
 126 When a symbol table is no longer needed, the memory occupied by the
 127 values and other maintenance structures can be reclaimed by calling
 128 .BR sym_destroy .
 129 Any bits of user data attached to values should previously have been
 130 destroyed.
 131 .
 132 .SS "Adding, searching and removing"
 133 Most of the actual work is done by the function
 134 .BR sym_find .
 135 It does both lookup and creation, depending on its arguments.  To do its
 136 job, it needs to know the following bits of information:
 137 .TP
 138 .BI "sym_table *" t
 139 A pointer to a symbol table to manipulate.
 140 .TP
 141 .BI "const char *" n
 142 The address of the
 143 .I key
 144 to look up or create.  Usually this will be a simple text string,
 145 although it can actually be any arbitrary binary data.
 146 .TP
 147 .BI "long " l
 148 The length of the key.  If this is \-1,
 149 .B sym_find
 150 assumes that the key is a null-terminated string, and calculates its
 151 length itself.  This is entirely equivalent to passing
 152 .BI strlen( n )\fR.
 153 .TP
 154 .BI "size_t " sz
 155 The size of the value block to allocate if the key could not be found.
 156 If this is zero, no value is allocated, and a null pointer is returned
 157 to indicate an unsuccessful lookup.
 158 .TP
 159 .BI "unsigned *" f
 160 The address of a `found' flag to set.  This is an output parameter.  On
 161 exit,
 162 .B sym_find
 163 will set the value of
 164 .BI * f
 165 to zero if the key could not be found, or nonzero if it was found.  This
 166 can be used to tell whether the value returned has been newly allocated,
 167 or whether it was already in the table.
 168 .PP
 169 A terminating null byte is appended to the copy of the symbol's name in
 170 memory.  This is not considered to be a part of the symbol's name, and
 171 does not contribute to the name's length as reported by the
 172 .B SYM_LEN
 173 macro.
 174 .PP
 175 A symbol can be removed from the table by calling
 176 .BR sym_remove ,
 177 passing the symbol table itself, and the value block that needs
 178 removing.
 179 .
 180 .SS "Enquiries about symbols"
 181 Three macros are provided to enable simple enquiries about a symbol.
 182 Given a pointer
 183 .I s
 184 to a symbol table entry,
 185 .BI SYM_LEN( s )
 186 returns the length of the symbol's name (excluding any terminating null
 187 byte);
 188 .BI SYM_NAME( s )
 189 returns a pointer to the symbol's name; and
 190 .BI SYM_HASH( s )
 191 returns the symbol's hash value.
 192 .
 193 .SS "Enumerating symbols"
 194 Enumerating the values in a symbol table is fairly simple.  Allocate a
 195 .B sym_iter
 196 object from somewhere.  Attach it to a symbol table by calling
 197 .BR sym_mkiter ,
 198 and passing in the addresses of the iterator and the symbol table.
 199 Then, each call to
 200 .B sym_next
 201 will return a different value from the symbol table, until all of them
 202 have been enumerated, at which point,
 203 .B sym_next
 204 returns a null pointer.
 205 .PP
 206 It's safe to remove the symbol you've just been returned by
 207 .BR sym_next .
 208 However, it's not safe to remove any other symbol.  So don't do that.
 209 .PP
 210 When you've finished with an iterator, it's safe to just throw it away.
 211 You don't need to call any functions beforehand.
 212 .
 213 .SS "Use in practice"
 214 In normal use, the keys are simple strings (usually identifiers from
 215 some language), and the values are nontrivial structures providing
 216 information about types and values.
 217 .PP
 218 In this case, you'd define something like the following structure for
 219 your values:
 220 .VS
 221 .ta 2 20m
 222 typedef struct val {
 223         sym_base _base; /* Symbol header */
 224         unsigned type;  /* Type of this symbol */
 225         int dispoff;    /* Which display variable is in */
 226         size_t frameoff;        /* Offset of variable in frame */
 227 } val;
 228 .VE
 229 Given a pointer
 230 .I v
 231 to a
 232 .BR val ,
 233 you can find the variable's name by calling
 234 .BI SYM_NAME( v )\fR.
 235 .PP
 236 You can look up a name in the table by saying something like:
 237 .VS
 238 .ta 2n
 239 val *v = sym_find(t, name, -1, 0, 0);
 240 if (!v)
 241         error("unknown variable `%s'", name);
 242 .VE
 243 You can add in a new variable by saying something like
 244 .VS
 245 .ta 2n
 246 unsigned f;
 247 val *v = sym_find(t, name, -1, sizeof(val), &f);
 248 if (f)
 249         error("variable `%s' already exists", name);
 250 /* fill in v */
 251 .VE
 252 You can examine all the variables in your symbol table by saying
 253 something like:
 254 .VS
 255 .ta 2n
 256 sym_iter i;
 257 val *v;
 258 .VP
 259 for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; ) {
 260         /* ... */
 261 }
 262 .VE
 263 That ought to be enough examples to be getting on with.
 264 .
 265 .SS Implementation
 266 The symbol table is an extensible hashtable, using the universal hash
 267 function described in
 268 .BR unihash (3)
 269 and the global hashing key.  The hash chains are kept very short
 270 (probably too short, actually).  Every time a symbol is found, its block
 271 is promoted to the front of its bin chain so it gets found faster next
 272 time.
 273 .
 274 .\"--------------------------------------------------------------------------
 275 .SH SEE ALSO
 276 .
 277 .BR hash (3),
 278 .BR mLib (3).
 279 .
 280 .\"--------------------------------------------------------------------------
 281 .SH AUTHOR
 282 .
 283 Mark Wooding, <mdw@distorted.org.uk>
 284 .
 285 .\"----- That's all, folks --------------------------------------------------