1 /* $Id: tdx-structure.h 5327 2002-03-16 00:33:55Z rra $
3 ** Data structures for the tradindexed overview method.
5 ** This header defines the data structures used by the tradindexed overview
6 ** method. Currently, these data structures are read and written directly to
7 ** disk (and the disk files are therefore endian-dependent and possibly
8 ** architecture-dependent due to structure padding). This will eventually be fixed.
11 ** The structure of a tradindexed overview spool is as follows: At the root
12 ** of the spool is a group.index file composed of a struct group_header
13 ** followed by some number of struct group_entry's, one for each group plus
14 ** possibly some number of free entries linked to a free list that's headed
15 ** in the struct group_header. Each entry corresponds to a particular
16 ** newsgroup carried by the server and stores the high and low article
17 ** numbers for that group, its status flag, and the base of the index file for that group.
20 ** The storage of the group.index file implements a hash table with chaining;
21 ** in other words, there is a table indexed by hash value stored in the
22 ** header that points to the starts of the chains, new entries are appended
23 ** to the end of the file and added to the hash table, and if they collide
24 ** with an existing entry are instead linked to the appropriate hash chain.
26 ** The overview information for each group is stored in a pair of files named
27 ** <group>.IDX and <group>.DAT. These files are found in a subdirectory
28 ** formed by taking the first letter of each component of the newsgroup name as
29 ** a directory name; in other words, news.announce.newgroups overview data is
30 ** stored in <pathoverview>/n/a/n/news.announce.newgroups.{IDX,DAT}. The
31 ** .DAT file contains the individual overview entries, one per line, stored
32 ** in wire format (in other words, suitable for dumping directly across the
33 ** network to a client in response to an XOVER command). The overview data
34 ** stored in that file may be out of order.
36 ** The .IDX file consists of a series of struct index_entry's, one for each
37 ** overview entry stored in the .DAT file. Each index entry stores the
38 ** offset of the data for one article in the .DAT file and its length, along
39 ** with some additional metainformation about the article used to drive
40 ** article expiration. The .IDX file is addressed like an array; the first
41 ** entry corresponds to the article with the number stored in the base field
42 ** of the group_entry for that newsgroup in the group.index file and each
43 ** entry stores the data for the next consecutive article. Index entries may
44 ** be tagged as deleted if that article has been deleted or expired.
*/
47 #ifndef INN_TDX_STRUCTURE_H
48 #define INN_TDX_STRUCTURE_H 1
51 #include <sys/types.h>
56 /* A location in group.index (this many records past the end of the header of
57 the file). There's no reason for this to be a struct, but that can't be
58 changed until the format of the group.index file is changed to be
59 architecture-independent since putting it into a struct may have changed
60 the alignment or padding on some architectures. */
65 /* The hard-coded constant size of the hash table for group.index. This need
66 not be a power of two and has no special constraints. Changing this at
67 present will break backward compatibility with group.index files written by
68 previous versions of the code.

   This macro is the element count of the hash[] array of struct loc chain
   heads declared with it (16 * 1024 = 16384 entries). The file's header
   comment notes that these structures are read and written directly to
   disk, which is why the value is pinned to the existing file format. */
69 #define TDX_HASH_SIZE (16 * 1024)
71 /* A magic number for the group.index file so that we can later change the
72 format in a backward-compatible fashion.

   The value is the bitwise complement of 0xf1f0f33d. Where int is 32 bits
   wide that constant has type unsigned int, and the complement evaluates to
   0x0e0f0cc2. NOTE(review): presumably this is the value kept in the magic
   field of the group.index header; confirm against the code that reads and
   writes that header. */
73 #define TDX_MAGIC (~(0xf1f0f33d))
75 /* The header at the top of group.index. magic contains TDX_MAGIC
76 always; hash contains pointers to the heads of the entry chains, and
77 freelist points to a linked list of free entries (entries that were used
78 for groups that have since been deleted). */
81 struct loc hash[TDX_HASH_SIZE];
85 /* An entry for a particular group. Note that a good bit of active file
86 information is duplicated here, and depending on the portion of INN asking
87 questions, sometimes the main active file is canonical and sometimes the
88 overview data is canonical. This needs to be rethought at some point.
90 Groups are matched based on the MD5 hash of their name. This may prove
91 inadequate in the future. Ideally, INN really needs to assign unique
92 numbers to each group, which could then be used here as well as in
93 tradspool rather than having to do hacks like using a hash of the group
94 name or constructing one's own number to name mapping like tradspool does.
95 Unfortunately, this ideally requires a non-backward-compatible change to
96 the active file format.
98 Several of these elements aren't used. This structure, like the others,
99 cannot be changed until the whole format of the group.index file is changed
100 since it's currently read as binary structs directly from disk. */
102 HASH hash; /* MD5 hash of the group name. */
103 HASH alias; /* Intended to point to the group this group
104 is an alias for. Not currently used. */
105 ARTNUM high; /* High article number in the group. */
106 ARTNUM low; /* Low article number in the group. */
107 ARTNUM base; /* Article number of the first entry in the
108 .IDX index file for the group. */
109 int count; /* Number of articles in group. */
110 int flag; /* Posting/moderation status. */
111 time_t deleted; /* When this group was deleted, or 0 if the
112 group is still valid. */
113 ino_t indexinode; /* The inode of the index file for the group,
114 used to detect when the file has been
115 recreated and swapped out. */
116 struct loc next; /* Next block in this chain. */
119 /* An entry in the per-group .IDX index file. */
124 time_t expires; /* Expiration time from Expires: header. */
128 #endif /* INN_TDX_STRUCTURE_H */