1 /* $Id: tdx-group.c 7598 2007-02-09 02:40:51Z eagle $
3 ** Group index handling for the tradindexed overview method.
5 ** Implements the handling of the group.index file for the tradindexed
6 ** overview method. This file contains an entry for every group and stores
7 ** the high and low article marks and the base article numbers for each
8 ** individual group index file.
10 ** Externally visible functions have a tdx_ prefix; internal functions do
11 ** not. (Externally visible unfortunately means everything that needs to be
12 ** visible outside of this object file, not just interfaces exported to
13 ** consumers of the overview API.)
15 ** This code has to support readers and writers sharing the same files, and
16 ** we want to avoid locking where possible since locking may be very slow
17 ** (such as over NFS). Each group has two data files (and one has to get the
18 ** right index file for a given data file or get mangled results) and one
19 ** piece of data in the main index file required to interpret the individual
20 ** index file, namely the article base of that index.
22 ** We can make the following assumptions:
24 ** - The high water mark for a group is monotonically increasing; in other
25 ** words, the highest numbered article in a group won't ever decrease.
27 ** - While the article base may either increase or decrease, it will never
28 ** change unless the inode of the index file on disk also changes, since
29 ** changing the base requires rewriting the index file.
31 ** - No two files will have the same inode (this requirement should be safe
32 **   even in strange Unix file formats, since the files are all in the same
**   directory).
35 ** We therefore use the following procedure to update the data: The high
36 ** water mark may be changed at any time but surrounded in a write lock. The
37 ** base may only be changed as part of an index rebuild. To do an index
38 ** rebuild, we follow the following procedure:
40 ** 1) Obtain a write lock on the group entry in the main index.
41 ** 2) Write out new index and data files to new temporary file names.
42 ** 3) Store the new index inode into the main index.
43 ** 4) Update the high, low, and base article numbers in the main index.
44 ** 5) Rename the data file to its correct name.
45 ** 6) Rename the index file to its correct name.
46 ** 7) Release the write lock.
48 ** We use the following procedure to read the data:
50 ** 1) Open the group data files (both index and data).
51 ** 2) Store copies of the current high water mark and base in variables.
52 ** 3) Check to be sure the index inode matches the master index file.
54 ** If it does match, then we have a consistent set of data, since the high
55 ** water mark and base values have to match the index we have (the inode
56 ** value is updated first). It may not be the most current set of data, but
57 ** since we have those index and data files open, even if they're later
58 ** rebuilt we'll continue looking at the same files. They may have further
59 ** data appended to them, but that's safe.
61 ** If the index inode doesn't match, someone's rebuilt the file while we were
62 ** trying to open it. Continue with the following procedure:
64 ** 4) Close the data files that we opened.
65 ** 5) Obtain a read lock on the group entry in the main index.
66 ** 6) Reopen the data files.
67 ** 7) Grab the current high water mark and base.
68 ** 8) Release the read lock.
70 ** In other words, if there appears to be contention, we fall back to using
71 ** locking so that we don't try to loop (which also avoids an infinite loop
72 ** in the event of corruption of the main index).
74 ** Note that once we have a consistent set of data files open, we don't need
75 ** to aggressively check for new data files until someone asks for an article
76 ** outside the range of articles that we know about. We may be working from
77 ** outdated data files, but the most we'll miss is a cancel or an expiration
78 ** run. Overview data doesn't change; new data is appended and old data is
79 ** expired. We can afford to check only every once in a while, just to be
80 **   sure that we're not going to hand out overview data for a bunch of expired
**   articles.
86 #include "portable/mmap.h"
93 #include "inn/hashtab.h"
94 #include "inn/innconf.h"
95 #include "inn/messages.h"
98 #include "inn/vector.h"
101 #include "tdx-private.h"
102 #include "tdx-structure.h"
104 /* Returned to callers as an opaque data type, this stashes all of the
105 information about an open group.index file. */
110 struct group_header *header;
111 struct group_entry *entries;
115 /* Forward declaration. */
118 /* Internal prototypes. */
119 static int index_entry_count(size_t size);
120 static size_t index_file_size(int count);
121 static bool index_lock(int fd, enum inn_locktype type);
122 static bool index_lock_group(int fd, ptrdiff_t offset, enum inn_locktype);
123 static bool index_map(struct group_index *);
124 static bool index_maybe_remap(struct group_index *, long loc);
125 static void index_unmap(struct group_index *);
126 static bool index_expand(struct group_index *);
127 static long index_find(struct group_index *, const char *group);
131 ** Given a file size, return the number of group entries that it contains.
134 index_entry_count(size_t size)
136 return (size - sizeof(struct group_header)) / sizeof(struct group_entry);
141 ** Given a number of group entries, return the required file size.
144 index_file_size(int count)
146 return sizeof(struct group_header) + count * sizeof(struct group_entry);
151 ** Lock the hash table for the group index, used to acquire global locks on
152 ** the group index when updating it.
155 index_lock(int fd, enum inn_locktype type)
159 status = inn_lock_range(fd, type, true, 0, sizeof(struct group_header));
161 syswarn("tradindexed: cannot %s index hash table",
162 (type == INN_LOCK_UNLOCK) ? "unlock" : "lock");
168 ** Lock the group entry for a particular group. Takes the offset of that
169 ** group entry from the start of the group entries (not the start of the
170 ** file; we have to add the size of the group header). Used for coordinating
171 ** updates of the data for a group.
174 index_lock_group(int fd, ptrdiff_t offset, enum inn_locktype type)
179 size = sizeof(struct group_entry);
180 offset = offset * size + sizeof(struct group_header);
181 status = inn_lock_range(fd, type, true, offset, size);
183 syswarn("tradindexed: cannot %s group entry at %lu",
184 (type == INN_LOCK_UNLOCK) ? "unlock" : "lock",
185 (unsigned long) offset);
191 ** Memory map (or read into memory) the key portions of the group.index
192 ** file. Takes a struct group_index to fill in and returns true on success
193 ** and false on failure.
196 index_map(struct group_index *index)
198 if (!innconf->tradindexedmmap && index->writable) {
199 warn("tradindexed: cannot open for writing without mmap");
203 if (!innconf->tradindexedmmap) {
207 header_size = sizeof(struct group_header);
208 entry_size = index->count * sizeof(struct group_entry);
209 index->header = xmalloc(header_size);
210 index->entries = xmalloc(entry_size);
211 if (read(index->fd, index->header, header_size) != header_size) {
212 syswarn("tradindexed: cannot read header from %s", index->path);
215 if (read(index->fd, index->entries, entry_size) != entry_size) {
216 syswarn("tradindexed: cannot read entries from %s", index->path);
223 free(index->entries);
224 index->header = NULL;
225 index->entries = NULL;
231 int flag = PROT_READ;
234 flag = PROT_READ | PROT_WRITE;
235 size = index_file_size(index->count);
236 data = mmap(NULL, size, flag, MAP_SHARED, index->fd, 0);
237 if (data == MAP_FAILED) {
238 syswarn("tradindexed: cannot mmap %s", index->path);
241 index->header = (struct group_header *)(void *) data;
242 index->entries = (struct group_entry *)
243 (void *)(data + sizeof(struct group_header));
250 file_open_group_index(struct group_index *index, struct stat *st)
254 index->header = NULL;
255 open_mode = index->writable ? O_RDWR | O_CREAT : O_RDONLY;
256 index->fd = open(index->path, open_mode, ARTFILE_MODE);
258 syswarn("tradindexed: cannot open %s", index->path);
262 if (fstat(index->fd, st) < 0) {
263 syswarn("tradindexed: cannot fstat %s", index->path);
266 close_on_exec(index->fd, true);
270 if (index->fd >= 0) {
279 ** Given a group location, remap the index file if our existing mapping isn't
280 ** large enough to include that group. (This can be the case when another
281 ** writer is appending entries to the group index.)
284 index_maybe_remap(struct group_index *index, long loc)
290 if (loc < index->count)
293 /* Don't remap if remapping wouldn't actually help. */
294 r = fstat(index->fd, &st);
296 if (errno == ESTALE) {
298 if (!file_open_group_index(index, &st))
301 syswarn("tradindexed: cannot stat %s", index->path);
305 count = index_entry_count(st.st_size);
306 if (count < loc && index->header != NULL)
309 /* Okay, remapping will actually help. */
311 index->count = count;
312 return index_map(index);
317 ** Unmap the index file, either in preparation for closing the overview
318 ** method or to get ready to remap it. We warn about failures to munmap but
319 ** don't do anything about them; there isn't much that we can do.
322 index_unmap(struct group_index *index)
324 if (index->header == NULL)
326 if (!innconf->tradindexedmmap) {
328 free(index->entries);
330 if (munmap(index->header, index_file_size(index->count)) < 0)
331 syswarn("tradindexed: cannot munmap %s", index->path);
333 index->header = NULL;
334 index->entries = NULL;
339 ** Expand the group.index file to hold more entries; also used to build the
340 ** initial file. The caller is expected to lock the group index.
343 index_expand(struct group_index *index)
348 index->count += 1024;
349 if (ftruncate(index->fd, index_file_size(index->count)) < 0) {
350 syswarn("tradindexed: cannot expand %s", index->path);
354 /* If mapping the index fails, we've already extended it but we haven't
355 done anything with the new portion of the file. That means that it's
356 all zeroes, which means that it contains index entries who all think
357 their next entry is entry 0. We don't want to leave things in this
358 state (particularly if this was the first expansion of the index file,
359 in which case entry 0 points to entry 0 and our walking functions may
360 go into infinite loops. Undo the file expansion. */
361 if (!index_map(index)) {
362 index->count -= 1024;
363 if (ftruncate(index->fd, index_file_size(index->count)) < 0) {
364 syswarn("tradindexed: cannot shrink %s", index->path);
369 /* If the magic isn't right, assume this is a new index file. */
370 if (index->header->magic != TDX_MAGIC) {
371 index->header->magic = TDX_MAGIC;
372 index->header->freelist.recno = -1;
373 for (i = 0; i < TDX_HASH_SIZE; i++)
374 index->header->hash[i].recno = -1;
377 /* Walk the new entries back to front, adding them to the free list. */
378 for (i = index->count - 1; i >= index->count - 1024; i--) {
379 index->entries[i].next = index->header->freelist;
380 index->header->freelist.recno = i;
383 inn_mapcntl(index->header, index_file_size(index->count), MS_ASYNC);
389 ** Open the group.index file and allocate a new struct for it, returning a
390 ** pointer to that struct. Takes a bool saying whether or not the overview
391 ** should be opened for write.
394 tdx_index_open(bool writable)
396 struct group_index *index;
399 index = xmalloc(sizeof(struct group_index));
400 index->path = concatpath(innconf->pathoverview, "group.index");
401 index->writable = writable;
402 if (!file_open_group_index(index, &st)) {
405 if ((size_t) st.st_size > sizeof(struct group_header)) {
406 index->count = index_entry_count(st.st_size);
407 if (!index_map(index))
411 if (index->writable) {
413 warn("tradindexed: recreating truncated %s", index->path);
414 if (!index_expand(index))
417 index->header = NULL;
418 index->entries = NULL;
424 tdx_index_close(index);
430 ** Given a group name hash, return an index into the hash table in the
431 ** group.index header.
434 index_bucket(HASH hash)
438 memcpy(&bucket, &hash, sizeof(bucket));
439 return bucket % TDX_HASH_SIZE;
444 ** Given a pointer to a group entry, return its location number.
447 entry_loc(const struct group_index *index, const struct group_entry *entry)
449 return entry - index->entries;
454 ** Splice out a particular group entry. Takes the entry and a pointer to the
455 ** location where a pointer to it is stored.
458 entry_splice(struct group_entry *entry, int *parent)
460 *parent = entry->next.recno;
461 entry->next.recno = -1;
462 inn_mapcntl(parent, sizeof(*parent), MS_ASYNC);
467 ** Add a new entry to the appropriate hash chain.
470 index_add(struct group_index *index, struct group_entry *entry)
474 bucket = index_bucket(entry->hash);
475 loc = entry_loc(index, entry);
476 if (loc == index->header->hash[bucket].recno) {
477 warn("tradindexed: refusing to add a loop for %ld in bucket %ld",
481 entry->next.recno = index->header->hash[bucket].recno;
482 index->header->hash[bucket].recno = entry_loc(index, entry);
483 inn_mapcntl(&index->header->hash[bucket], sizeof(struct loc), MS_ASYNC);
484 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
489 ** Find a group in the index file, returning the group number for that group
490 ** or -1 if the group can't be found.
493 index_find(struct group_index *index, const char *group)
498 if (index->header == NULL || index->entries == NULL)
500 hash = Hash(group, strlen(group));
501 if (innconf->nfsreader && !index_maybe_remap(index, LONG_MAX))
503 loc = index->header->hash[index_bucket(hash)].recno;
505 while (loc >= 0 && loc < index->count) {
506 struct group_entry *entry;
508 if (loc > index->count && !index_maybe_remap(index, loc))
510 entry = index->entries + loc;
511 if (entry->deleted == 0)
512 if (memcmp(&hash, &entry->hash, sizeof(hash)) == 0)
514 if (loc == entry->next.recno) {
515 syswarn("tradindexed: index loop for entry %ld", loc);
518 loc = entry->next.recno;
525 ** Add a given entry to the free list.
528 freelist_add(struct group_index *index, struct group_entry *entry)
530 entry->next.recno = index->header->freelist.recno;
531 index->header->freelist.recno = entry_loc(index, entry);
532 inn_mapcntl(&index->header->freelist, sizeof(struct loc), MS_ASYNC);
533 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
538 ** Find an entry by hash value (rather than group name) and splice it out of
539 ** whatever chain it might belong to. This function is called by both
540 ** index_unlink and index_audit_group. Locking must be done by the caller.
541 ** Returns the group location of the spliced group.
544 index_unlink_hash(struct group_index *index, HASH hash)
549 parent = &index->header->hash[index_bucket(hash)].recno;
552 while (current >= 0 && current < index->count) {
553 struct group_entry *entry;
555 if (current > index->count && !index_maybe_remap(index, current))
557 entry = &index->entries[current];
558 if (entry->deleted == 0)
559 if (memcmp(&hash, &entry->hash, sizeof(hash)) == 0) {
560 entry_splice(entry, parent);
563 if (current == entry->next.recno) {
564 syswarn("tradindexed: index loop for entry %ld", current);
567 parent = &entry->next.recno;
575 ** Like index_find, but also removes that entry out of whatever chain it
576 ** might belong to. This function is called by tdx_index_delete. Locking
577 ** must be done by the caller.
580 index_unlink(struct group_index *index, const char *group)
584 hash = Hash(group, strlen(group));
585 return index_unlink_hash(index, hash);
590 ** Return the information stored about a given group in the group index.
593 tdx_index_entry(struct group_index *index, const char *group)
596 struct group_entry *entry;
598 loc = index_find(index, group);
601 entry = index->entries + loc;
602 if (innconf->tradindexedmmap && innconf->nfsreader)
603 inn_mapcntl(entry, sizeof *entry, MS_INVALIDATE);
609 ** Add a new newsgroup to the group.index file. Takes the newsgroup name,
610 ** its high and low water marks, and the newsgroup flag. Note that aliased
611 ** newsgroups are not currently handled. If the group already exists, just
612 ** update the flag (not the high and low water marks).
615 tdx_index_add(struct group_index *index, const char *group, ARTNUM low,
616 ARTNUM high, const char *flag)
620 struct group_entry *entry;
621 struct group_data *data;
623 if (!index->writable)
626 /* If the group already exists, update the flag as necessary and then
628 loc = index_find(index, group);
630 entry = &index->entries[loc];
631 if (entry->flag != *flag) {
633 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
638 index_lock(index->fd, INN_LOCK_WRITE);
640 /* Find a free entry. If we don't have any free space, make some. */
641 if (index->header->freelist.recno == -1)
642 if (!index_expand(index)) {
643 index_lock(index->fd, INN_LOCK_UNLOCK);
646 loc = index->header->freelist.recno;
647 index->header->freelist.recno = index->entries[loc].next.recno;
648 inn_mapcntl(&index->header->freelist, sizeof(struct loc), MS_ASYNC);
650 /* Initialize the entry. */
651 entry = &index->entries[loc];
652 hash = Hash(group, strlen(group));
654 entry->low = (low == 0 && high != 0) ? high + 1 : low;
660 data = tdx_data_new(group, index->writable);
661 if (!tdx_data_open_files(data))
662 warn("tradindexed: unable to create data files for %s", group);
663 entry->indexinode = data->indexinode;
664 tdx_data_close(data);
665 index_add(index, entry);
667 index_lock(index->fd, INN_LOCK_UNLOCK);
673 ** Delete a group index entry.
676 tdx_index_delete(struct group_index *index, const char *group)
679 struct group_entry *entry;
681 if (!index->writable)
684 /* Lock the header for the entire operation, mostly as prevention against
685 interfering with ongoing audits (which lock while they're running). */
686 index_lock(index->fd, INN_LOCK_WRITE);
688 /* Splice out the entry and mark it as deleted. */
689 loc = index_unlink(index, group);
691 index_lock(index->fd, INN_LOCK_UNLOCK);
694 entry = &index->entries[loc];
695 entry->deleted = time(NULL);
696 HashClear(&entry->hash);
698 /* Add the entry to the free list. */
699 freelist_add(index, entry);
700 index_lock(index->fd, INN_LOCK_UNLOCK);
702 /* Delete the group data files for this group. */
703 tdx_data_delete(group, NULL);
710 ** Close an open handle to the group index file, freeing the group_index
711 ** structure at the same time. The argument to this function becomes invalid
715 tdx_index_close(struct group_index *index)
718 if (index->fd >= 0) {
728 ** Open the data files for a particular group. The interface to this has to
729 ** be in this file because we have to lock the group and retry if the inode
730 ** of the opened index file doesn't match the one recorded in the group index
731 ** file. Optionally take a pointer to the group index entry if the caller
732 ** has already gone to the work of finding it.
735 tdx_data_open(struct group_index *index, const char *group,
736 struct group_entry *entry)
738 struct group_data *data;
743 entry = tdx_index_entry(index, group);
747 offset = entry - index->entries;
748 data = tdx_data_new(group, index->writable);
750 /* Check to see if the inode of the index file matches. If it doesn't,
751 this probably means that as we were opening the index file, someone
752 else rewrote it (either expire or repack). Obtain a lock and try
753 again. If there's still a mismatch, go with what we get; there's some
756 This code is very sensitive to order and parallelism. See the comment
757 at the beginning of this file for methodology. */
758 if (!tdx_data_open_files(data))
762 if (entry->indexinode != data->indexinode) {
763 index_lock_group(index->fd, offset, INN_LOCK_READ);
764 if (!tdx_data_open_files(data)) {
765 index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
768 if (entry->indexinode != data->indexinode)
769 warn("tradindexed: index inode mismatch for %s", group);
772 index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
779 tdx_data_close(data);
785 ** Add an overview record for a particular article. Takes the group entry,
786 ** the open overview data structure, and the information about the article
787 ** and returns true on success, false on failure. This function calls
788 ** tdx_data_store to do most of the real work and then updates the index
792 tdx_data_add(struct group_index *index, struct group_entry *entry,
793 struct group_data *data, const struct article *article)
797 ptrdiff_t offset = entry - index->entries;
799 if (!index->writable)
801 index_lock_group(index->fd, offset, INN_LOCK_WRITE);
803 /* Make sure we have the most current data files and that we have the
804 right base article number. */
805 if (entry->indexinode != data->indexinode) {
806 if (!tdx_data_open_files(data))
808 if (entry->indexinode != data->indexinode)
809 warn("tradindexed: index inode mismatch for %s",
810 HashToText(entry->hash));
811 data->base = entry->base;
814 /* If the article number is too low to store in the group index, repack
815 the group with a lower base index. */
816 if (entry->base > article->number) {
817 if (!tdx_data_pack_start(data, article->number))
819 old_inode = entry->indexinode;
820 old_base = entry->base;
821 entry->indexinode = data->indexinode;
822 entry->base = data->base;
823 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
824 if (!tdx_data_pack_finish(data)) {
825 entry->base = old_base;
826 entry->indexinode = old_inode;
827 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
832 /* Store the data. */
833 if (!tdx_data_store(data, article))
835 if (entry->base == 0)
836 entry->base = data->base;
837 if (entry->low == 0 || entry->low > article->number)
838 entry->low = article->number;
839 if (entry->high < article->number)
840 entry->high = article->number;
842 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
843 index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
847 index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
853 ** Start a rebuild of the group data for a newsgroup. Right now, all this
854 ** does is lock the group index entry.
857 tdx_index_rebuild_start(struct group_index *index, struct group_entry *entry)
861 offset = entry - index->entries;
862 return index_lock_group(index->fd, offset, INN_LOCK_WRITE);
867 ** Finish a rebuild of the group data for a newsgroup. Takes the old and new
868 ** entry and writes the data from the new entry into the group index, and
872 tdx_index_rebuild_finish(struct group_index *index, struct group_entry *entry,
873 struct group_entry *new)
878 new_inode = new->indexinode;
879 new->indexinode = entry->indexinode;
881 entry->indexinode = new_inode;
882 new->indexinode = new_inode;
883 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
884 offset = entry - index->entries;
885 index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
891 ** Expire a single newsgroup. Most of the work is done by tdx_data_expire*,
892 ** but this routine has the responsibility to do locking (the same as would
893 ** be done for repacking, since the group base may change) and updating the
897 tdx_expire(const char *group, ARTNUM *low, struct history *history)
899 struct group_index *index;
900 struct group_entry *entry;
901 struct group_entry new_entry;
902 struct group_data *data = NULL;
907 index = tdx_index_open(true);
910 entry = tdx_index_entry(index, group);
912 tdx_index_close(index);
915 tdx_index_rebuild_start(index, entry);
917 /* tdx_data_expire_start builds the new IDX and DAT files and fills in the
918 struct group_entry that was passed to it. tdx_data_rebuild_finish does
919 the renaming of the new files to the final file names. */
924 data = tdx_data_open(index, group, entry);
927 if (!tdx_data_expire_start(group, data, &new_entry, history))
929 old_inode = entry->indexinode;
930 old_base = entry->base;
931 entry->indexinode = new_entry.indexinode;
932 entry->base = new_entry.base;
933 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
934 tdx_data_close(data);
935 if (!tdx_data_rebuild_finish(group)) {
936 entry->base = old_base;
937 entry->indexinode = old_inode;
938 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
942 /* Almost done. Update the group index. If there are no articles in the
943 group, the low water mark should be one more than the high water
945 if (new_entry.low == 0)
946 new_entry.low = new_entry.high + 1;
947 tdx_index_rebuild_finish(index, entry, &new_entry);
950 tdx_index_close(index);
954 offset = entry - index->entries;
955 index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
957 tdx_data_close(data);
958 tdx_index_close(index);
964 ** RECOVERY AND AUDITING
966 ** All code below this point is not used in the normal operations of the
967 ** overview method. Instead, it's code to dump various data structures or
968 ** audit them for consistency, used by recovery tools and inspection tools.
971 /* Holds a newsgroup name and its hash, used to form a hash table mapping
972 newsgroup hash values to the actual names. */
979 /* Holds information needed by hash traversal functions. Right now, this is
980 just the pointer to the group index and a flag saying whether to fix
983 struct group_index *index;
989 ** Hash table functions for the mapping from group hashes to names.
992 hashmap_hash(const void *entry)
995 const struct hashmap *group = entry;
997 memcpy(&hash, &group->hash, sizeof(hash));
1003 hashmap_key(const void *entry)
1005 return &((const struct hashmap *) entry)->hash;
1010 hashmap_equal(const void *key, const void *entry)
1012 const HASH *first = key;
1015 second = &((const struct hashmap *) entry)->hash;
1016 return memcmp(first, second, sizeof(HASH)) == 0;
1021 hashmap_delete(void *entry)
1023 struct hashmap *group = entry;
1031 ** Construct a hash table of group hashes to group names by scanning the
1032 ** active file. Returns the constructed hash table.
1034 static struct hash *
1039 char *activepath, *line;
1040 struct cvector *data = NULL;
1043 struct hashmap *group;
1046 activepath = concatpath(innconf->pathdb, _PATH_ACTIVE);
1047 active = QIOopen(activepath);
1051 if (fstat(QIOfileno(active), &st) < 0)
1052 hash_size = 32 * 1024;
1054 hash_size = st.st_size / 30;
1055 hash = hash_create(hash_size, hashmap_hash, hashmap_key, hashmap_equal,
1058 line = QIOread(active);
1059 while (line != NULL) {
1060 data = cvector_split_space(line, data);
1061 if (data->count != 4) {
1062 warn("tradindexed: malformed active file line %s", line);
1065 group = xmalloc(sizeof(struct hashmap));
1066 group->name = xstrdup(data->strings[0]);
1067 group->flag = data->strings[3][0];
1068 grouphash = Hash(group->name, strlen(group->name));
1069 memcpy(&group->hash, &grouphash, sizeof(HASH));
1070 hash_insert(hash, &group->hash, group);
1071 line = QIOread(active);
1080 ** Print the stored information about a single group in human-readable form
1081 ** to stdout. The format is:
1083 ** name high low base count flag deleted inode
1085 ** all on one line. Name is passed into this function.
1088 tdx_index_print(const char *name, const struct group_entry *entry,
1091 fprintf(output, "%s %lu %lu %lu %lu %c %lu %lu\n", name, entry->high,
1092 entry->low, entry->base, (unsigned long) entry->count,
1093 entry->flag, (unsigned long) entry->deleted,
1094 (unsigned long) entry->indexinode);
1099 ** Dump the complete contents of the group.index file in human-readable form
1100 ** to the specified file, one line per group.
1103 tdx_index_dump(struct group_index *index, FILE *output)
1107 struct group_entry *entry;
1108 struct hash *hashmap;
1109 struct hashmap *group;
1112 if (index->header == NULL || index->entries == NULL)
1114 hashmap = hashmap_load();
1115 for (bucket = 0; bucket < TDX_HASH_SIZE; bucket++) {
1116 current = index->header->hash[bucket].recno;
1117 while (current != -1) {
1118 if (!index_maybe_remap(index, current))
1120 entry = index->entries + current;
1122 if (hashmap != NULL) {
1123 group = hash_lookup(hashmap, &entry->hash);
1128 name = HashToText(entry->hash);
1129 tdx_index_print(name, entry, output);
1130 if (current == entry->next.recno) {
1131 warn("tradindexed: index loop for entry %ld", current);
1134 current = entry->next.recno;
1137 if (hashmap != NULL)
1143 ** Audit a particular group entry location to ensure that it points to a
1144 ** valid entry within the group index file. Takes a pointer to the location,
1145 ** the number of the location, a pointer to the group entry if any (if not,
1146 ** the location is assumed to be part of the header hash table), and a flag
1147 ** saying whether to fix problems that are found.
1150 index_audit_loc(struct group_index *index, int *loc, long number,
1151 struct group_entry *entry, bool fix)
1155 if (*loc >= index->count) {
1156 warn("tradindexed: out of range index %d in %s %ld",
1157 *loc, (entry == NULL ? "bucket" : "entry"), number);
1160 if (*loc < 0 && *loc != -1) {
1161 warn("tradindexed: invalid negative index %d in %s %ld",
1162 *loc, (entry == NULL ? "bucket" : "entry"), number);
1165 if (entry != NULL && *loc == number) {
1166 warn("tradindexed: index loop for entry %ld", number);
1172 inn_mapcntl(loc, sizeof(*loc), MS_ASYNC);
1178 ** Check an entry to see if it was actually deleted. Make sure that all the
1179 ** information is consistent with a deleted group if it's not and the fix
1183 index_audit_deleted(struct group_entry *entry, long number, bool fix)
1185 if (entry->deleted != 0 && !HashEmpty(entry->hash)) {
1186 warn("tradindexed: entry %ld has a delete time but a non-zero hash",
1189 HashClear(&entry->hash);
1190 inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
1197 ** Audit the group header for any inconsistencies. This checks the
1198 ** reachability of all of the group entries, makes sure that deleted entries
1199 ** are on the free list, and otherwise checks the linked structure of the
1200 ** whole file. The data in individual entries is not examined. If the
1201 ** second argument is true, also attempt to fix inconsistencies.
1204 index_audit_header(struct group_index *index, bool fix)
1206 long bucket, current;
1207 struct group_entry *entry;
1211 /* First, walk all of the regular hash buckets, making sure that all of
1212 the group location pointers are valid and sane, that all groups that
1213 have been deleted are correctly marked as such, and that all groups are
1214 in their correct hash chain. Build reachability information as we go,
1215 used later to ensure that all group entries are reachable. */
1216 reachable = xcalloc(index->count, sizeof(bool));
1217 for (bucket = 0; bucket < TDX_HASH_SIZE; bucket++) {
1218 parent = &index->header->hash[bucket].recno;
1219 index_audit_loc(index, parent, bucket, NULL, fix);
1221 while (current >= 0 && current < index->count) {
1222 entry = &index->entries[current];
1223 next = &entry->next.recno;
1224 if (entry->deleted == 0 && bucket != index_bucket(entry->hash)) {
1225 warn("tradindexed: entry %ld is in bucket %ld instead of its"
1226 " correct bucket %ld", current, bucket,
1227 index_bucket(entry->hash));
1229 entry_splice(entry, parent);
1233 if (reachable[current])
1234 warn("tradindexed: entry %ld is reachable from multiple"
1236 reachable[current] = true;
1238 index_audit_deleted(entry, current, fix);
1239 index_audit_loc(index, &entry->next.recno, current, entry, fix);
1240 if (entry->deleted != 0) {
1241 warn("tradindexed: entry %ld is deleted but not in the free"
1244 entry_splice(entry, parent);
1246 reachable[current] = false;
1249 if (*next == current)
1256 /* Now, walk the free list. Make sure that each group in the free list is
1257 actually deleted, and update the reachability information. */
1258 index_audit_loc(index, &index->header->freelist.recno, 0, NULL, fix);
1259 parent = &index->header->freelist.recno;
1261 while (current >= 0 && current < index->count) {
1262 entry = &index->entries[current];
1263 index_audit_deleted(entry, current, fix);
1264 reachable[current] = true;
1265 if (!HashEmpty(entry->hash) && entry->deleted == 0) {
1266 warn("tradindexed: undeleted entry %ld in free list", current);
1268 entry_splice(entry, parent);
1269 reachable[current] = false;
1272 index_audit_loc(index, &entry->next.recno, current, entry, fix);
1273 if (entry->next.recno == current)
1275 parent = &entry->next.recno;
1279 /* Finally, check all of the unreachable entries and if fix is true, try
1280 to reattach them in the appropriate location. */
1281 for (current = 0; current < index->count; current++)
1282 if (!reachable[current]) {
1283 warn("tradindexed: unreachable entry %ld", current);
1285 entry = &index->entries[current];
1286 if (!HashEmpty(entry->hash) && entry->deleted == 0)
1287 index_add(index, entry);
1289 HashClear(&entry->hash);
1291 freelist_add(index, entry);
1302 ** Audit a particular group entry for any inconsistencies.  This doesn't
1303 ** check any of the structure, or whether the group is deleted, just the data
1304 ** as stored in the group data files (mostly by calling tdx_data_audit to do
1305 ** the real work).  Note that while the low water mark may be updated, the
1306 ** high water mark is left unchanged.
1309 index_audit_group(struct group_index *index, struct group_entry *entry,
1310                   struct hash *hashmap, bool fix)
1312     struct hashmap *group;
     /* NOTE(review): this listing is lossy -- original lines 1311 (opening
        brace), 1313-1314 (the declaration of the lock offset local, presumably
        ptrdiff_t, and a blank line), 1321, 1326-1327, 1331, 1333, and 1335
        (the fix guard, else branch, and closing braces) are missing here.
        Restore them from the upstream source before compiling. */
     /* Take a write lock on just this entry's record so readers and other
        writers see a consistent entry while we examine and possibly fix it. */
1315     offset = entry - index->entries;
1316     index_lock_group(index->fd, offset, INN_LOCK_WRITE);
     /* Resolve the entry's hash back to a group name via the active-file map
        built by the caller.  No match means the group no longer exists. */
1317     group = hash_lookup(hashmap, &entry->hash);
1318     if (group == NULL) {
1319         warn("tradindexed: group %ld not found in active file",
1320              entry_loc(index, entry));
         /* Repair path (presumably guarded by the elided "if (fix)"): the only
            fix for an orphaned entry is to delete it -- unlink it from its
            hash chain, mark it deleted, and put it on the free list. */
1322         index_unlink_hash(index, entry->hash);
1323         HashClear(&entry->hash);
1324         entry->deleted = time(NULL);
1325         freelist_add(index, entry);
     /* Group still exists: sync the moderation/posting flag with the active
        file, scheduling an async write-back of the changed entry, then audit
        the group's actual overview data files. */
1328     if (entry->flag != group->flag) {
1329         entry->flag = group->flag;
1330         inn_mapcntl(entry, sizeof(*entry), MS_ASYNC);
1332     tdx_data_audit(group->name, entry, fix);
     /* Always release the per-entry lock, whichever path was taken. */
1334     index_lock_group(index->fd, offset, INN_LOCK_UNLOCK);
1339 ** Check to be sure that a given group exists in the overview index, and if
1340 ** missing, adds it.  Assumes that the index isn't locked, since it calls the
1341 ** normal functions for adding new groups (this should only be called after
1342 ** the index has already been repaired, for the same reason).  Called as a
1343 ** hash traversal function, walking the hash table of groups from the active
     /* NOTE(review): original lines 1344-1346 (end of this comment block and
        the "static void" return type line), 1348 (opening brace), 1352 (blank),
        1356 (presumably the "if (fix)" guard before tdx_index_add), and
        1358-1359 (closing braces) are missing from this listing.  Restore them
        from the upstream source before compiling. */
1347 index_audit_active(void *value, void *cookie)
     /* Hash-traversal callback: value is the active-file group record, cookie
        is the audit state carrying the open index (and, presumably, the fix
        flag -- TODO confirm against the struct audit_data definition). */
1349     struct hashmap *group = value;
1350     struct audit_data *data = cookie;
1351     struct group_entry *entry;
     /* Look the group up by name; a NULL entry means the active file knows
        about a group the overview index doesn't. */
1353     entry = tdx_index_entry(data->index, group->name);
1354     if (entry == NULL) {
1355         warn("tradindexed: group %s missing from overview", group->name);
         /* Repair (presumably behind the elided fix guard): create the entry
            with empty high/low marks and the flag from the active file. */
1357         tdx_index_add(data->index, group->name, 0, 0, &group->flag);
1363 ** Audit the group index for any inconsistencies. If the argument is true,
1364 ** also attempt to fix those inconsistencies.
1367 tdx_index_audit(bool fix)
1369 struct group_index *index;
1373 struct hash *hashmap;
1375 struct group_entry *entry;
1376 struct audit_data data;
1378 index = tdx_index_open(true);
1382 /* Keep a lock on the header through the whole audit process. This will
1383 stall any newgroups or rmgroups, but not normal article reception. We
1384 don't want the structure of the group entries changing out from under
1385 us, although we don't mind if the data does until we're validating that
1386 particular group. */
1387 index_lock(index->fd, INN_LOCK_WRITE);
1389 /* Make sure the size looks sensible. */
1390 if (fstat(index->fd, &st) < 0) {
1391 syswarn("tradindexed: cannot fstat %s", index->path);
1394 count = index_entry_count(st.st_size);
1395 expected = index_file_size(count);
1396 if (expected != st.st_size) {
1397 syswarn("tradindexed: %ld bytes of trailing trash in %s",
1398 (unsigned long) (st.st_size - expected), index->path);
1400 if (ftruncate(index->fd, expected) < 0)
1401 syswarn("tradindexed: cannot truncate %s", index->path);
1403 index_maybe_remap(index, count);
1405 /* Okay everything is now mapped and happy. Validate the header. */
1406 index_audit_header(index, fix);
1407 index_lock(index->fd, INN_LOCK_UNLOCK);
1409 /* Walk all the group entries and check them individually. To do this, we
1410 need to map hashes to group names, so load a hash of the active file to
1411 do that resolution. */
1412 hashmap = hashmap_load();
1415 hash_traverse(hashmap, index_audit_active, &data);
1416 for (bucket = 0; bucket < index->count; bucket++) {
1417 entry = &index->entries[bucket];
1418 if (HashEmpty(entry->hash) || entry->deleted != 0)
1420 index_audit_group(index, entry, hashmap, fix);
1422 if (hashmap != NULL)