1 /* $Id: caf.c 6723 2004-05-16 21:12:53Z rra $
3 ** Library routines needed for handling CAF (Crunched Article Files)
4 ** Written by Richard Todd (rmtodd@mailhost.ecn.uoknor.edu) 3/24/96,
5 ** modified extensively since then. Altered to work with storage manager
6 ** in INN1.8 by rmtodd 3/27/98.
21 /* following code lifted from inndf.c */
24 #include <sys/statvfs.h> /* specific includes */
25 /* XXX is there a 'fstatvfs'? I don't have such a system to check--rmtodd*/
26 #define STATFUNCT fstatvfs /* function call */
27 #define STATSTRUC statvfs /* structure name */
28 #define STATAVAIL f_bavail /* blocks available */
29 #define STATMULTI f_frsize /* fragment size/block size */
30 #define STATINODE f_favail /* inodes available */
31 #define STATTYPES u_long /* type of f_bavail etc */
32 #define STATFORMT "%lu" /* format string to match */
33 #define STATFORMTPAD "%*lu" /* format string to match */
34 #endif /* HAVE_STATVFS */
39 #endif /* HAVE_SYS_VFS_H */
40 #ifdef HAVE_SYS_PARAM_H
41 #include <sys/param.h>
42 #endif /* HAVE_SYS_PARAM_H */
43 #ifdef HAVE_SYS_MOUNT_H
44 #include <sys/mount.h>
45 #endif /* HAVE_SYS_MOUNT_H */
46 #define STATFUNCT fstatfs
47 #define STATSTRUC statfs
48 #define STATAVAIL f_bavail
49 #define STATMULTI f_bsize
#define STATINODE f_ffree /* free inodes; no trailing ';' so it can be used inside expressions */
51 #define STATTYPES long
52 #define STATFORMT "%ld"
53 #define STATFORMTPAD "%*ld"
54 #endif /* HAVE_STATFS */
56 int CAFClean(char *path, int verbose, double PercentFreeThreshold);
61 /* check assertions in code (lifted from lib/malloc.c) */
62 #define ASSERT(p) do { if (!(p)) botch(__FILE__, __LINE__, #p); } while (0)
65 botch(const char *f, int l, const char *s)
68 fprintf(stderr, "assertion botched: %s:%d:%s\n", f,l,s);
69 fflush(stderr); /* if stderr writing to file--needed? */
74 /* set error code appropriately. */
79 if (caf_error == CAF_ERR_IO) {
85 ** Wrapper around read that calls CAFError if needed. 0 for success, -1 for failure.
89 OurRead(int fd, void *buf, size_t n)
93 rval = read(fd, buf, n);
98 if ((size_t) rval < n) {
99 /* not enough data! */
100 CAFError(CAF_ERR_BADFILE);
106 /* Same as OurRead except for writes. */
108 OurWrite(int fd, const void *buf, size_t n)
112 rval = write(fd, buf, n);
114 CAFError(CAF_ERR_IO);
117 if ((size_t) rval < n) {
118 /* not enough data written */
119 CAFError(CAF_ERR_IO);
126 ** Given an fd, read in a CAF_HEADER from a file. Ret. 0 on success.
130 CAFReadHeader(int fd, CAFHEADER *h)
132 /* probably already at start anyway, but paranoia is good. */
133 if (lseek(fd, 0L, SEEK_SET) < 0) {
134 CAFError(CAF_ERR_IO);
138 if (OurRead(fd, h, sizeof(CAFHEADER)) < 0) return -1;
140 if (strncmp(h->Magic, CAF_MAGIC, CAF_MAGIC_LEN) != 0) {
141 CAFError(CAF_ERR_BADFILE);
148 ** Seek to the TOC entry for a given article. As usual, -1 for error, 0 succ.
152 CAFSeekTOCEnt(int fd, CAFHEADER *head, ARTNUM art)
156 offset = sizeof(CAFHEADER) + head->FreeZoneTabSize;
157 offset += (art - head->Low) * sizeof(CAFTOCENT);
158 if (lseek(fd, offset, SEEK_SET) < 0) {
159 CAFError(CAF_ERR_IO);
166 ** Fetch the TOC entry for a given article. As usual -1 for error, 0 success */
169 CAFGetTOCEnt(int fd, CAFHEADER *head, ARTNUM art, CAFTOCENT *tocp)
171 if (CAFSeekTOCEnt(fd, head, art) < 0) {
175 if (OurRead(fd, tocp, sizeof(CAFTOCENT)) < 0) return -1;
181 ** Round an offset up to the next highest block boundary. The caller passes
182 ** in the blocksize (normally taken from the CAFHEADER); zero means the default.
185 CAFRoundOffsetUp(off_t off, unsigned int blocksize)
189 /* Zero means default blocksize, though we shouldn't need this for long,
190 as all new CAF files will have BlockSize set. */
191 if (blocksize == 0) {
192 blocksize = CAF_DEFAULT_BLOCKSIZE;
195 off2 = ((off + blocksize - 1) / blocksize) * blocksize;
200 ** Dispose of an already-allocated CAFBITMAP.
203 CAFDisposeBitmap(CAFBITMAP *bm)
208 for (i = 0 ; i < bm->NumBMB ; ++i) {
211 if (bmb->BMBBits) free(bmb->BMBBits);
221 ** Read the index bitmap from a CAF file, return a CAFBITMAP structure.
224 /* define this instead of littering all our formulas with semi-mysterious 8s. */
228 CAFReadFreeBM(int fd, CAFHEADER *h)
234 if (lseek(fd, sizeof(CAFHEADER), SEEK_SET) < 0) {
235 CAFError(CAF_ERR_IO);
238 bm = xmalloc(sizeof(CAFBITMAP));
240 bm->FreeZoneTabSize = h->FreeZoneTabSize;
241 bm->FreeZoneIndexSize = h->FreeZoneIndexSize;
242 bm->NumBMB = BYTEWIDTH * bm->FreeZoneIndexSize;
243 bm->BytesPerBMB = (h->BlockSize) * (h->BlockSize * BYTEWIDTH);
244 bm->BlockSize = h->BlockSize;
246 bm->Blocks = xmalloc(bm->NumBMB * sizeof(CAFBMB *));
247 bm->Bits = xmalloc(bm->FreeZoneIndexSize);
248 for (i = 0 ; i < bm->NumBMB ; ++i) {
249 bm->Blocks[i] = NULL;
252 if (OurRead(fd, bm->Bits, bm->FreeZoneIndexSize) < 0) {
253 CAFDisposeBitmap(bm);
257 bm->StartDataBlock = h->StartDataBlock;
259 if (fstat(fd, &statbuf) < 0) {
260 /* it'd be odd for this to fail, but paranoia is good for the soul. */
261 CAFError(CAF_ERR_IO);
262 CAFDisposeBitmap(bm);
265 /* round st_size down to a mult. of BlockSize */
266 bm->MaxDataBlock = (statbuf.st_size / bm->BlockSize) * bm->BlockSize + bm->BlockSize;
267 /* (note: MaxDataBlock points to the block *after* the last block of the file.) */
272 ** Fetch a given bitmap block into memory, and make the CAFBITMAP point to
273 ** the new BMB appropriately. Return NULL on failure, and the BMB * on success.
276 CAFFetchBMB(unsigned int blkno, int fd, CAFBITMAP *bm)
280 ASSERT(blkno < bm->NumBMB);
281 /* if already in memory, don't need to do anything. */
282 if (bm->Blocks[blkno]) return bm->Blocks[blkno];
284 newbmb = xmalloc(sizeof(CAFBMB));
287 newbmb->StartDataBlock = bm->StartDataBlock + blkno*(bm->BytesPerBMB);
289 newbmb->MaxDataBlock = newbmb->StartDataBlock + bm->BytesPerBMB;
290 if (newbmb->MaxDataBlock > bm->MaxDataBlock) {
291 /* limit the per-BMB MaxDataBlock to that for the bitmap as a whole */
292 newbmb->MaxDataBlock = bm->MaxDataBlock;
295 newbmb->BMBBits = xmalloc(bm->BlockSize);
297 if (lseek(fd, (blkno + 1) * bm->BlockSize, SEEK_SET) < 0) {
298 free(newbmb->BMBBits);
300 CAFError(CAF_ERR_IO);
304 if (OurRead(fd, newbmb->BMBBits, bm->BlockSize) < 0) {
305 free(newbmb->BMBBits);
310 bm->Blocks[blkno] = newbmb;
315 ** Flush out (if needed) a BMB to disk. Return 0 on success, -1 on failure.
319 CAFFlushBMB(unsigned int blkno, int fd, CAFBITMAP *bm)
323 ASSERT(blkno < bm->NumBMB);
325 if (bm->Blocks[blkno] == NULL) return 0; /* nothing to do. */
327 bmb = bm->Blocks[blkno];
328 if (!bmb->Dirty) return 0;
330 if (lseek(fd, (blkno + 1) * bm->BlockSize, SEEK_SET) < 0) {
331 CAFError(CAF_ERR_IO);
335 if (OurWrite(fd, bmb->BMBBits, bm->BlockSize) < 0) return -1;
343 ** Write the free bit map to the CAF file. Return 0 on success, -1 on failure.
346 CAFWriteFreeBM(int fd, CAFBITMAP *bm)
350 for (blkno = 0 ; blkno < bm->NumBMB ; ++blkno) {
351 if (CAFFlushBMB(blkno, fd, bm) < 0) {
356 if (lseek(fd, sizeof(CAFHEADER), SEEK_SET) < 0) {
357 CAFError(CAF_ERR_IO);
361 if(OurWrite(fd, bm->Bits, bm->FreeZoneIndexSize) < 0) return -1;
367 ** Determine if a block at a given offset is free. Return 1 if it is, 0
372 CAFIsBlockFree(CAFBITMAP *bm, int fd, off_t block)
379 /* round block down to BlockSize boundary. */
380 block = block - (block % bm->BlockSize);
382 /* if < Start, always return 0 (should never happen in real usage) */
383 if (block < bm->StartDataBlock) return 0;
385 /* if off the end, also return 0. */
386 if (block >= bm->MaxDataBlock) return 0;
388 /* find blk # of appropriate BMB */
389 blkno = (block - bm->StartDataBlock) / bm->BytesPerBMB;
391 bmb = CAFFetchBMB(blkno, fd, bm);
392 /* ick. not a lot we can do here if this fails. */
393 if (bmb == NULL) return 0;
395 /* Sanity checking that we have the right BMB. */
396 ASSERT(block >= bmb->StartDataBlock);
397 ASSERT(block < bmb->MaxDataBlock);
399 ind = ((block - bmb->StartDataBlock) / bm->BlockSize) / BYTEWIDTH;
400 mask = 1 << (((block - bmb->StartDataBlock) / bm->BlockSize) % BYTEWIDTH);
402 ASSERT(ind < bm->BlockSize);
404 return ((bmb->BMBBits[ind]) & mask) != 0;
408 ** Check if a bitmap chunk is all zeros or not.
411 IsMapAllZero(char *data, int len)
414 for (i = 0 ; i < len ; ++i) {
415 if (data[i] != 0) return 0;
420 /* Set the free bitmap entry for a given block to be a given value (1 or 0). */
422 CAFSetBlockFree(CAFBITMAP *bm, int fd, off_t block, int isfree)
430 /* round block down to BlockSize boundary. */
431 block = block - (block % bm->BlockSize);
433 /* if < Start, always return (should never happen in real usage) */
434 if (block < bm->StartDataBlock) return;
436 /* if off the end, also return. */
437 if (block >= bm->MaxDataBlock) return;
438 /* find blk # of appropriate BMB */
439 blkno = (block - bm->StartDataBlock) / bm->BytesPerBMB;
441 bmb = CAFFetchBMB(blkno, fd, bm);
442 /* ick. not a lot we can do here if this fails. */
443 if (bmb == NULL) return;
445 /* Sanity checking that we have the right BMB. */
446 ASSERT(block >= bmb->StartDataBlock);
447 ASSERT(block < bmb->MaxDataBlock);
449 ind = ((block - bmb->StartDataBlock) / bm->BlockSize) / BYTEWIDTH;
450 mask = 1 << (((block - bmb->StartDataBlock) / bm->BlockSize) % BYTEWIDTH);
452 ASSERT(ind < bm->BlockSize);
455 bmb->BMBBits[ind] |= mask; /* set bit */
457 bmb->BMBBits[ind] &= ~mask; /* clear bit. */
462 /* now have to set top level (index) bitmap appropriately */
463 allzeros = IsMapAllZero(bmb->BMBBits, bm->BlockSize);
465 ind = blkno/BYTEWIDTH;
466 mask = 1 << (blkno % BYTEWIDTH);
469 bm->Bits[ind] &= ~mask; /* clear bit */
471 bm->Bits[ind] |= mask;
478 ** Search a freebitmap to find n contiguous free blocks. Returns 0 for
479 ** failure, offset of starting block if successful.
480 ** XXX does not attempt to find chunks that span BMB boundaries. This is
482 ** (Actually I think this case works, as does the case when it tries to find
483 ** a block bigger than BytesPerBMB. Testing reveals that it does seem to work,
484 ** though not optimally (some BMBs will get scanned several times).
487 CAFFindFreeBlocks(CAFBITMAP *bm, int fd, unsigned int n)
489 off_t startblk, curblk;
490 unsigned int i, ind, blkno, j;
491 unsigned int bmblkno, k, l;
494 /* Iterate over all bytes and all bits in the toplevel bitmap. */
495 for (k = 0 ; k < bm->FreeZoneIndexSize ; ++k) {
496 if (bm->Bits[k] == 0) continue;
497 for (l = 0; l < BYTEWIDTH ; ++l) {
498 if ((bm->Bits[k] & (1 << l)) != 0) {
499 /* found a bit set! fetch the BMB. */
500 bmblkno = k*BYTEWIDTH + l;
501 bmb = CAFFetchBMB(bmblkno, fd, bm);
502 if (bmb == NULL) return 0;
504 curblk = bmb->StartDataBlock;
505 while (curblk < bmb->MaxDataBlock) {
506 blkno = (curblk - bmb->StartDataBlock)/(bm->BlockSize);
507 ind = blkno/BYTEWIDTH;
508 if (bmb->BMBBits[ind] == 0) {
509 /* nothing set in this byte, skip this byte and move on. */
510 blkno = (ind+1)*BYTEWIDTH;
511 curblk = blkno*bm->BlockSize + bmb->StartDataBlock;
515 /* scan rest of current byte for 1 bits */
516 for (j = blkno % BYTEWIDTH ; j < BYTEWIDTH ; j++, curblk += bm->BlockSize) {
517 if ((bmb->BMBBits[ind] & (1 << j)) != 0) break;
519 if (j == BYTEWIDTH) continue;
521 /* found a 1 bit, set startblk to be locn of corresponding free blk. */
523 curblk += bm->BlockSize;
525 /* scan for n blocks in a row. */
526 for (i = 1 ; i < n ; ++i, curblk += bm->BlockSize) {
527 if (!CAFIsBlockFree(bm, fd, curblk)) break;
530 if (i == n) return startblk;
532 /* otherwise curblk points to a non-free blk, continue searching from there. */
542 ** Open a CAF file for reading and seek to the start of a given article.
543 ** Take as args the CAF file pathname, article #, and a pointer to where
544 ** the art. length can be returned.
548 CAFOpenArtRead(const char *path, ARTNUM art, size_t *len)
555 if ( (fd = open(path, O_RDONLY)) < 0) {
557 ** if ENOENT (not there), just call this "article not found",
558 ** otherwise it's a more serious error and stash the errno.
560 if (errno == ENOENT) {
561 CAFError(CAF_ERR_ARTNOTHERE);
563 CAFError(CAF_ERR_IO);
568 /* Fetch the header */
569 if (CAFReadHeader(fd, &head) < 0) {
574 /* Is the requested article even in the file? */
575 if (art < head.Low || art > head.High) {
576 CAFError(CAF_ERR_ARTNOTHERE);
581 if (CAFGetTOCEnt(fd, &head, art, &tocent) < 0) {
586 if (tocent.Size == 0) {
587 /* empty/otherwise not present article */
588 CAFError(CAF_ERR_ARTNOTHERE);
593 if (lseek(fd, tocent.Offset, SEEK_SET) < 0) {
594 CAFError(CAF_ERR_IO);
599 /* I'm not sure if this fstat is worth the speed hit, but unless we check
600 here, we may simply segfault when we try to access mmap'd space beyond
601 the end of the file. I think robustness wins. */
602 if (fstat(fd, &st) == 0)
603 if (tocent.Size > st.st_size - tocent.Offset) {
604 CAFError(CAF_ERR_IO);
614 ** variables for keeping track of currently pending write.
615 ** FIXME: assumes only one article open for writing at a time.
618 static int CAF_fd_write;
619 static ARTNUM CAF_artnum_write;
620 static off_t CAF_startoffset_write;
621 static CAFHEADER CAF_header_write;
622 static CAFBITMAP *CAF_free_bitmap_write;
623 static unsigned int CAF_numblks_write;
626 ** Given estimated size of CAF file (i.e., the size of the old CAF file found
627 ** by cafclean), find an "optimal" blocksize (one big enough so that the
628 ** default FreeZoneTabSize can cover the entire
629 ** file so that we don't "lose" free space and not be able to reuse it.
630 ** (Currently only returns CAF_DEFAULT_BLOCKSIZE, as with the new 2-level
631 ** bitmaps, the FreeZoneTabSize that results from a 512-byte blocksize can
632 ** handle any newsgroup with <7.3G of data. Yow!)
636 CAFFindOptimalBlocksize(ARTNUM tocsize UNUSED, size_t cfsize)
639 if (cfsize == 0) return CAF_DEFAULT_BLOCKSIZE; /* no size given, use default. */
641 return CAF_DEFAULT_BLOCKSIZE;
645 ** Create an empty CAF file. Used by CAFOpenArtWrite.
646 ** Must be careful here and create the new CAF file under a temp name and then
647 ** link it into place, to avoid possible race conditions.
648 ** Note: CAFCreateCAFFile returns fd locked, also to avoid race conds.
649 ** New args added for benefit of the cleaner program: "nolink", a flag that
650 ** tells it not to bother with the link business, and "temppath", a pointer
651 ** to a buffer that (if non-null) gets the pathname of the temp file copied
652 ** to it. "estcfsize", if nonzero, is an estimate of what the CF filesize will
653 ** be, used to automatically select a good blocksize.
656 CAFCreateCAFFile(char *cfpath, ARTNUM artnum, ARTNUM tocsize,
657 size_t estcfsize, int nolink, char *temppath)
661 char path[SPOOLNAMEBUFF];
662 char finalpath[SPOOLNAMEBUFF];
666 strlcpy(finalpath, cfpath, sizeof(finalpath));
667 snprintf(path, sizeof(path), "%s.%d", cfpath, getpid());/* create path with PID attached */
669 ** Shouldn't be anyone else with our pid trying to write to the temp.
670 ** file, but there might be an old one lying around. Nuke it.
671 ** (yeah, I'm probably being overly paranoid.)
673 if (unlink(path) < 0 && errno != ENOENT) {
674 CAFError(CAF_ERR_IO);
677 if ((fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0666)) < 0) {
678 CAFError(CAF_ERR_IO);
682 /* Initialize the header. */
683 strncpy(head.Magic, CAF_MAGIC, CAF_MAGIC_LEN);
686 head.NumSlots = tocsize;
688 head.BlockSize = CAFFindOptimalBlocksize(tocsize, estcfsize);
689 head.FreeZoneIndexSize = head.BlockSize - sizeof(CAFHEADER);
690 head.FreeZoneTabSize = head.FreeZoneIndexSize
691 + head.BlockSize*head.FreeZoneIndexSize*BYTEWIDTH;
692 head.StartDataBlock = CAFRoundOffsetUp(sizeof(CAFHEADER)
693 + head.FreeZoneTabSize + tocsize*sizeof(CAFTOCENT), head.BlockSize);
695 head.spare[0] = head.spare[1] = head.spare[2] = 0;
697 if (OurWrite(fd, &head, sizeof(head)) < 0) {
702 offset = sizeof(CAFHEADER) + head.FreeZoneTabSize +
703 sizeof(CAFTOCENT) * tocsize;
705 if (lseek(fd, offset, SEEK_SET) < 0) {
706 CAFError(CAF_ERR_IO);
710 ** put a null after the TOC as a 'placeholder', so that we'll have a sparse
711 ** file and that EOF will be at where the articles should start going.
714 if (OurWrite(fd, nulls, 1) < 0) {
718 /* shouldn't be anyone else locking our file, since temp file has unique
719 PID-based name ... */
720 if (!inn_lock_file(fd, INN_LOCK_WRITE, false)) {
721 CAFError(CAF_ERR_IO);
727 if (temppath != NULL) {
728 strcpy(temppath, path);
734 ** Try to link to the real one. NOTE: we may get EEXIST here, which we
735 ** will handle specially in OpenArtWrite.
737 if (link(path, finalpath) < 0) {
738 CAFError(CAF_ERR_IO);
739 /* bounced on the link attempt, go ahead and unlink the temp file and return. */
745 ** Unlink the temp. link. Do we really care if this fails? XXX
746 ** Not sure what we can do anyway.
753 ** Try to open a CAF file for writing a given article. Return an fd to
754 ** write to (already positioned to the right place to write at) if successful,
755 ** else -1 on error. If waitlock is true, we wait for a lock on the file,
756 ** otherwise we fail if we can't lock it. If size is != 0, we try to allocate
757 ** a chunk from free space in the CAF instead of writing at the end of the
758 ** file. Artp is a pointer to the article number to use; if the article number
759 ** is zero, the next free article # ("High"+1) will be used, and *artp will
760 ** be set accordingly. Once the CAF file is open/created, CAFStartWriteFd()
761 ** does the remaining dirty work.
765 CAFOpenArtWrite(char *path, ARTNUM *artp, int waitlock, size_t size)
770 /* try to open the file and lock it. */
771 if ((fd = open(path, O_RDWR)) < 0) {
772 /* if ENOENT, try creating CAF file, otherwise punt. */
773 if (errno != ENOENT) {
774 CAFError(CAF_ERR_IO);
778 ** the *artp? business is so that if *artp==0, we set initial
781 fd = CAFCreateCAFFile(path, (*artp ? *artp : 1),
782 CAF_DEFAULT_TOC_SIZE, 0, 0, NULL);
784 ** XXX possible race condition here, so we check to see if
785 ** create failed because of EEXIST. If so, we go back to top
786 ** of loop, because someone else was trying to create at the
788 ** Is this the best way to solve this?
789 ** (Hmm. this condition should be quite rare, occurring only
790 ** when two different programs are simultaneously doing
791 ** CAFOpenArtWrite()s, and no CF file exists previously.)
794 if (caf_errno == EEXIST) {
795 /* ignore the error and try again */
798 return -1; /* other error, assume caf_errno set properly. */
801 ** break here, because CreateCAFFile does
802 ** lock fd, so we don't need to flock it ourselves.
808 /* try a nonblocking lock attempt first. */
809 if (inn_lock_file(fd, INN_LOCK_WRITE, false)) break;
812 CAFError(CAF_ERR_FILEBUSY);
813 close(fd); /* keep from leaking fds. */
816 /* wait around to try and get a lock. */
817 inn_lock_file(fd, INN_LOCK_WRITE, true);
819 ** and then close and reopen the file, in case someone changed the
820 ** file out from under us.
824 return CAFStartWriteFd(fd, artp, size);
828 ** Like CAFOpenArtWrite(), except we assume the CAF file is already
829 ** open/locked, and we have an open fd to it.
832 CAFStartWriteFd(int fd, ARTNUM *artp, size_t size)
836 off_t offset, startoffset;
837 unsigned int numblks = 0;
841 /* fd is open to the CAF file, open for write and locked. */
842 /* Fetch the header */
843 if (CAFReadHeader(fd, &head) < 0) {
848 /* check for zero article number and handle accordingly. */
851 /* assign next highest article number. */
853 /* and pass to caller. */
857 /* Is the requested article even in the file? */
858 if (art < head.Low || art >= head.Low + head.NumSlots) {
859 CAFError(CAF_ERR_ARTWONTFIT);
865 ** Get the CAFTOCENT for that article, but only if article# is in the range
866 ** Low <= art# <= High. If art# > High, use a zero CAFTOCENT. This means
867 ** that in cases where the CAF file is inconsistent due to a crash ---
868 ** the CAFTOCENT shows an article as being existent, but the header
869 ** doesn't show that article as being in the currently valid range ---
870 ** the header value "wins" and we assume the article does not exist.
871 ** This avoids problems with "half-existent" articles that showed up
872 ** in the CAF TOC, but were never picked up by ctlinnd renumber '' .
874 /* (Note: We already checked above that art >= head.Low.) */
876 if (art > head.High) {
877 /* clear the tocent */
878 memset(&tocent, 0, sizeof(tocent));
880 if (CAFGetTOCEnt(fd, &head, art, &tocent) < 0) {
886 if (tocent.Size != 0) {
887 /* article is already here */
888 CAFError(CAF_ERR_ARTALREADYHERE);
896 if (size != 0 && (freebm = CAFReadFreeBM(fd, &head)) != NULL) {
897 numblks = (size + head.BlockSize - 1) / head.BlockSize;
898 startoffset = CAFFindFreeBlocks(freebm, fd, numblks);
899 if (startoffset == 0) {
900 CAFDisposeBitmap(freebm);
905 if (startoffset == 0) {
907 ** No size given or free space not available, so
908 ** seek to EOF to prepare to start writing article.
911 if ((offset = lseek(fd, 0, SEEK_END)) < 0) {
912 CAFError(CAF_ERR_IO);
916 /* and round up offset to a block boundary. */
917 startoffset = CAFRoundOffsetUp(offset, head.BlockSize);
920 /* Seek to starting offset for the new article. */
921 if (lseek(fd, startoffset, SEEK_SET) < 0) {
922 CAFError(CAF_ERR_IO);
927 /* stash data for FinishArtWrite's use. */
929 CAF_artnum_write = art;
930 CAF_startoffset_write = startoffset;
931 CAF_header_write = head;
932 CAF_free_bitmap_write = freebm;
933 CAF_numblks_write = numblks;
939 ** write out TOC entries for the previous article. Note that we do *not*
940 ** (as was previously done) close the fd; this allows reuse of the fd to write
941 ** another article to this CAF file w/o a (somewhat expensive) open().
945 CAFFinishArtWrite(int fd)
953 /* blah, really should handle multiple pending OpenArtWrites. */
954 if (fd != CAF_fd_write) {
955 fprintf(stderr, "CAF: fd mismatch in CloseArtWrite.\n");
959 headp = &CAF_header_write;
961 /* Find out where we left off writing in the file. */
962 if ((curpos = lseek(fd, 0, SEEK_CUR)) < 0) {
963 CAFError(CAF_ERR_IO);
968 /* Write the new TOC entry. */
969 if (CAFSeekTOCEnt(fd, headp, CAF_artnum_write) < 0) {
973 tocentry.Offset = CAF_startoffset_write;
974 tocentry.Size = curpos - CAF_startoffset_write;
975 tocentry.ModTime = time((time_t *)NULL);
976 if (OurWrite(fd, &tocentry, sizeof(CAFTOCENT)) < 0) {
981 /* if needed, update free bitmap. */
982 if (CAF_free_bitmap_write != NULL) {
983 /* Paranoia: check to make sure we didn't write more than we said we would. */
984 if (tocentry.Size > CAF_numblks_write * headp->BlockSize) {
986 ** for now core dump (might as well, if we've done this the CAF
987 ** file is probably thoroughly hosed anyway.)
989 fprintf(stderr, "CAF: article written overran declared size.\n");
993 curblk = CAF_startoffset_write;
995 for (i = 0 ; i < CAF_numblks_write ; ++i, curblk += headp->BlockSize) {
996 CAFSetBlockFree(CAF_free_bitmap_write, fd, curblk, 0);
998 if (CAFWriteFreeBM(fd, CAF_free_bitmap_write) < 0){
999 CAFError(CAF_ERR_IO);
1003 CAFDisposeBitmap(CAF_free_bitmap_write);
1004 /* and update the Free value in the header. */
1005 headp->Free -= CAF_numblks_write * headp->BlockSize;
1008 if (CAF_artnum_write > headp->High || CAF_free_bitmap_write) {
1009 /* need to update header. */
1010 if (CAF_artnum_write > headp->High) {
1011 headp->High = CAF_artnum_write;
1013 if (lseek(fd, 0, SEEK_SET) < 0) {
1014 CAFError(CAF_ERR_IO);
1018 if (OurWrite(fd, headp, sizeof(CAFHEADER)) < 0) {
1024 if (close(fd) < 0) {
1025 CAFError(CAF_ERR_IO);
1035 ** return a string containing a description of the error.
1036 ** Warning: uses a static buffer, or possibly a static string.
1039 static char errbuf[512];
1044 if (caf_error == CAF_ERR_IO || caf_error == CAF_ERR_CANTCREATECAF) {
1045 snprintf(errbuf, sizeof(errbuf), "%s errno=%s\n",
1046 (caf_error == CAF_ERR_IO) ? "CAF_ERR_IO" : "CAF_ERR_CANTCREATECAF",
1051 case CAF_ERR_BADFILE:
1052 return "CAF_ERR_BADFILE";
1053 case CAF_ERR_ARTNOTHERE:
1054 return "CAF_ERR_ARTNOTHERE";
1055 case CAF_ERR_FILEBUSY:
1056 return "CAF_ERR_FILEBUSY";
1057 case CAF_ERR_ARTWONTFIT:
1058 return "CAF_ERR_ARTWONTFIT";
1059 case CAF_ERR_ARTALREADYHERE:
1060 return "CAF_ERR_ARTALREADYHERE";
1061 case CAF_ERR_BOGUSPATH:
1062 return "CAF_ERR_BOGUSPATH";
1064 snprintf(errbuf, sizeof(errbuf), "CAF error %d", caf_error);
1071 ** Open a CAF file, snarf the TOC entries for all the articles inside,
1072 ** and close the file. NOTE: returns the header for the CAF file in
1073 ** the storage pointed to by *ch. Dynamically allocates storage for
1074 ** the TOC entries, which should be freed by the caller when the
1075 ** caller's done with it. Return NULL on failure.
1077 ** This function calls CAFOpenReadTOC(path, ch, &tocp), which does most
1078 ** (practically all) of the dirty work. CAFOpenReadTOC leaves the fd open
1079 ** (and returns it); this is needed by cafls. CAFReadTOC() closes the fd
1080 ** after CAFOpenReadTOC() is done with it.
1084 CAFReadTOC(char *path, CAFHEADER *ch)
1089 if ((fd = CAFOpenReadTOC(path, ch, &tocp)) < 0) {
1090 return NULL; /* some sort of error happened */
1098 CAFOpenReadTOC(char *path, CAFHEADER *ch, CAFTOCENT **tocpp)
1105 if ( (fd = open(path, O_RDONLY)) < 0) {
1107 ** if ENOENT (not there), just call this "article not found",
1108 ** otherwise it's a more serious error and stash the errno.
1110 if (errno == ENOENT) {
1111 CAFError(CAF_ERR_ARTNOTHERE);
1113 CAFError(CAF_ERR_IO);
1118 /* Fetch the header */
1119 if (CAFReadHeader(fd, ch) < 0) {
1124 /* Allocate memory for TOC. */
1125 tocp = xmalloc((ch->High - ch->Low + 1) * sizeof(CAFTOCENT));
1126 nb = (sizeof(CAFTOCENT))*(ch->High - ch->Low + 1); /* # bytes to read for toc. */
1128 /* seek to beginning of TOC */
1129 offset = sizeof(CAFHEADER) + ch->FreeZoneTabSize;
1131 if (lseek(fd, offset, SEEK_SET) < 0) {
1132 CAFError(CAF_ERR_IO);
1136 if (OurRead(fd, tocp, nb) < 0) {
1140 /* read TOC successfully, return fd and stash tocp where we were told to */
1147 ** Cancel/expire articles from a CAF file. This involves zeroing the Size
1148 ** field of the TOC entry, and updating the Free field of the CAF header.
1149 ** note that no disk space is actually freed by this process; space will only
1150 ** be returned to the OS when the cleaner daemon runs on the CAF file.
1154 CAFRemoveMultArts(char *path, unsigned int narts, ARTNUM *artnums)
1159 CAFBITMAP *freebitmap;
1161 unsigned int numblksfreed, i, j;
1163 int errorfound = false;
1166 /* try to open the file and lock it */
1167 if ((fd = open(path, O_RDWR)) < 0) {
1168 /* if ENOENT, CAF file isn't there, so return ARTNOTHERE, otherwise it's an I/O error. */
1169 if (errno != ENOENT) {
1170 CAFError(CAF_ERR_IO);
1173 CAFError(CAF_ERR_ARTNOTHERE);
1177 /* try a nonblocking lock attempt first. */
1178 if (inn_lock_file(fd, INN_LOCK_WRITE, false)) break;
1180 /* wait around to try and get a lock. */
1181 inn_lock_file(fd, INN_LOCK_WRITE, true);
1183 ** and then close and reopen the file, in case someone changed the
1184 ** file out from under us.
1188 /* got the file, open for write and locked. */
1189 /* Fetch the header */
1190 if (CAFReadHeader(fd, &head) < 0) {
1195 if ((freebitmap = CAFReadFreeBM(fd, &head)) == NULL) {
1200 for (j = 0 ; j < narts ; ++j) {
1203 /* Is the requested article even in the file? */
1204 if (art < head.Low || art > head.High) {
1205 CAFError(CAF_ERR_ARTNOTHERE);
1207 continue; /* don't abandon the whole remove if just one art is missing */
1210 if (CAFGetTOCEnt(fd, &head, art, &tocent) < 0) {
1212 CAFDisposeBitmap(freebitmap);
1216 if (tocent.Size == 0) {
1217 CAFError(CAF_ERR_ARTNOTHERE);
1219 continue; /* don't abandon the whole remove if just one art is missing */
1222 numblksfreed = (tocent.Size + head.BlockSize - 1) / head.BlockSize;
1224 /* Mark all the blocks as free. */
1225 for (curblk = tocent.Offset, i = 0 ; i < numblksfreed; ++i, curblk += head.BlockSize) {
1226 CAFSetBlockFree(freebitmap, fd, curblk, 1);
1228 /* Note the amount of free space added. */
1229 head.Free += numblksfreed * head.BlockSize;
1230 /* and mark the tocent as a deleted entry. */
1233 if (CAFSeekTOCEnt(fd, &head, art) < 0) {
1235 CAFDisposeBitmap(freebitmap);
1239 if (OurWrite(fd, &tocent, sizeof(CAFTOCENT)) < 0) {
1241 CAFDisposeBitmap(freebitmap);
1246 if (CAFWriteFreeBM(fd, freebitmap) < 0) {
1248 CAFDisposeBitmap(freebitmap);
1251 /* dispose of bitmap storage. */
1252 CAFDisposeBitmap(freebitmap);
1254 /* need to update header. */
1255 if (lseek(fd, 0, SEEK_SET) < 0) {
1256 CAFError(CAF_ERR_IO);
1259 if (OurWrite(fd, &head, sizeof(CAFHEADER)) < 0) {
1263 if (close(fd) < 0) {
1264 CAFError(CAF_ERR_IO);
1268 if (CAFClean(path, 0, 10.0) < 0) errorfound=true;
1270 return errorfound ? -1 : 0;
1274 ** Do a fake stat() of a CAF-stored article. Both 'inpaths' and 'innfeed'
1275 ** find this functionality useful, so we've added a function to do this.
1276 ** Caveats: not all of the stat structure is filled in, only these items:
1277 ** st_mode, st_size, st_atime, st_ctime, st_mtime. (Note:
1278 ** atime==ctime==mtime always, as we don't track times of CAF reads.)
1282 CAFStatArticle(char *path, ARTNUM art, struct stat *stbuf)
1288 if ( (fd = open(path, O_RDONLY)) < 0) {
1290 ** if ENOENT (not there), just call this "article not found",
1291 ** otherwise it's a more serious error and stash the errno.
1293 if (errno == ENOENT) {
1294 CAFError(CAF_ERR_ARTNOTHERE);
1296 CAFError(CAF_ERR_IO);
1301 /* Fetch the header */
1302 if (CAFReadHeader(fd, &head) < 0) {
1307 /* Is the requested article even in the file? */
1308 if (art < head.Low || art > head.High) {
1309 CAFError(CAF_ERR_ARTNOTHERE);
1314 if (CAFGetTOCEnt(fd, &head, art, &tocent) < 0) {
1319 if (tocent.Size == 0) {
1320 /* empty/otherwise not present article */
1321 CAFError(CAF_ERR_ARTNOTHERE);
1326 /* done with file, can close it. */
1329 memset(stbuf, 0, sizeof(struct stat));
1330 stbuf->st_mode = S_IFREG | 0444;
1331 stbuf->st_size = tocent.Size;
1332 stbuf->st_atime = stbuf->st_ctime = stbuf->st_mtime = tocent.ModTime;
1337 ** Taken from the old 'cafclean' program.
1338 ** Function to clean a single CAF file.
1339 ** Possibly the ugliest function I've ever written in my life.
1342 ** We try to keep the total TOC size this many times larger than the actual
1343 ** amount of TOC data in use so as not to have to reclean or compact the TOC
1346 #define TOC_CLEAN_RATIO 10
1348 ** ditto, but for compacting, we want to force a compacting if the High art#
1349 ** wanders into the top nth of the TOC slots.
1351 #define TOC_COMPACT_RATIO 5
1354 CAFClean(char *path, int verbose, double PercentFreeThreshold)
1357 CAFHEADER head, newhead;
1361 CAFTOCENT *tocarray, *tocp;
1362 CAFTOCENT *newtocarray, *newtocp;
1364 FILE *infile, *outfile;
1365 off_t startoffset, newstartoffset;
1369 unsigned int blocksize;
1371 struct stat statbuf;
1374 int toc_needs_expansion;
1375 int toc_needs_compacting;
1378 struct STATSTRUC fsinfo;
1379 long num_diskblocks_needed;
1382 /* allocate buffer for newpath */
1383 newpath = xmalloc(strlen(path) + 10);
1385 /* try to open the file and lock it. */
1386 if ((fdin = open(path, O_RDWR)) < 0) {
1388 ** if ENOENT, obviously no CAF file is here, so just return,
1389 ** otherwise report an error.
1391 if (errno != ENOENT) {
1392 CAFError(CAF_ERR_IO);
1399 /* try a nonblocking lock attempt first. */
1400 if (inn_lock_file(fdin, INN_LOCK_WRITE, false)) break;
1402 /* wait around to try and get a lock. */
1403 inn_lock_file(fdin, INN_LOCK_WRITE, true);
1405 ** and then close and reopen the file, in case someone changed the
1406 ** file out from under us.
1411 /* got the file, open for write and locked. */
1412 /* Fetch the header */
1413 if (CAFReadHeader(fdin, &head) < 0) {
1418 /* Stat the file to see how big it is */
1419 if (fstat(fdin, &statbuf) < 0) {
1421 CAFError(CAF_ERR_IO);
1426 /* compute amount of actual data in file. */
1427 datasize = statbuf.st_size - head.StartDataBlock;
1428 if (datasize <= 0) {
1429 /* nothing in the file, set percentfree==0 so won't bother cleaning */
1432 percentfree = (100.0 * head.Free) / datasize;
1436 ** Grumble, we need to read the TOC now even before we clean, just so
1437 ** we can decide if a clean or a compaction is needed.
1440 lseek(fdin, 0L, SEEK_SET);
1442 /* make input file stdio-buffered. */
1443 if ((infile = fdopen(fdin, "r+")) == NULL) {
1444 CAFError(CAF_ERR_IO);
1449 /* Allocate memory for TOC. */
1450 tocarray = xmalloc((head.High - head.Low + 1) * sizeof(CAFTOCENT));
1452 fseeko(infile, (off_t) (sizeof(CAFHEADER) + head.FreeZoneTabSize),
1455 n = fread(tocarray, sizeof(CAFTOCENT), (head.High - head.Low + 1), infile);
1457 CAFError(CAF_ERR_IO);
1464 if ((unsigned long) n < (head.High - head.Low +1)) {
1465 CAFError(CAF_ERR_BADFILE);
1472 /* Scan to see what the new lower bound for CAF file should be. */
1473 newlow = head.High + 1;
1475 for (tocp = tocarray, i = head.Low; i <= head.High; ++tocp, ++i) {
1476 if (tocp->Size != 0) {
1483 ** if newlow is head.High+1, the TOC is completely empty and we can
1484 ** just remove the entire file.
1486 if (newlow == head.High + 1) {
1495 ** Ah. NOW we get to decide if we need a clean!
1497 ** 1) the absolute freespace threshold is crossed
1498 ** 2) the percent free threshold is crossed.
1499 ** 3) The CAF TOC is over 10% full (assume it needs to be expanded,
1500 ** so we force a clean)
1501 ** Note that even if we do not need a clean, we may need a compaction
1502 ** if the high article number is in the top nth of the TOC.
1505 toc_needs_expansion = 0;
1506 if ( (head.High - newlow) >= head.NumSlots/TOC_CLEAN_RATIO) {
1507 toc_needs_expansion = 1;
1510 toc_needs_compacting = 0;
1511 if ( (head.Low + head.NumSlots - head.NumSlots/TOC_COMPACT_RATIO) <= head.High) {
1512 toc_needs_compacting = 1;
1515 if ( (percentfree < PercentFreeThreshold)
1516 && (!toc_needs_expansion) ) {
1517 /* no cleaning, but do we need a TOC compaction ? */
1518 if (toc_needs_compacting) {
1523 printf("Compacting %s: Free=%lu (%f%%)\n", path,
1524 (unsigned long) head.Free, percentfree);
1527 delta = newlow - head.Low;
1529 /* slide TOC array down delta units. */
1530 for (i = newlow, tocp = tocarray, tocp2 = tocarray+delta;
1531 i <= head.High ; ++i) {
1536 /* note we don't set LastCleaned, this doesn't count as a clean. */
1537 /* (XXX: do we need a LastCompacted as well? might be nice.) */
1539 /* write new header on top of old */
1540 fseeko(infile, 0, SEEK_SET);
1541 if (fwrite(&head, sizeof(CAFHEADER), 1, infile) < 1) {
1542 CAFError(CAF_ERR_IO);
1549 ** this next fseeko might actually fail, because we have buffered
1550 ** stuff that might fail on write.
1552 if (fseeko(infile, sizeof(CAFHEADER) + head.FreeZoneTabSize,
1561 if (fwrite(tocarray, sizeof(CAFTOCENT), head.High - newlow + 1, infile) < head.High - newlow + 1
1562 || fflush(infile) < 0) {
1563 CAFError(CAF_ERR_IO);
1569 /* all done, return. */
1575 /* need neither full cleaning nor compaction, so return. */
1577 printf("Not cleaning %s: Free=%lu (%f%%)\n", path,
1578 (unsigned long) head.Free, percentfree);
1588 ** If OS supports it, try to check for free space and skip this file if
1589 ** not enough free space on this filesystem.
1592 if (STATFUNCT(fdin, &fsinfo) >= 0) {
1593 /* compare avail # blocks to # blocks needed for current file.
1594 ** # blocks needed is approximately
1595 ** datasize/blocksize + (size of the TOC)/blocksize
1596 ** + Head.BlockSize/blocksize, but we need to take rounding
1599 #define RoundIt(n) (CAFRoundOffsetUp((n), fsinfo.STATMULTI) / fsinfo.STATMULTI)
1601 num_diskblocks_needed = RoundIt((head.High - head.Low + 1)*sizeof(CAFTOCENT))
1602 + RoundIt(datasize - head.Free) + RoundIt(head.BlockSize);
1603 if (num_diskblocks_needed > fsinfo.STATAVAIL) {
1605 printf("CANNOT clean %s: needs %ld blocks, only %ld avail.\n",
1606 path, num_diskblocks_needed,
1607 (unsigned long) fsinfo.f_bavail);
1618 printf("Am cleaning %s: Free=%d (%f%%) %s\n", path, head.Free,
1619 percentfree, toc_needs_expansion ? "(Expanding TOC)" : "");
1622 /* decide on proper size for new TOC */
1623 newtocsize = CAF_DEFAULT_TOC_SIZE;
1624 if (head.High - newlow > newtocsize/TOC_CLEAN_RATIO) {
1625 newtocsize = TOC_CLEAN_RATIO*(head.High - newlow);
1628 /* try to create new CAF file with some temp. pathname */
1629 /* note: new CAF file is created in flocked state. */
1630 if ((fdout = CAFCreateCAFFile(path, newlow, newtocsize,
1631 statbuf.st_size, 1, newpath)) < 0) {
1638 if ((outfile = fdopen(fdout, "w+")) == NULL) {
1639 CAFError(CAF_ERR_IO);
1647 newtocarray = xcalloc((head.High - newlow + 1), sizeof(CAFTOCENT));
1649 if (fseeko(outfile, 0, SEEK_SET) < 0) {
1660 /* read in the CAFheader from the new file. */
1661 if (fread(&newhead, sizeof(CAFHEADER), 1, outfile) < 1) {
1672 /* initialize blocksize, zeroes buffer. */
1673 blocksize = newhead.BlockSize;
1674 if (blocksize == 0) blocksize=CAF_DEFAULT_BLOCKSIZE;
1676 zerobuff = xcalloc(blocksize, 1);
1678 /* seek to end of output file/place to start writing new articles */
1679 fseeko(outfile, 0, SEEK_END);
1680 startoffset = ftello(outfile);
1681 startoffset = CAFRoundOffsetUp(startoffset, blocksize);
1682 fseeko(outfile, (off_t) startoffset, SEEK_SET);
1685 ** Note: startoffset will always give the start offset of the next
1686 ** art to be written to the outfile.
1690 ** Loop over all arts in old TOC, copy arts that are still here to new
1691 ** file and new TOC.
1694 for (tocp = tocarray, i = head.Low; i <= head.High; ++tocp, ++i) {
1695 if (tocp->Size != 0) {
1696 newtocp = &newtocarray[i - newlow];
1697 newtocp->Offset = startoffset;
1698 newtocp->Size = tocp->Size;
1699 newtocp->ModTime = tocp->ModTime;
1701 /* seek to right place in input. */
1702 fseeko(infile, (off_t) tocp->Offset, SEEK_SET);
1704 nbytes = tocp->Size;
1705 while (nbytes > 0) {
1706 ncur = (nbytes > BUFSIZ) ? BUFSIZ : nbytes;
1707 if (fread(buf, sizeof(char), ncur, infile) < ncur
1708 || fwrite(buf, sizeof(char), ncur, outfile) < ncur) {
1710 CAFError(CAF_ERR_BADFILE);
1712 CAFError(CAF_ERR_IO);
1727 /* startoffset = ftello(outfile); */
1728 startoffset += tocp->Size;
1729 newstartoffset = CAFRoundOffsetUp(startoffset, blocksize);
1730 /* fseeko(outfile, (off_t) startoffset, SEEK_SET); */
1731 /* but we don't want to call fseeko, since that seems to always
1732 force a write(2) syscall, even when the new location would
1733 still be inside stdio's buffer. */
1734 if (newstartoffset - startoffset > 0) {
1735 ncur = newstartoffset - startoffset;
1736 if (fwrite(zerobuff, sizeof(char), ncur, outfile) < ncur) {
1737 /* write failed, must be disk error of some sort. */
1739 goto errorexit; /* yeah, it's a goto. eurggh. */
1742 startoffset = newstartoffset;
1746 free(tocarray); /* don't need this guy anymore. */
1750 ** set up new file header, TOC.
1751 ** this next fseeko might actually fail, because we have buffered stuff
1752 ** that might fail on write.
1754 if (fseeko(outfile, 0, SEEK_SET) < 0) {
1764 /* Change what we need in new file's header. */
1765 newhead.Low = newlow;
1766 newhead.High = head.High;
1767 newhead.LastCleaned = time((time_t *) NULL);
1768 /* newhead.NumSlots = newtocsize; */
1769 /* newhead.Free = 0; */
1771 if (fwrite(&newhead, sizeof(CAFHEADER), 1, outfile) < 1) {
1772 CAFError(CAF_ERR_IO);
1782 ** this next fseeko might actually fail, because we have buffered stuff
1783 ** that might fail on write.
1785 if (fseeko(outfile, sizeof(CAFHEADER) + newhead.FreeZoneTabSize,
1796 if (fwrite(newtocarray, sizeof(CAFTOCENT), head.High - newlow + 1, outfile) < head.High - newlow + 1
1797 || fflush(outfile) < 0) {
1798 CAFError(CAF_ERR_IO);
1807 if (rename(newpath, path) < 0) {
1808 CAFError(CAF_ERR_IO);
1813 /* if can't rename, probably no point in trying to unlink newpath, is there? */
1816 /* written and flushed newtocarray, can safely fclose and get out of