1 /* $Id: fastrm.c 6155 2003-01-19 19:58:25Z rra $
3 ** Delete a list of filenames or tokens from stdin.
5 ** Originally written by <kre@munnari.oz.au> (to only handle files)
7 ** Files that can't be unlinked because they didn't exist are considered
8 ** okay. Any error condition results in exiting with non-zero exit
9 ** status. Input lines in the form @...@ are taken to be storage API
10 ** tokens. Input filenames should be fully qualified. For maximum
11 ** efficiency, input filenames should be sorted; fastrm will cd into each
12 ** directory to avoid additional directory lookups when removing a lot of
13 ** files in a single directory.
24 #include "inn/innconf.h"
25 #include "inn/messages.h"
30 /* We reject any path names longer than this. */
31 #define MAX_DIR_LEN 2048
33 /* Data structure for a list of files in a single directory. */
34 typedef struct filelist {
41 /* All relative paths are relative to this directory. */
42 static char *base_dir = NULL;
44 /* The absolute path of the current working directory. */
45 static char current_dir[MAX_DIR_LEN];
47 /* The prefix for the files that we're currently working with. We sometimes
48 also use this as working space for forming file names to remove, so give
49 ourselves a bit of additional leeway just in case. */
50 static char prefix_dir[MAX_DIR_LEN * 2];
51 static int prefix_len;
53 /* Some threshold values that govern the optimizations that we are willing
54 to perform. chdir_threshold determines how many files to be removed we
55 want in a directory before we chdir to that directory. sort_threshold
56 determines how many files must be in a directory before we use readdir to
57 remove them in order. relative_threshold determines how many levels of
58 "../" we're willing to try to use to move to the next directory rather
59 than just calling chdir with the new absolute path. */
60 static int chdir_threshold = 3;
61 static int relative_threshold = 0;
62 static int sort_threshold = 0;
64 /* True if we should only print what we would do, not actually do it. */
65 static bool debug_only = false;
67 /* A string used for constructing relative paths. */
68 static const char dotdots[] = "../../../../";
70 /* The number of errors encountered, used to determine exit status. */
71 static int error_count = 0;
73 /* Whether the storage manager has been initialized. */
74 static bool sm_initialized = false;
76 /* True if unlink may be able to remove directories. */
77 static bool unlink_dangerous = false;
82 ** Sorting predicate for qsort and bsearch.
85 file_compare(const void *a, const void *b)
89 f1 = *((const char **) a);
90 f2 = *((const char **) b);
91 return strcmp(f1, f2);
96 ** Create a new filelist.
99 filelist_new(char *dir)
103 new = xmalloc(sizeof(filelist));
113 ** Insert a file name into a list of files (unsorted).
116 filelist_insert(filelist *list, char *name)
118 if (list->count == list->size) {
119 list->size = (list->size == 0) ? 16 : list->size * 2;
120 list->files = xrealloc(list->files, list->size * sizeof(char *));
122 list->files[list->count++] = xstrdup(name);
127 ** Find a file name in a sorted list of files.
130 filelist_lookup(filelist *list, const char *name)
134 p = bsearch(&name, list->files, list->count, sizeof(char *),
136 return (p == NULL ? NULL : *p);
141 ** Empty a list of files, freeing all of the names but keeping the
145 filelist_empty(filelist *list)
149 for (i = 0; i < list->count; i++)
150 free(list->files[i]);
156 ** Free a list of files.
159 filelist_free(filelist *list)
161 filelist_empty(list);
162 if (list->files != NULL)
164 if (list->dir != NULL)
171 ** Exit handler for die. Shut down the storage manager before exiting.
182 ** Initialize the storage manager. This includes parsing inn.conf, which
183 ** fastrm doesn't need for any other purpose.
190 if (!innconf_read(NULL))
193 if (!SMsetup(SM_RDWR, &value) || !SMsetup(SM_PREOPEN, &value))
194 die("can't set up storage manager");
196 die("can't initialize storage manager: %s", SMerrorstr);
197 sm_initialized = true;
198 message_fatal_cleanup = sm_cleanup;
203 ** Get a line from a given QIO stream, returning a pointer to it. Warn
204 ** about and then skip lines that are too long. Returns NULL at EOF or on
208 get_line(QIOSTATE *qp)
215 while (QIOtoolong(qp) || (p != NULL && strlen(p) >= MAX_DIR_LEN)) {
216 warn("line %d too long", count);
218 while (p == NULL && QIOtoolong(qp))
224 syswarn("read error");
234 ** Read lines from stdin (including the first that may have been there
235 ** from our last time in) until we reach EOF or until we get a line that
236 ** names a file not in the same directory as the previous lot. Remember
237 ** the file names in the directory we're examining and return the list.
240 process_line(QIOSTATE *qp, int *queued, int *deleted)
242 static char *line = NULL;
243 filelist *list = NULL;
254 for (; line != NULL; line = get_line(qp)) {
258 printf("Token %s\n", line);
263 if (!SMcancel(TextToToken(line)))
264 if (SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) {
265 warn("can't cancel %s", line);
270 p = strrchr(line, '/');
280 list = filelist_new(dir);
282 if ((dlen < 0 && strchr(line, '/'))
283 || (dlen >= 0 && (line[dlen] != '/'
284 || strchr(line + dlen + 1, '/')
285 || strncmp(dir, line, dlen))))
288 filelist_insert(list, line + dlen + 1);
297 ** Copy n leading segments of a path.
300 copy_segments(char *to, const char *from, int n)
304 for (c = *from++; c != '\0'; c = *from++) {
305 if (c == '/' && --n <= 0)
314 ** Return the count of path segments in a file name (the number of
318 slashcount(char *name)
322 for (i = 0; *name != '\0'; name++)
330 ** Unlink a file, reporting errors if the unlink fails for a reason other
331 ** than the file not existing. Be careful to avoid unlinking
332 ** a directory if unlink_dangerous is true.
335 unlink_file(const char *file)
339 /* On some systems, unlink will remove directories if used by root. If
340 we're running as root, unlink_dangerous will be set, and we need to
341 make sure that the file isn't a directory first. */
342 if (unlink_dangerous) {
343 if (stat(file, &st) < 0) {
344 if (errno != ENOENT) {
346 syswarn("can't stat %s", file);
348 syswarn("can't stat %s in %s", file, current_dir);
353 if (S_ISDIR(st.st_mode)) {
355 syswarn("%s is a directory", file);
357 syswarn("%s in %s is a directory", file, current_dir);
365 printf("%s / ", current_dir);
366 printf("%s\n", file);
370 if (unlink(file) < 0 && errno != ENOENT) {
372 syswarn("can't unlink %s", file);
374 syswarn("can't unlink %s in %s", file, current_dir);
380 ** A wrapper around chdir that dies if chdir fails for a reason other than
381 ** the directory not existing, returns false if the directory doesn't
382 ** exist (reporting an error), and otherwise returns true. It also checks
383 ** to make sure that filecount is at least chdir_threshold, and if it
384 ** isn't it instead just sets prefix_dir and prefix_len to point to the new
385 ** directory without changing the working directory.
388 chdir_checked(const char *path, int filecount)
390 if (filecount < chdir_threshold) {
391 strlcpy(prefix_dir, path, sizeof(prefix_dir));
392 prefix_len = strlen(path);
395 if (chdir(path) < 0) {
397 sysdie("can't chdir from %s to %s", current_dir, path);
399 syswarn("can't chdir from %s to %s", current_dir, path);
409 ** Set our environment (process working directory, and global vars) to
410 ** reflect a change of directory to dir (relative to base_dir if dir is not
411 ** an absolute path). We're likely to want to do different things
412 ** depending on the amount of work to do in dir, so we also take the number
413 ** of files to remove in dir as the second argument. Return false if the
414 ** directory doesn't exist (and therefore all files in it have already been
415 ** removed); otherwise, return true.
418 setup_dir(char *dir, int filecount)
420 char *p, *q, *absolute;
421 char path[MAX_DIR_LEN];
422 int base_depth, depth;
424 /* Set absolute to the absolute path to the new directory. */
427 else if (*dir == '/')
429 else if (*dir == '\0') {
430 strlcpy(path, "/", sizeof(path));
433 /* Strip off leading "./". */
434 while (dir[0] == '.' && dir[1] == '/')
435 for (dir += 2; *dir == '/'; dir++)
438 /* Handle any leading "../", but only up to the number of segments
440 base_depth = slashcount(base_dir);
441 while (base_depth > 0 && strncmp(dir, "../", 3) == 0)
442 for (base_depth--, dir += 3; *dir == '/'; dir++)
445 die("too many ../'s in path %s", dir);
446 copy_segments(path, base_dir, base_depth + 1);
447 if (strlen(path) + strlen(dir) + 2 > MAX_DIR_LEN)
448 die("path %s too long", dir);
449 strlcat(path, "/", sizeof(path));
450 strlcat(path, dir, sizeof(path));
454 /* Find the first point of difference between absolute and current_dir.
455 If there is no difference, we're done; we're changing to the same
456 directory we were in (this is probably some sort of error, but can
457 happen with odd relative paths). */
458 for (p = absolute, q = current_dir; *p == *q; p++, q++)
462 /* If we reached the end of current_dir and there's more left of
463 absolute, we're changing to a subdirectory of where we were. */
464 if (*q == '\0' && *p == '/') {
466 if (!chdir_checked(p, filecount))
469 strlcpy(current_dir, absolute, sizeof(current_dir));
473 /* Otherwise, if we were promised that we have a pure tree (in other
474 words, no symbolic links to directories), see if it's worth going up
475 the tree with ".." and then down again rather than chdir to the
476 absolute path. relative_threshold determines how many levels of ".."
477 we're willing to use; the default of 1 seems fractionally faster than
478 2 and 0 indicates to always use absolute paths. Values larger than 3
479 would require extending the dotdots string, but are unlikely to be
482 FIXME: It's too hard to figure out what this code does. It needs to be
484 if (*p != '\0' && relative_threshold > 0) {
485 depth = slashcount(q);
486 if (depth <= relative_threshold) {
487 while (p > absolute && *--p != '/')
490 strlcpy(prefix_dir, dotdots + 9 - depth * 3, sizeof(prefix_dir));
491 strlcat(prefix_dir, p, sizeof(prefix_dir));
492 if (!chdir_checked(prefix_dir, filecount))
495 /* Now patch up current_dir to reflect where we are. */
496 if (prefix_len == 0) {
497 while (q > current_dir && *--q != '/')
500 strlcat(current_dir, p, sizeof(current_dir));
506 /* All else has failed; just use the absolute path. This includes the
507 case where current_dir is a subdirectory of absolute, in which case
508 it may be somewhat faster to use chdir("../..") or the like rather
509 than the absolute path, but this case rarely happens when the user
510 cares about speed (it usually doesn't happen with sorted input). So
512 if (!chdir_checked(absolute, filecount))
515 strlcpy(current_dir, absolute, sizeof(current_dir));
521 ** Process a filelist of files to be deleted, all in the same directory.
524 unlink_filelist(filelist *list, int filecount)
528 struct dirent *entry;
532 /* If setup_dir returns false, the directory doesn't exist and we're
534 if (!setup_dir(list->dir, filecount)) {
539 /* We'll use prefix_dir as a buffer to write each file name into as we
540 go, so get it set up. */
544 prefix_dir[prefix_len++] = '/';
545 file = prefix_dir + prefix_len;
549 /* If we're not sorting directories or if the number of files is under
550 the threshold, just remove the files. */
551 if (sort_threshold == 0 || filecount < sort_threshold) {
552 for (i = 0; i < list->count; i++) {
553 strlcpy(file, list->files[i], sizeof(prefix_dir) - prefix_len);
554 unlink_file(prefix_dir);
560 /* We have enough files to remove in this directory that it's worth
561 optimizing. First, make sure the list of files is sorted. It's not
562 uncommon for the files to already be sorted, so check first. */
563 for (sorted = true, i = 1; sorted && i < list->count; i++)
564 sorted = (strcmp(list->files[i - 1], list->files[i]) <= 0);
566 qsort(list->files, list->count, sizeof(char *), file_compare);
568 /* Now, begin doing our optimized unlinks. The technique we use is to
569 open the directory containing the files and read through it, checking
570 each file in the directory to see if it's one of the files we should
571 be removing. The theory is that we want to minimize the amount of
572 time the operating system spends doing string compares trying to find
573 the file to be removed in the directory. This is often an O(n)
574 operation. Note that this optimization may slightly slow more
575 efficient operating systems. */
576 dir = opendir(prefix_len == 0 ? "." : prefix_dir);
578 if (prefix_len > 0 && prefix_dir[0] == '/')
579 warn("can't open directory %s", prefix_dir);
581 warn("can't open directory %s in %s",
582 (prefix_len == 0) ? "." : prefix_dir, current_dir);
587 for (i = 0, entry = readdir(dir); entry != NULL; entry = readdir(dir))
588 if (filelist_lookup(list, entry->d_name) != NULL) {
590 strlcpy(file, entry->d_name, sizeof(prefix_dir) - prefix_len);
591 unlink_file(prefix_dir);
592 if (i == list->count)
601 ** Check a path to see if it's okay (not likely to confuse us). This
602 ** ensures that it doesn't contain elements like "./" or "../" and doesn't
603 ** contain doubled slashes.
606 bad_path(const char *p)
608 if (strlen(p) >= MAX_DIR_LEN)
611 if (p[0] == '.' && (p[1] == '/' || (p[1] == '.' && p[2] == '/')))
613 while (*p && *p != '/')
615 if (p[0] == '/' && p[1] == '/')
625 ** Main routine. Parse options, initialize the storage manager, and
626 ** initialize various global variables, and then go into a loop calling
627 ** process_line and unlink_filelist as needed.
630 main(int argc, char *argv[])
636 int filecount, deleted;
637 bool empty_error = false;
639 /* Establish our identity. Since we use the storage manager, we need to
640 set up syslog as well, although we won't use it ourselves. */
645 p = strrchr(name, '/');
649 message_program_name = name;
650 openlog(name, LOG_CONS | LOG_PID, LOG_INN_PROG);
652 /* If we're running as root, unlink may remove directories. */
653 unlink_dangerous = (geteuid() == 0);
655 /* Unfortunately, we can't use getopt, because several of our options
656 take optional arguments. Bleh. */
658 while (argc >= 2 && **arg == '-') {
663 die("invalid option -- %c", *p);
669 if (!CTYPE(isdigit, p[1]))
671 chdir_threshold = atoi(p + 1);
681 if (!CTYPE(isdigit, p[1]))
683 sort_threshold = atoi(p + 1);
686 relative_threshold = 1;
687 if (!CTYPE(isdigit, p[1]))
689 relative_threshold = atoi(p + 1);
690 if (relative_threshold >= (int) strlen(dotdots) / 3)
691 relative_threshold = strlen(dotdots) / 3 - 1;
700 die("usage error, wrong number of arguments");
702 /* The remaining argument is the base path. Make sure it's valid and
703 not excessively large and then change to it. */
705 if (*base_dir != '/' || bad_path(base_dir))
706 die("bad base path %s", base_dir);
707 strlcpy(current_dir, base_dir, sizeof(current_dir));
708 if (chdir(current_dir) < 0)
709 sysdie("can't chdir to base path %s", current_dir);
711 /* Open our input stream and then loop through it, building filelists
712 and processing them until done. */
713 qp = QIOfdopen(fileno(stdin));
715 sysdie("can't reopen stdin");
716 while ((list = process_line(qp, &filecount, &deleted)) != NULL) {
718 unlink_filelist(list, filecount);
726 die("no files to remove");
727 exit(error_count > 0 ? 1 : 0);