From: Mark Wooding Date: Sun, 28 Jan 2007 11:14:43 +0000 (+0000) Subject: space: New program for fixing whitespace problems in text files. X-Git-Tag: 1.2.0~7 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/misc/commitdiff_plain/df33ee5494fe4ac181807406cd8dcf6dba6f5275 space: New program for fixing whitespace problems in text files. --- diff --git a/.gitignore b/.gitignore index 3ea5846..f96dbaa 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ xtitle.so *.tar.gz *.o stamp +space diff --git a/Makefile b/Makefile index ae6a213..dcfbc39 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,10 @@ BINSCRIPTS = \ splitconf z buf create inplace SBINSCRIPTS = shadowfix SCRIPTS = $(BINSCRIPTS) $(SBINSCRIPTS) -BINPROGS = not cdb-probe cdb-check-domain gorp locking if-mtu pause stamp +BINPROGS = \ + space not \ + cdb-probe cdb-check-domain \ + gorp locking if-mtu pause stamp SBINPROGS = qmail-checkspam PROGS = $(BINPROGS) $(SBINPROGS) PERLLIBS = MdwOpt.pm @@ -78,6 +81,9 @@ cdb-probe: cdb-probe.o cdb-check-domain: cdb-check-domain.o $(LINK) -lcdb +space: space.o + $(LINK) + not: not.o $(LINK) diff --git a/debian/control b/debian/control index f574ae6..7914f51 100644 --- a/debian/control +++ b/debian/control @@ -11,7 +11,7 @@ Architecture: all Section: utils Depends: mdwopt-perl, nsict-cdb, locking, qmail-checkspam, nsict-mail, if-mtu, shadowfix, zz, gorp, splitconf, xtitle, pause, buf, create, inplace, - stamp + stamp, space Description: Dummy package for convenience. Package: mdwopt-perl @@ -106,3 +106,12 @@ Package: stamp Architecture: any Section: utils Description: Like cat, but prefixing each line with a datestamp. + +Package: space +Architecture: any +Section: utils +Description: Identify and fix problematic whitespace in text files. + The space program won't send you to the moon, but it will identify + and fix problems such as trailing whitespace and spaces before tabs. + It can safely update files in place, and could therefore be used as part + of a commit hook. diff --git a/debian/inst b/debian/inst index d22050d..54975d2 100644 --- a/debian/inst +++ b/debian/inst @@ -31,6 +31,8 @@ qmail-checkspam qmail-checkspam /usr/sbin qmail-checkspam.8 qmail-checkspam /usr/share/man/man8 shadowfix shadowfix /usr/sbin shadowfix.8 shadowfix /usr/share/man/man8 +space space /usr/bin +space.1 space /usr/share/man/man1 splitconf splitconf /usr/bin splitconf.1 splitconf /usr/share/man/man1 stamp stamp /usr/bin diff --git a/space.1 b/space.1 new file mode 100644 index 0000000..f4799ab --- /dev/null +++ b/space.1 @@ -0,0 +1,121 @@ +.\" -*-nroff-*- +.ie t .ds o \(bu +.el .ds o * +.de hP +.IP +\fB\h'-\w'\\$1'u-1m'\\$1\h'1m'\fP\c +.. +.TH space 1 "27 January 2007" "Straylight/Edgeware" +.SH NAME +space \- check and fix whitespace in files +.SH SYNOPSIS +.B space +.RB [ \-cmtuv ] +.RB [ \-i \c +.RI [ backup ]] +.RI [ file ...] +.SH DESCRIPTION +The (low-budget) +.B space +program identifies and fixes problematic use of whitespace in text +files. In particular, it will notice and (if you want) fix +.hP \*o +.I trailing whitespace +\(en tabs and spaces just before a newline; +.hP \*o +.I spaces before tabs +\(en which are obviously pointless; +.hP \*o +.I end-of-file in mid-line +\(en which confuses a number of programs, +.BR diff (1) +not least; +.hP \*o groups of spaces +which could be replaced by tabs; or +.hP \*o +.I all tab characters +\(en replacing them with appropriate numbers of spaces. +.PP +The program is capable of fixing files in place (leaving backups if you +ask for them). +.PP +The following command-line options are recognized. +.TP +.B \-h +Give a short help message describing the options available. +.TP +.B \-c +Just check files, exiting zero if they're all OK, or 1 if there are +whitespace problems with any of them; exit 32 if there are actual +errors. You may want the +.B \-v +option with this; without it, +.B space +is silent. +.TP +.BR \-i [ \fIbackup ] +Modify all the +.IR file s +in place. During processing, the fixed copy of the file is written to +some new name not currently in use. If the processing was successful, +the old +.I file +is renamed to +.I filebackup +and the new file is renamed to +.IR file . +If something very complicated goes wrong at just the right moment, it's +possible that you'll be left with just +.I filebackup +but that's not very likely. The new file is created with the same +permissions as the old file, but no attempt is made to give it the same +owner or anything like that. +.TP +.B \-m +Worry about tabs in the middle of lines as well as tabs forming the +initial indentation. Normally, identify (and fix) tabs preceded by +spaces in the middle of lines; with +.BR \-t , +fix all groups of whitespace which could be turned into tabs. +.TP +.B \-t +Turn runs of horizontal whitespace into tabs where possible. +With +.BR \-c , +only complain about runs of space which could be turned into tabs but +don't actually do anything. By default, only runs of whitespace at the +starts of lines are affected. With +.BR \-m , +however, all runs of whitespace are considered to be fair game. The +rules for internal whitespace are as follows. A run at the start of a +line, or which contains a tab is always fair game; a run of two or more +spaces is fair game +.I unless +it follows a +.RB ` . ' +or +.RB ` : ' +character, either immediately or separated only by +.RB ` """" ' +or +.RB ` ' ' +characters, in which case three spaces are required. This is because +some typists (including the author) put two spaces after a full stop, +and don't want them randomly turning into tabs for no good reason. +.TP +.B \-u +Turn tabs into appropriate numbers of spaces. With +.BR \-c , +only complain about finding tabs, but don't actually do anything. All +tabs are considered fair game here and the +.B \-m +option is ignored. +.PP +Tab stops are assumed to occur every eight columns, as is usual. +.SH BUGS +There's no way yet of controlling where the tab stops are. +.PP +This space program doesn't even attempt moon landings. The space man +page doesn't have wiggly antennae. +.SH AUTHOR +Mark Wooding, diff --git a/space.c b/space.c new file mode 100644 index 0000000..ef9485f --- /dev/null +++ b/space.c @@ -0,0 +1,398 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +enum { + OK = 0, + BADNESS = 1, + TROUBLE = 32 +}; + +static const char *ego = ""; + +static const char *bkp = 0; + +static unsigned flags = 0; +#define F_MIDLINETABS 1u +#define F_INPLACE 2u +#define F_CHECK 4u +#define F_BOGUS 8u +#define F_UNTABIFY 16u +#define F_TABIFY 32u +#define F_VERBOSE 64u + +static void usage(FILE *fp) + { fprintf(fp, "Usage: %s [-cmtuv] [-i[BKP]] [FILE...]\n\n", ego); } + +static char *augment(const char *name, const char *suffix) +{ + size_t n = strlen(name), nn = strlen(suffix); + char *p = malloc(n + nn + 1); + + if (!p) { + fprintf(stderr, "%s: Out of memory!\n", ego); + return (0); + } + memcpy(p, name, n); + memcpy(p + n, suffix, nn + 1); + return (p); +} + +static FILE *freshname(const char *name, char **newname, mode_t mode) +{ + char buf[16]; + int i; + int fd; + FILE *fp; + char *n; + + for (i = 0; i < 32767; i++) { + sprintf(buf, ".new%d", i); + if ((n = augment(name, buf)) == 0) + goto fail_0; + if ((fd = open(n, O_WRONLY | O_CREAT | O_EXCL, mode)) < 0) { + if (errno == EEXIST) { + free(n); + continue; + } + fprintf(stderr, "%s: Can't create new file for `%s': %s\n", + ego, name, strerror(errno)); + goto fail_1; + } + goto win; + } + fprintf(stderr, "%s: Can't find new file to update `%s'\n", ego, name); + goto fail_1; + +win: + if (chmod(n, mode)) { + fprintf(stderr, "%s: Can't set permissions on `%s': %s\n", + ego, n, strerror(errno)); + goto fail_2; + } + if ((fp = fdopen(fd, "w")) == 0) { + fprintf(stderr, "%s: fdopen on `%s' failed: %s\n", + ego, n, strerror(errno)); + goto fail_2; + } + *newname = n; + return (fp); + +fail_2: + close(fd); +fail_1: + free(n); +fail_0: + return (0); +} + +typedef struct buf { + char *b; + size_t n; + size_t sz; +} buf; +#define BUF_INIT { 0, 0, 0 } + +static void reset(buf *b) { b->n = 0; } + +static int put(buf *b, int ch) +{ + size_t w; + + if (b->n >= b->sz) { + if (!b->sz) { + w = 64; + b->b = malloc(w); + } else { + w = b->sz * 2; + b->b = realloc(b->b, w); + } + if (!b->b) { + fprintf(stderr, "%s: Not enough memory for buffer!\n", ego); + return (-1); + } + b->sz = w; + } + b->b[b->n++] = ch; + return (0); +} + +#define TABSTOP(n) (((n) + 8u) & ~7u) + +static int space(const char *name) +{ + static buf b = BUF_INIT; + FILE *fin, *fout = stdout; + char *newname = 0, *oldname = 0; + int rc = TROUBLE, status = OK; + int last = '\n'; + unsigned nsp = 0, nwsp = 0, hpos = 0, ohpos = 0, nhpos = 0, nl = 1; + unsigned i; +#define f_newline 1u +#define f_warnspacetab 2u +#define f_tabify 4u +#define f_warntabs 8u +#define f_warnspaces 16u +#define f_tab 32u +#define f_bad 64u +#define f_forced 128u + unsigned f = f_newline | (flags & F_TABIFY ? f_tabify : 0); + int ch; + + if (strcmp(name, "-") == 0) { + if (flags & F_INPLACE) { + fprintf(stderr, "%s: Can't modify stdin in-place.\n", ego); + goto done_0; + } + fin = stdin; + } else { + if ((fin = fopen(name, "r")) == 0) { + fprintf(stderr, "%s: Failed to open file `%s': %s.\n", + ego, name, strerror(errno)); + goto done_0; + } + else if (flags & F_INPLACE) { + struct stat st; + if (stat(name, &st)) { + fprintf(stderr, "%s: Can't stat `%s': %s.\n", + ego, name, strerror(errno)); + goto done_1; + } + if ((fout = freshname(name, &newname, st.st_mode)) == 0) + goto done_1; + } + } + if (flags & F_CHECK) + fout = 0; + + for (;;) { + ch = getc(fin); + switch (ch) { + case ' ': + nsp++; nwsp++; hpos++; + if (put(&b, ' ')) goto done_2; + break; + case '\t': + if (flags & F_UNTABIFY) { + if ((flags & F_CHECK) && !(f & f_warntabs)) { + fprintf(stderr, "%s:%u: found tab\n", name, nl); + f |= f_warntabs; + status = BADNESS; + } + } else if (((flags & F_MIDLINETABS) || (f & f_newline)) && nsp) { + if ((flags & F_VERBOSE) && !(f & f_warnspacetab)) { + fprintf(stderr, "%s:%u: space followed by tab\n", name, nl); + f |= f_warnspacetab; + status = BADNESS; + } + f |= f_tabify | f_forced; + } + f |= f_tab; + nsp = 0; nwsp++; hpos = TABSTOP(hpos); + if (put(&b, '\t')) goto done_2; + break; + case EOF: + if (nwsp || !(f & f_newline)) { + if (flags & F_VERBOSE) + fprintf(stderr, "%s:%u: file ends in mid-line\n", name, nl); + status = BADNESS; + if (fout) putc('\n', fout); + } + goto end; + case '\n': + case '\v': + if (nwsp && (flags & F_VERBOSE)) { + fprintf(stderr, "%s:%u: trailing whitespace\n", name, nl); + status = BADNESS; + } + if (fout) putc('\n', fout); + reset(&b); + nsp = nwsp = hpos = ohpos = 0; nl++; + f |= f_newline; + f &= ~(f_tab | f_warnspacetab | f_warntabs | f_warnspaces); + if (flags & F_TABIFY) + f |= f_tabify; + else + f &= ~f_tabify; + last = '\n'; + break; + default: + if (nwsp) { + if (flags & F_UNTABIFY) { + if (fout) for (; ohpos < hpos; ohpos++) putc(' ', fout); + } else if ((f & f_tabify) && + ((hpos - ohpos >= (last == '.' || last == ':' ? + 3 : 2)) || + (f & (f_tab | f_newline)))) { + i = 0; + for (;;) { + nhpos = TABSTOP(ohpos); + if (nhpos > hpos) break; + if (fout) putc('\t', fout); + if ((flags & F_VERBOSE) && (flags & F_TABIFY) && + i < b.n && b.b[i] != '\t' && + !(f & (f_warnspaces | f_forced))) { + fprintf(stderr, "%s:%u: spaces could be turned into tabs\n", + name, nl); + f |= f_warnspaces; + } + ohpos = nhpos; + i++; + } + if (fout) + for (; ohpos < hpos; ohpos++) putc(' ', fout); + } else if (fout) + for (i = 0; i < b.n; i++) putc(b.b[i], fout); + } + reset(&b); + f &= ~(f_newline | f_tab | f_forced); + if (!(flags & F_TABIFY) || !(flags & F_MIDLINETABS)) f &= ~f_tabify; + nwsp = nsp = 0; + hpos++; ohpos = hpos; + if (fout) putc(ch, fout); + if (ch != '"' && ch != '\'') + last = ch; + break; + } + } +end:; + + if (ferror(fin)) { + fprintf(stderr, "%s: Error reading `%s': %s\n", + ego, name, strerror(errno)); + goto done_2; + } + + if (fout) { + if (fflush(fout) || ferror(fout)) f |= f_bad; + if (fout != stdout && fclose(fout)) f |= f_bad; + fout = 0; + if (f & f_bad) { + fprintf(stderr, "%s: Error writing `%s': %s\n", + ego, newname, strerror(errno)); + goto done_2; + } + } + + if (flags & F_INPLACE) { + if (bkp) { + if ((oldname = augment(name, bkp)) == 0) + goto done_2; + if (rename(name, oldname)) { + fprintf(stderr, "%s: Failed to back up `%s' as `%s': %s\n", + ego, name, oldname, strerror(errno)); + goto done_2; + } + } + if (rename(newname, name)) { + if (oldname) rename(oldname, name); + fprintf(stderr, "%s: Failed to install `%s' as `%s': %s\n", + ego, newname, name, strerror(errno)); + goto done_2; + } + } + + rc = status; + +done_2: + if (oldname) free(oldname); + if (newname) { + remove(newname); + free(newname); + } +done_1: + if (fout && fout != stdout) fclose(fout); + fclose(fin); +done_0: + return (rc); +} + +static int manysetp(unsigned f) { return (!!(f & (f - 1))); } + +int main(int argc, char *argv[]) +{ + int i; + int rc = OK, st; + + if ((ego = strrchr(argv[0], '/')) == 0) + ego = argv[0]; + else + ego++; + + for (;;) { + if ((i = getopt(argc, argv, "h" "cmtuv" "i::")) < 0) + break; + switch (i) { + case 'h': + printf("%s -- remove extraneous spaces from files\n\n", ego); + usage(stdout); + fputs("Options:\n\ + -h Print this help text\n\ + -c Check files for badness, but don't produce other output\n\ + -m Fix spaces followed by tabs in mid-line\n\ + -t Tabify file completely\n\ + -u Untabify file completely\n\ + -i[BKP] Modify files in place; leave FILEBKP as copy of old FILE\n\ +", stdout); + exit(0); + case 'i': + bkp = optarg; + flags |= F_INPLACE; + break; + case 'm': + flags |= F_MIDLINETABS; + break; + case 'c': + flags |= F_CHECK; + break; + case 't': + flags |= F_TABIFY; + break; + case 'u': + flags |= F_UNTABIFY; + break; + case 'v': + flags |= F_VERBOSE; + break; + default: + flags |= F_BOGUS; + break; + } + } + if (flags & F_BOGUS) { + usage(stderr); + exit(TROUBLE); + } + if (manysetp(flags & (F_CHECK | F_INPLACE))) { + fprintf(stderr, "%s: Options -c and -i are mutually exclusive.\n", ego); + exit(TROUBLE); + } + if (manysetp(flags & (F_TABIFY | F_UNTABIFY))) { + fprintf(stderr, "%s: Options -t and -u are mutually exclusive.\n", ego); + exit(TROUBLE); + } + + if (optind == argc) { + if (isatty(0)) { + fprintf(stderr, "%s: No options given and stdin is a terminal.\n", + ego); + exit(TROUBLE); + } + rc = space("-"); + } else for (i = optind; i < argc; i++) { + st = space(argv[i]); + if (st > rc) rc = st; + } + if (rc == BADNESS && !(flags & F_CHECK)) + rc = OK; + return (rc); +} diff --git a/spacetest b/spacetest new file mode 100644 index 0000000..73d59ee --- /dev/null +++ b/spacetest @@ -0,0 +1,11 @@ + <- 10 spaces + <- two tabs + <- space space tab, space -> +foo -> <- space tab +foo -> <- tab and eight spaces +waffle> <- one space +stuff. <- two spaces +blah. <- three spaces +blah. <- four spaces +stuff- <- two spaces +eof -> \ No newline at end of file