--- /dev/null
+.\" -*-nroff-*-
+.ie t .ds o \(bu
+.el .ds o *
+.de hP
+.IP
+\fB\h'-\w'\\$1'u-1m'\\$1\h'1m'\fP\c
+..
+.TH space 1 "27 January 2007" "Straylight/Edgeware"
+.SH NAME
+space \- check and fix whitespace in files
+.SH SYNOPSIS
+.B space
+.RB [ \-cmtuv ]
+.RB [ \-i \c
+.RI [ backup ]]
+.RI [ file ...]
+.SH DESCRIPTION
+The (low-budget)
+.B space
+program identifies and fixes problematic use of whitespace in text
+files. In particular, it will notice and (if you want) fix
+.hP \*o
+.I trailing whitespace
+\(en tabs and spaces just before a newline;
+.hP \*o
+.I spaces before tabs
+\(en which are obviously pointless;
+.hP \*o
+.I end-of-file in mid-line
+\(en which confuses a number of programs,
+.BR diff (1)
+not least;
+.hP \*o
+.I groups of spaces
+\(en which could be replaced by tabs; or
+.hP \*o
+.I all tab characters
+\(en replacing them with appropriate numbers of spaces.
+.PP
+The program is capable of fixing files in place (leaving backups if you
+ask for them).
+.PP
+The following command-line options are recognized.
+.TP
+.B \-h
+Give a short help message describing the options available.
+.TP
+.B \-c
+Just check files, exiting zero if they're all OK, or 1 if there are
+whitespace problems with any of them; exit 32 if there are actual
+errors. You may want the
+.B \-v
+option with this; without it,
+.B space
+is silent.
+.TP
+.BR \-i [ \fIbackup ]
+Modify all the
+.IR file s
+in place. During processing, the fixed copy of the file is written to
+some new name not currently in use. If the processing was successful,
+the old
+.I file
+is renamed to
+.I filebackup
+and the new file is renamed to
+.IR file .
+If something very complicated goes wrong at just the right moment, it's
+possible that you'll be left with just
+.I filebackup
+but that's not very likely. The new file is created with the same
+permissions as the old file, but no attempt is made to give it the same
+owner or anything like that.
+.TP
+.B \-m
+Worry about tabs in the middle of lines as well as tabs forming the
+initial indentation. Normally, identify (and fix) tabs preceded by
+spaces in the middle of lines; with
+.BR \-t ,
+fix all groups of whitespace which could be turned into tabs.
+.TP
+.B \-t
+Turn runs of horizontal whitespace into tabs where possible.
+With
+.BR \-c ,
+only complain about runs of space which could be turned into tabs but
+don't actually do anything. By default, only runs of whitespace at the
+starts of lines are affected. With
+.BR \-m ,
+however, all runs of whitespace are considered to be fair game. The
+rules for internal whitespace are as follows. A run at the start of a
+line, or which contains a tab, is always fair game; a run of two or more
+spaces is fair game
+.I unless
+it follows a
+.RB ` . '
+or
+.RB ` : '
+character, either immediately or separated only by
+.RB ` """" '
+or
+.RB ` ' '
+characters, in which case three spaces are required. This is because
+some typists (including the author) put two spaces after a full stop,
+and don't want them randomly turning into tabs for no good reason.
+.TP
+.B \-u
+Turn tabs into appropriate numbers of spaces. With
+.BR \-c ,
+only complain about finding tabs, but don't actually do anything. All
+tabs are considered fair game here and the
+.B \-m
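+option is ignored.
+.TP
+.B \-v
+Be verbose: report each whitespace problem found, with the file name
+and line number, on standard error.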
+.PP
+Tab stops are assumed to occur every eight columns, as is usual.
+.SH BUGS
+There's no way yet of controlling where the tab stops are.
+.PP
+This space program doesn't even attempt moon landings. The space man
+page doesn't have wiggly antennae.
+.SH AUTHOR
+Mark Wooding, <mdw@distorted.org.uk>
--- /dev/null
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <unistd.h>
+
+enum { /* process exit statuses; main() returns the largest seen over all files */
+ OK = 0, /* no problems */
+ BADNESS = 1, /* whitespace problems found; main() downgrades this to OK unless -c */
+ TROUBLE = 32 /* a real error: bad usage, I/O failure, out of memory */
+};
+
+static const char *ego = "<unset>"; /* program name used in messages; main() sets it from argv[0] */
+
+static const char *bkp = 0; /* argument of -i (backup suffix), or null if none was given */
+
+static unsigned flags = 0; /* bitwise OR of the F_... option flags below */
+#define F_MIDLINETABS 1u /* -m */
+#define F_INPLACE 2u /* -i */
+#define F_CHECK 4u /* -c */
+#define F_BOGUS 8u /* an unrecognized option was seen */
+#define F_UNTABIFY 16u /* -u */
+#define F_TABIFY 32u /* -t */
+#define F_VERBOSE 64u /* -v */
+
+/* Write the one-line usage synopsis (followed by a blank line) to fp. */
+static void usage(FILE *fp)
+{
+  fprintf(fp, "Usage: %s [-cmtuv] [-i[BKP]] [FILE...]\n\n", ego);
+}
+
+/*
+ * Return a newly allocated string holding name followed by suffix.
+ * The caller owns the result and must free() it.  If memory runs out, a
+ * message is printed on stderr and a null pointer is returned.
+ */
+static char *augment(const char *name, const char *suffix)
+{
+  size_t namelen = strlen(name);
+  size_t suflen = strlen(suffix);
+  char *joined = malloc(namelen + suflen + 1);
+
+  if (joined) {
+    memcpy(joined, name, namelen);
+    /* Copying suflen + 1 bytes brings the terminating null along too. */
+    memcpy(joined + namelen, suffix, suflen + 1);
+  } else
+    fprintf(stderr, "%s: Out of memory!\n", ego);
+  return (joined);
+}
+
+/*
+ * Create a brand-new output file alongside name.
+ *
+ * Candidate names are name.new0, name.new1, ...; the first one which can
+ * be created exclusively is used.  The file is given the permission bits
+ * in mode (chmod is used so that the umask doesn't interfere).
+ *
+ * On success, returns a stream open for writing and stores the chosen
+ * file name (malloc'ed; the caller must free it) in *newname.  On
+ * failure, prints a message on stderr, leaves *newname alone, leaves no
+ * file behind, and returns a null pointer.
+ */
+static FILE *freshname(const char *name, char **newname, mode_t mode)
+{
+  char buf[16];
+  int i;
+  int fd = -1;
+  FILE *fp;
+  char *n = 0;
+  /* Callers pass st_mode straight from stat(2); keep only permission bits. */
+  mode_t perm = mode & 07777;
+
+  for (i = 0; i < 32767; i++) {
+    snprintf(buf, sizeof(buf), ".new%d", i);
+    if ((n = augment(name, buf)) == 0)
+      goto fail_0;
+    if ((fd = open(n, O_WRONLY | O_CREAT | O_EXCL, perm)) >= 0)
+      goto win;
+    if (errno != EEXIST) {
+      fprintf(stderr, "%s: Can't create new file for `%s': %s\n",
+              ego, name, strerror(errno));
+      goto fail_1;
+    }
+    /* Name already taken: drop it and try the next one. */
+    free(n);
+    n = 0;
+  }
+  /* Every candidate existed.  n has already been freed, so skip fail_1. */
+  fprintf(stderr, "%s: Can't find new file to update `%s'\n", ego, name);
+  goto fail_0;
+
+win:
+  if (chmod(n, perm)) {
+    fprintf(stderr, "%s: Can't set permissions on `%s': %s\n",
+            ego, n, strerror(errno));
+    goto fail_2;
+  }
+  if ((fp = fdopen(fd, "w")) == 0) {
+    fprintf(stderr, "%s: fdopen on `%s' failed: %s\n",
+            ego, n, strerror(errno));
+    goto fail_2;
+  }
+  *newname = n;
+  return (fp);
+
+fail_2:
+  close(fd);
+  /* We created this file; the caller never learns its name, so tidy up. */
+  unlink(n);
+fail_1:
+  free(n);
+fail_0:
+  return (0);
+}
+
+typedef struct buf { /* growable byte buffer; put() appends to it and reset() empties it */
+ char *b; /* storage; null until put() first allocates */
+ size_t n; /* number of bytes currently stored */
+ size_t sz; /* number of bytes allocated at b */
+} buf;
+#define BUF_INIT { 0, 0, 0 } /* initializer for an empty buffer with no storage */
+
+/* Empty the buffer b; its storage is kept for reuse. */
+static void reset(buf *b)
+{
+  b->n = 0;
+}
+
+/*
+ * Append the character ch to buffer b, growing the storage if needed.
+ *
+ * Storage starts at 64 bytes and doubles each time it fills.  Returns
+ * zero on success.  If memory runs out, prints a message on stderr and
+ * returns -1; the buffer and its existing contents are left intact, so
+ * it remains safe to use (or reset) afterwards.
+ */
+static int put(buf *b, int ch)
+{
+  size_t w;
+  char *p;
+
+  if (b->n >= b->sz) {
+    /* A size which can't be doubled is treated as allocation failure. */
+    if (b->sz > (size_t)-1 / 2)
+      p = 0;
+    else {
+      w = b->sz ? b->sz * 2 : 64;
+      /* realloc(0, w) acts as malloc(w); keep the old block on failure. */
+      p = realloc(b->b, w);
+    }
+    if (!p) {
+      fprintf(stderr, "%s: Not enough memory for buffer!\n", ego);
+      return (-1);
+    }
+    b->b = p;
+    b->sz = w;
+  }
+  b->b[b->n++] = ch;
+  return (0);
+}
+
+#define TABSTOP(n) (((n) + 8u) & ~7u) /* smallest multiple of 8 strictly greater than n: the column a tab at column n moves to */
+
+/*
+ * Check, and unless -c was given fix, the whitespace in one file.
+ *
+ * name is the file to process; "-" means standard input (not allowed
+ * with -i).  Fixed output goes to stdout, or with -i to a fresh file
+ * which then replaces the original (after renaming the original to
+ * name + bkp if a backup suffix was given).  With -c nothing is written.
+ *
+ * Returns TROUBLE if anything went wrong (a message is printed on
+ * stderr), BADNESS if whitespace problems were found, and OK otherwise.
+ *
+ * The input is read a character at a time.  A run of spaces and tabs is
+ * held back in the buffer b until the next character shows what to do
+ * with it: drop it (end of line or file), rewrite it as spaces or as
+ * tabs, or copy it through unchanged.
+ *
+ *   nsp    spaces seen in the current run since its last tab
+ *   nwsp   characters in the current run
+ *   hpos   column reached in the input
+ *   ohpos  column reached in the output
+ *   nl     current line number, for messages
+ *   last   last character output on this line, ignoring quote marks;
+ *          '\n' at the start of a line
+ */
+static int space(const char *name)
+{
+  static buf b = BUF_INIT;
+  FILE *fin, *fout = stdout;
+  char *newname = 0, *oldname = 0;
+  int rc = TROUBLE, status = OK;
+  int last = '\n';
+  unsigned nsp = 0, nwsp = 0, hpos = 0, ohpos = 0, nhpos = 0, nl = 1;
+  unsigned i;
+#define f_newline 1u            /* nothing but whitespace so far on this line */
+#define f_warnspacetab 2u       /* already reported space-before-tab on this line */
+#define f_tabify 4u             /* turn the current run into tabs */
+#define f_warntabs 8u           /* already reported a tab on this line */
+#define f_warnspaces 16u        /* already reported tabifiable spaces on this line */
+#define f_tab 32u               /* the current run contains a tab */
+#define f_bad 64u               /* writing the output failed */
+#define f_forced 128u           /* f_tabify was set because of a space before a tab */
+  unsigned f = f_newline | (flags & F_TABIFY ? f_tabify : 0);
+  int ch;
+
+  /*
+   * The buffer is static so that its storage is reused, but a previous
+   * file may have ended (or failed) with whitespace still pending.  Make
+   * sure none of that leaks into this file's output.
+   */
+  reset(&b);
+
+  if (strcmp(name, "-") == 0) {
+    if (flags & F_INPLACE) {
+      fprintf(stderr, "%s: Can't modify stdin in-place.\n", ego);
+      goto done_0;
+    }
+    fin = stdin;
+  } else {
+    if ((fin = fopen(name, "r")) == 0) {
+      fprintf(stderr, "%s: Failed to open file `%s': %s.\n",
+              ego, name, strerror(errno));
+      goto done_0;
+    }
+    else if (flags & F_INPLACE) {
+      struct stat st;
+      if (stat(name, &st)) {
+        fprintf(stderr, "%s: Can't stat `%s': %s.\n",
+                ego, name, strerror(errno));
+        goto done_1;
+      }
+      if ((fout = freshname(name, &newname, st.st_mode)) == 0)
+        goto done_1;
+    }
+  }
+  if (flags & F_CHECK)
+    fout = 0;
+
+  for (;;) {
+    ch = getc(fin);
+    switch (ch) {
+    case ' ':
+      nsp++; nwsp++; hpos++;
+      if (put(&b, ' ')) goto done_2;
+      break;
+    case '\t':
+      if (flags & F_UNTABIFY) {
+        if ((flags & F_CHECK) && !(f & f_warntabs)) {
+          fprintf(stderr, "%s:%u: found tab\n", name, nl);
+          f |= f_warntabs;
+          status = BADNESS;
+        }
+      } else if (((flags & F_MIDLINETABS) || (f & f_newline)) && nsp) {
+        /* The message is optional, but the exit status must not be. */
+        if ((flags & F_VERBOSE) && !(f & f_warnspacetab)) {
+          fprintf(stderr, "%s:%u: space followed by tab\n", name, nl);
+          f |= f_warnspacetab;
+        }
+        status = BADNESS;
+        f |= f_tabify | f_forced;
+      }
+      f |= f_tab;
+      nsp = 0; nwsp++; hpos = TABSTOP(hpos);
+      if (put(&b, '\t')) goto done_2;
+      break;
+    case EOF:
+      /* Pending whitespace, or anything at all on the line, means the
+       * file lacks its final newline.  Pending whitespace is dropped.
+       */
+      if (nwsp || !(f & f_newline)) {
+        if (flags & F_VERBOSE)
+          fprintf(stderr, "%s:%u: file ends in mid-line\n", name, nl);
+        status = BADNESS;
+        if (fout) putc('\n', fout);
+      }
+      goto end;
+    case '\n':
+    case '\v':
+      /* Pending whitespace is trailing whitespace: report and drop it. */
+      if (nwsp) {
+        if (flags & F_VERBOSE)
+          fprintf(stderr, "%s:%u: trailing whitespace\n", name, nl);
+        status = BADNESS;
+      }
+      /* Write back the character actually read: a vertical tab ends the
+       * line for our purposes but must not be turned into a newline.
+       */
+      if (fout) putc(ch, fout);
+      reset(&b);
+      nsp = nwsp = hpos = ohpos = 0;
+      if (ch == '\n') nl++;
+      f |= f_newline;
+      f &= ~(f_tab | f_warnspacetab | f_warntabs | f_warnspaces);
+      if (flags & F_TABIFY)
+        f |= f_tabify;
+      else
+        f &= ~f_tabify;
+      last = '\n';
+      break;
+    default:
+      if (nwsp) {
+        if (flags & F_UNTABIFY) {
+          /* Pad with spaces up to the column the run reached. */
+          if (fout) for (; ohpos < hpos; ohpos++) putc(' ', fout);
+        } else if ((f & f_tabify) &&
+                   ((hpos - ohpos >= (last == '.' || last == ':' ?
+                                      3 : 2)) ||
+                    (f & (f_tab | f_newline)))) {
+          /* Emit as many tabs as fit, then spaces for the remainder.
+           * i counts the tabs written; if the i-th buffered character
+           * wasn't itself a tab then the output differs from the input.
+           */
+          i = 0;
+          for (;;) {
+            nhpos = TABSTOP(ohpos);
+            if (nhpos > hpos) break;
+            if (fout) putc('\t', fout);
+            if ((flags & F_TABIFY) &&
+                i < b.n && b.b[i] != '\t' &&
+                !(f & f_forced)) {
+              if ((flags & F_VERBOSE) && !(f & f_warnspaces)) {
+                fprintf(stderr, "%s:%u: spaces could be turned into tabs\n",
+                        name, nl);
+                f |= f_warnspaces;
+              }
+              status = BADNESS;
+            }
+            ohpos = nhpos;
+            i++;
+          }
+          if (fout)
+            for (; ohpos < hpos; ohpos++) putc(' ', fout);
+        } else if (fout)
+          /* Nothing to fix: copy the run through as it was. */
+          for (i = 0; i < b.n; i++) putc(b.b[i], fout);
+      }
+      reset(&b);
+      f &= ~(f_newline | f_tab | f_forced);
+      if (!(flags & F_TABIFY) || !(flags & F_MIDLINETABS)) f &= ~f_tabify;
+      nwsp = nsp = 0;
+      hpos++; ohpos = hpos;
+      if (fout) putc(ch, fout);
+      /* Quote marks are transparent to the two-spaces-after-stop rule. */
+      if (ch != '"' && ch != '\'')
+        last = ch;
+      break;
+    }
+  }
+end:;
+
+  if (ferror(fin)) {
+    fprintf(stderr, "%s: Error reading `%s': %s\n",
+            ego, name, strerror(errno));
+    goto done_2;
+  }
+
+  if (fout) {
+    if (fflush(fout) || ferror(fout)) f |= f_bad;
+    if (fout != stdout && fclose(fout)) f |= f_bad;
+    fout = 0;
+    if (f & f_bad) {
+      /* newname is only set when writing in place. */
+      fprintf(stderr, "%s: Error writing `%s': %s\n",
+              ego, newname ? newname : "<stdout>", strerror(errno));
+      goto done_2;
+    }
+  }
+
+  if (flags & F_INPLACE) {
+    if (bkp) {
+      if ((oldname = augment(name, bkp)) == 0)
+        goto done_2;
+      if (rename(name, oldname)) {
+        fprintf(stderr, "%s: Failed to back up `%s' as `%s': %s\n",
+                ego, name, oldname, strerror(errno));
+        goto done_2;
+      }
+    }
+    if (rename(newname, name)) {
+      /* Try to put the original back where it was. */
+      if (oldname) rename(oldname, name);
+      fprintf(stderr, "%s: Failed to install `%s' as `%s': %s\n",
+              ego, newname, name, strerror(errno));
+      goto done_2;
+    }
+    /* The temporary name no longer exists, so there's nothing to remove. */
+    free(newname);
+    newname = 0;
+  }
+
+  rc = status;
+
+done_2:
+  free(oldname);
+  if (newname) {
+    remove(newname);
+    free(newname);
+  }
+done_1:
+  if (fout && fout != stdout) fclose(fout);
+  /* Leave stdin open: `-' may be named more than once. */
+  if (fin != stdin) fclose(fin);
+done_0:
+  return (rc);
+}
+
+/* Return 1 if more than one bit is set in f, and 0 otherwise. */
+static int manysetp(unsigned f)
+{
+  /* Clearing the lowest set bit leaves something only if there were more. */
+  unsigned rest = f & (f - 1);
+
+  return (rest != 0);
+}
+
+/*
+ * Parse the command line and process each file named on it, or standard
+ * input if there are none.
+ *
+ * The exit status is the largest status returned for any file: OK,
+ * BADNESS (whitespace problems; only reported when -c was given) or
+ * TROUBLE (errors, including bad usage).
+ */
+int main(int argc, char *argv[])
+{
+  int i;
+  int rc = OK, st;
+
+  /* Use the last component of the program name in messages. */
+  if ((ego = strrchr(argv[0], '/')) == 0)
+    ego = argv[0];
+  else
+    ego++;
+
+  for (;;) {
+    if ((i = getopt(argc, argv, "h" "cmtuv" "i::")) < 0)
+      break;
+    switch (i) {
+    case 'h':
+      printf("%s -- remove extraneous spaces from files\n\n", ego);
+      usage(stdout);
+      fputs("Options:\n\
+ -h Print this help text\n\
+ -c Check files for badness, but don't produce other output\n\
+ -m Fix spaces followed by tabs in mid-line\n\
+ -t Tabify file completely\n\
+ -u Untabify file completely\n\
+ -v Report problems found on standard error\n\
+ -i[BKP] Modify files in place; leave FILEBKP as copy of old FILE\n\
+", stdout);
+      exit(0);
+    case 'i':
+      /* The suffix is optional: optarg is null if none was attached. */
+      bkp = optarg;
+      flags |= F_INPLACE;
+      break;
+    case 'm':
+      flags |= F_MIDLINETABS;
+      break;
+    case 'c':
+      flags |= F_CHECK;
+      break;
+    case 't':
+      flags |= F_TABIFY;
+      break;
+    case 'u':
+      flags |= F_UNTABIFY;
+      break;
+    case 'v':
+      flags |= F_VERBOSE;
+      break;
+    default:
+      /* Keep going so that getopt reports every bad option. */
+      flags |= F_BOGUS;
+      break;
+    }
+  }
+  if (flags & F_BOGUS) {
+    usage(stderr);
+    exit(TROUBLE);
+  }
+  if (manysetp(flags & (F_CHECK | F_INPLACE))) {
+    fprintf(stderr, "%s: Options -c and -i are mutually exclusive.\n", ego);
+    exit(TROUBLE);
+  }
+  if (manysetp(flags & (F_TABIFY | F_UNTABIFY))) {
+    fprintf(stderr, "%s: Options -t and -u are mutually exclusive.\n", ego);
+    exit(TROUBLE);
+  }
+
+  if (optind == argc) {
+    /* Don't sit silently waiting for someone to type a file at us. */
+    if (isatty(0)) {
+      fprintf(stderr, "%s: No files given and stdin is a terminal.\n",
+              ego);
+      exit(TROUBLE);
+    }
+    rc = space("-");
+  } else for (i = optind; i < argc; i++) {
+    st = space(argv[i]);
+    if (st > rc) rc = st;
+  }
+  /* Finding things to fix only counts as failure when just checking. */
+  if (rc == BADNESS && !(flags & F_CHECK))
+    rc = OK;
+  return (rc);
+}