From efae42a671ed553b5fd9967204aab11516010074 Mon Sep 17 00:00:00 2001 Message-Id: From: Mark Wooding Date: Sun, 8 Oct 2000 09:43:35 +0000 Subject: [PATCH] New quoted string handling and simple pattern matching. Organization: Straylight/Edgeware From: mdw --- man/str.3 | 67 +++++++++++++++-- str.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++------ str.h | 66 +++++++++++++++-- 3 files changed, 316 insertions(+), 33 deletions(-) diff --git a/man/str.3 b/man/str.3 index 1484fb5..e91fc13 100644 --- a/man/str.3 +++ b/man/str.3 @@ -14,15 +14,22 @@ .TH str 3 "20 June 1999" mLib .SH NAME str \- small string utilities +.\" @str_qword +.\" @str_qsplit .\" @str_getword .\" @str_split +.\" @str_match .\" @str_sanitize .SH SYNOPSIS .nf .B "#include " +.BI "char *str_qword(char **" pp ", unsigned " f ); +.BI "size_t str_qsplit(char *" p ", char *" v "[], size_t " c , +.BI " char **" rest ", unsigned " f ); .BI "char *str_getword(char **" pp ); .BI "size_t str_split(char *" p ", char *" v "[], size_t " c ", char **" rest ); +.BI "int str_match(const char *" p ", const char *" s ); .BI "void str_sanitize(char *" d ", const char *" p ", size_t " sz ); .fi .SH DESCRIPTION @@ -32,21 +39,26 @@ contains a few small utility functions for manipulating null-terminated strings. .PP The function -.B str_getword +.B str_qword extracts the next whitespace-delimited word from a string. The function's argument, .IR pp , is the address of a pointer into the string: this pointer is updated by -.B str_getword +.B str_qword so that it can extract the following word on the next call and so on. The return value is the address of the next word, appropriately null terminated. A null pointer is returned if the entire remainder of the string is whitespace. Note that -.B str_getword +.B str_qword modifies the string as it goes, to null-terminate the individual words. 
+If the flag +.B STRF_QUOTE +is passed, the single- and double-quote characters may be used to quote +whitespace within words, and the backslash can escape quote characters +and whitespace. .PP The function -.B str_split +.B str_qsplit divides a string into whitespace-separated words. The arguments are as follows: .TP @@ -74,11 +86,56 @@ storing. If the remainder string is empty, a null pointer is stored instead. If .I rest is null, the remainder pointer is discarded. +.TP +.BI "unsigned " f +Flags, as for +.BR str_qword . .PP The return value of -.B str_split +.B str_qsplit is the number of words extracted from the input string. .PP +The functions +.B str_getword +and +.B str_split +are veneers over +.B str_qword +and +.B str_qsplit +respectively; they are equivalent to calls to the latter functions with +flags words of zero. +.PP +The +.B str_match +function does simple wildcard matching. The first argument is a +pattern, which may contain metacharacters: +.RB ` * ' +matches zero or more arbitrary characters; +.RB ` ? ' +matches exactly one arbitrary character; and +.RB ` [ ... ] ' +matches one of the characters listed. The backslash +.RB ` \e ' +escapes the following character. Within square brackets, the +hyphen +.RB ` \- ' +may be used to designate ranges of characters. If the initial character +is +.RB ` ! ' +or +.RB ` ^ ' +then the sense of the match is reversed. To literally match a +.RB ` ] ' +character, list it first; to literally match a +.RB ` \- ' +character, list it immediately after a range, or at the beginning or end +of the set. The return value is nonzero if the pattern +.I p +matches the given string +.IR s , +or zero if the pattern doesn't match. 
+.PP The function .B str_sanitize copies at most diff --git a/str.c b/str.c index 0defaa7..711ccf7 100644 --- a/str.c +++ b/str.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: str.c,v 1.3 1999/12/22 15:41:14 mdw Exp $ + * $Id: str.c,v 1.4 2000/10/08 09:43:34 mdw Exp $ * * Functions for hacking with strings * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: str.c,v $ + * Revision 1.4 2000/10/08 09:43:34 mdw + * New quoted string handling and simple pattern matching. + * * Revision 1.3 1999/12/22 15:41:14 mdw * Skip past trailing whitespace in str_getword. * @@ -52,48 +55,91 @@ /*----- Main code ---------------------------------------------------------*/ -/* --- @str_getword@ --- * +/* --- @str_qword@ --- * * * Arguments: @char **pp@ = address of pointer into string + * @unsigned f@ = various flags * - * Returns: Pointer to the next space-separated word from the string, - * or null. + * Returns: Pointer to the next space-separated possibly-quoted word from + * the string, or null. * - * Use: Parses off space-separated words from a string. + * Use: Fetches the next word from a string. If the flag + * @STRF_QUOTE@ is set, the `\' character acts as an escape, and + * single and double quotes protect whitespace. 
*/ -char *str_getword(char **pp) +#define STRF_QUOTE 1u + +char *str_qword(char **pp, unsigned f) { - char *p = *pp, *q; + char *p = *pp, *q, *qq; + int st = 0, pst = 0; + + /* --- Preliminaries --- */ if (!p) return (0); - while (isspace((unsigned char)*p)) p++; + if (!*p) { + *pp = 0; + return (0); + } + + /* --- Main work --- */ - for (q = p; *q; q++) { - if (isspace((unsigned char)*q)) { - *q++ = 0; - while (*q && isspace((unsigned char)*q)) - q++; - if (!*q) - q = 0; - *pp = q; - return (p); + for (q = qq = p; *q; q++) { + switch (st) { + case '\\': + *qq++ = *q; + st = pst; + break; + case '\'': + case '\"': + if (*q == st) + st = pst = 0; + else if (*q == '\\') + st = '\\'; + else + *qq++ = *q; + break; + default: + if (isspace((unsigned char)*q)) { + do q++; while (*q && isspace((unsigned char)*q)); + goto done; + } else if (!(f & STRF_QUOTE)) + goto stdchar; + switch (*q) { + case '\\': + st = '\\'; + break; + case '\'': + case '\"': + st = pst = *q; + break; + default: + stdchar: + *qq++ = *q; + break; + } } } - *pp = 0; + /* --- Finished --- */ + +done: + *pp = *q ? q : 0; + *qq++ = 0; return (p); } -/* --- @str_split@ --- * +/* --- @str_qsplit@ --- * * * Arguments: @char *p@ = pointer to string * @char *v[]@ = pointer to array to fill in * @size_t c@ = count of strings to fill in * @char **rest@ = where to store the remainder of the string + * @unsigned f@ = flags for @str_qword@ * * Returns: Number of strings filled in. * @@ -108,12 +154,12 @@ char *str_getword(char **pp) * @rest@ is set to a null pointer. 
*/ -size_t str_split(char *p, char *v[], size_t c, char **rest) +size_t str_qsplit(char *p, char *v[], size_t c, char **rest, unsigned f) { size_t n = 0; char *q; - while (c && (q = str_getword(&p)) != 0) { + while (c && (q = str_qword(&p, f)) != 0) { *v++ = q; c--; n++; @@ -127,6 +173,134 @@ size_t str_split(char *p, char *v[], size_t c, char **rest) return (n); } +/* --- @str_getword@ --- * + * + * Arguments: @char **pp@ = address of pointer into string + * + * Returns: Pointer to the next space-separated word from the string, + * or null. + * + * Use: Parses off space-separated words from a string. This is a + * compatibility veneer over @str_qword@. + */ + +char *str_getword(char **pp) +{ + return (str_qword(pp, 0)); +} + +/* --- @str_split@ --- * + * + * Arguments: @char *p@ = pointer to string + * @char *v[]@ = pointer to array to fill in + * @size_t c@ = count of strings to fill in + * @char **rest@ = where to store the remainder of the string + * + * Returns: Number of strings filled in. + * + * Use: Fills an array with pointers to the individual words of a + * string. This is a compatibility veneer over @str_qsplit@. + */ + +size_t str_split(char *p, char *v[], size_t c, char **rest) +{ + return (str_qsplit(p, v, c, rest, 0)); +} + +/* --- @str_match@ --- * + * + * Arguments: @const char *p@ = pointer to pattern string + * @const char *s@ = string to compare with + * + * Returns: Nonzero if the pattern matches the string. + * + * Use: Does simple wildcard matching. This is quite nasty and more + * than a little slow. Supports metacharacters `*', `?' and + * '['. 
+ */ + +int str_match(const char *p, const char *s) +{ + for (;;) { + char pch = *p++, pche, sch; + int sense; + + switch (pch) { + case '?': + if (!*s) + return (0); + s++; + break; + case '*': + if (!*p) + return (1); + while (*s) { + if (str_match(p, s)) + return (1); + s++; + } + return (0); + case '[': + if (!*s) + return (0); + sch = *s++; + pch = *p++; + sense = 1; + if (pch == '^' || pch == '!') { + sense = !sense; + pch = *p++; + } + if (pch == ']') { + if (*p == '-' && p[1] && p[1] != ']') { + pche = p[1]; + p += 2; + if (pch <= sch && sch <= pche) + goto class_match; + } else if (pch == sch) + goto class_match; + pch = *p++; + } + for (;; pch = *p++) { + if (!pch || pch == ']') + goto class_nomatch; + if (*p == '-' && p[1] && p[1] != ']') { + pche = p[1]; + p += 2; + if (pch <= sch && sch <= pche) + goto class_match; + } else if (pch == sch) + goto class_match; + } + class_match: + if (!sense) + return (0); + for (;;) { + pch = *p++; + if (!pch) + return (0); + if (pch == ']') + break; + if (*p == '-' && p[1] && p[1] != ']') + p += 2; + } + break; + class_nomatch: + if (sense) + return (0); + break; + case '\\': + pch = *p++; + default: + if (pch != *s) + return (0); + if (!pch) + return (1); + s++; + break; + } + } +} + /* --- @str_sanitize@ --- * * * Arguments: @char *d@ = destination buffer diff --git a/str.h b/str.h index 1581e50..5c5d300 100644 --- a/str.h +++ b/str.h @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: str.h,v 1.3 1999/12/10 23:42:04 mdw Exp $ + * $Id: str.h,v 1.4 2000/10/08 09:43:34 mdw Exp $ * * Functions for hacking with strings * @@ -30,6 +30,9 @@ /*----- Revision history --------------------------------------------------* * * $Log: str.h,v $ + * Revision 1.4 2000/10/08 09:43:34 mdw + * New quoted string handling and simple pattern matching. + * * Revision 1.3 1999/12/10 23:42:04 mdw * Change header file guard names. 
* @@ -54,24 +57,30 @@ /*----- Functions provided ------------------------------------------------*/ -/* --- @str_getword@ --- * +/* --- @str_qword@ --- * * * Arguments: @char **pp@ = address of pointer into string + * @unsigned f@ = various flags * - * Returns: Pointer to the next space-separated word from the string, - * or null. + * Returns: Pointer to the next space-separated possibly-quoted word from + * the string, or null. * - * Use: Parses off space-separated words from a string. + * Use: Fetches the next word from a string. If the flag + * @STRF_QUOTE@ is set, the `\' character acts as an escape, and + * single and double quotes protect whitespace. */ -extern char *str_getword(char **/*pp*/); +#define STRF_QUOTE 1u -/* --- @str_split@ --- * +extern char *str_qword(char **/*pp*/, unsigned /*f*/); + +/* --- @str_qsplit@ --- * * * Arguments: @char *p@ = pointer to string * @char *v[]@ = pointer to array to fill in * @size_t c@ = count of strings to fill in * @char **rest@ = where to store the remainder of the string + * @unsigned f@ = flags for @str_qword@ * * Returns: Number of strings filled in. * @@ -86,9 +95,52 @@ extern char *str_getword(char **/*pp*/); * @rest@ is set to a null pointer. */ +extern size_t str_qsplit(char */*p*/, char */*v*/[], size_t /*c*/, + char **/*rest*/, unsigned /*f*/); + +/* --- @str_getword@ --- * + * + * Arguments: @char **pp@ = address of pointer into string + * + * Returns: Pointer to the next space-separated word from the string, + * or null. + * + * Use: Parses off space-separated words from a string. This is a + * compatibility veneer over @str_qword@. + */ + +extern char *str_getword(char **/*pp*/); + +/* --- @str_split@ --- * + * + * Arguments: @char *p@ = pointer to string + * @char *v[]@ = pointer to array to fill in + * @size_t c@ = count of strings to fill in + * @char **rest@ = where to store the remainder of the string + * + * Returns: Number of strings filled in. 
+ * + * Use: Fills an array with pointers to the individual words of a + * string. This is a compatibility veneer over @str_qsplit@. + */ + extern size_t str_split(char */*p*/, char */*v*/[], size_t /*c*/, char **/*rest*/); +/* --- @str_match@ --- * + * + * Arguments: @const char *p@ = pointer to pattern string + * @const char *s@ = string to compare with + * + * Returns: Nonzero if the pattern matches the string. + * + * Use: Does simple wildcard matching. This is quite nasty and more + * than a little slow. Supports metacharacters `*', `?' and + * `['. + */ + +extern int str_match(const char */*p*/, const char */*s*/); + /* --- @str_sanitize@ --- * * * Arguments: @char *d@ = destination buffer -- [mdw]