.TH str 3 "20 June 1999" mLib
.SH NAME
str \- small string utilities
+.\" @str_qword
+.\" @str_qsplit
.\" @str_getword
.\" @str_split
+.\" @str_match
.\" @str_sanitize
.SH SYNOPSIS
.nf
.B "#include <mLib/str.h>"
+.BI "char *str_qword(char **" pp ", unsigned " f );
+.BI "size_t str_qsplit(char *" p ", char *" v "[], size_t " c ,
+.BI " char **" rest ", unsigned " f );
.BI "char *str_getword(char **" pp );
.BI "size_t str_split(char *" p ", char *" v "[], size_t " c ", char **" rest );
+.BI "int str_match(const char *" p ", const char *" s );
.BI "void str_sanitize(char *" d ", const char *" p ", size_t " sz );
.fi
.SH DESCRIPTION
strings.
.PP
The function
-.B str_getword
+.B str_qword
extracts the next whitespace-delimited word from a string. The
function's argument,
.IR pp ,
is the address of a pointer into the string: this pointer is updated by
-.B str_getword
+.B str_qword
so that it can extract the following word on the next call and so on.
The return value is the address of the next word, appropriately null
terminated. A null pointer is returned if the entire remainder of the
string is whitespace. Note that
-.B str_getword
+.B str_qword
modifies the string as it goes, to null-terminate the individual words.
+If the flag
+.B STRF_QUOTE
+is passed, the single- and double-quote characters may be used to quote
+whitespace within words, and the backslash can escape quote characters
+and whitespace.
.PP
The function
-.B str_split
+.B str_qsplit
divides a string into whitespace-separated words. The arguments are as
follows:
.TP
instead. If
.I rest
is null, the remainder pointer is discarded.
+.TP
+.BI "unsigned " f
+Flags, as for
+.BR str_qword .
.PP
The return value of
-.B str_split
+.B str_qsplit
is the number of words extracted from the input string.
.PP
+The functions
+.B str_getword
+and
+.B str_split
+are veneers over
+.B str_qword
+and
+.B str_qsplit
+respectively; they are equivalent to calls to the latter functions with
+flags words of zero.
+.PP
+The
+.B str_match
+function does simple wildcard matching. The first argument is a
+pattern, which may contain metacharacters:
+.RB ` * '
+matches zero or more arbitrary characters;
+.RB ` ? '
+matches exactly one arbitrary character; and
+.RB ` [ ... ] '
+matches one of the characters listed. The backslash
+.RB ` \e '
+escapes the following character. Within square brackets, the
+hyphen
+.RB ` \- '
+may be used to designate ranges of characters. If the initial character
+is
+.RB ` ! '
+or
+.RB ` ^ '
+then the sense of the match is reversed. To literally match a
+.RB ` ] '
+character, list it first; to literally match a
+.RB ` \- '
+character, list it immediately after a range, or at the beginning or end
+of the set. The return value is nonzero if the pattern
+.I p
+matches the given string
+.IR s ,
+or zero if the pattern doesn't match.
+.PP
The function
.B str_sanitize
copies at most
/* -*-c-*-
*
- * $Id: str.c,v 1.3 1999/12/22 15:41:14 mdw Exp $
+ * $Id: str.c,v 1.4 2000/10/08 09:43:34 mdw Exp $
*
* Functions for hacking with strings
*
/*----- Revision history --------------------------------------------------*
*
* $Log: str.c,v $
+ * Revision 1.4 2000/10/08 09:43:34 mdw
+ * New quoted string handling and simple pattern matching.
+ *
* Revision 1.3 1999/12/22 15:41:14 mdw
* Skip past trailing whitespace in str_getword.
*
/*----- Main code ---------------------------------------------------------*/
-/* --- @str_getword@ --- *
+/* --- @str_qword@ --- *
*
* Arguments: @char **pp@ = address of pointer into string
+ * @unsigned f@ = various flags
*
- * Returns: Pointer to the next space-separated word from the string,
- * or null.
+ * Returns: Pointer to the next space-separated possibly-quoted word from
+ * the string, or null.
*
- * Use: Parses off space-separated words from a string.
+ * Use: Fetches the next word from a string. If the flag
+ * @STRF_QUOTE@ is set, the `\' character acts as an escape, and
+ * single and double quotes protect whitespace.
*/
-char *str_getword(char **pp)
+#define STRF_QUOTE 1u
+
+char *str_qword(char **pp, unsigned f)
{
- char *p = *pp, *q;
+ char *p = *pp, *q, *qq; /* @q@ scans the input; @qq@ writes the word back in place */
+ int st = 0, pst = 0; /* @st@: 0, an open quote character, or backslash; @pst@: state restored after an escaped character */
+
+ /* --- Preliminaries: skip leading whitespace; if nothing is left there is no word --- */
if (!p)
return (0);
-
while (isspace((unsigned char)*p))
p++;
+ if (!*p) {
+ *pp = 0;
+ return (0);
+ }
+
+ /* --- Main work: copy characters down over the string, dropping quotes and backslashes --- */
- for (q = p; *q; q++) {
- if (isspace((unsigned char)*q)) {
- *q++ = 0;
- while (*q && isspace((unsigned char)*q))
- q++;
- if (!*q)
- q = 0;
- *pp = q;
- return (p);
+ for (q = qq = p; *q; q++) {
+ switch (st) {
+ case '\\': /* previous character was a backslash: keep this one literally */
+ *qq++ = *q;
+ st = pst;
+ break;
+ case '\'':
+ case '\"': /* inside a quoted section: whitespace is copied like any other character */
+ if (*q == st) /* the matching quote character closes the section */
+ st = pst = 0;
+ else if (*q == '\\')
+ st = '\\';
+ else
+ *qq++ = *q;
+ break;
+ default:
+ if (isspace((unsigned char)*q)) {
+ do q++; while (*q && isspace((unsigned char)*q)); /* unquoted whitespace ends the word; skip all of it */
+ goto done;
+ } else if (!(f & STRF_QUOTE)) /* quoting not requested: every character is ordinary */
+ goto stdchar;
+ switch (*q) {
+ case '\\':
+ st = '\\';
+ break;
+ case '\'':
+ case '\"':
+ st = pst = *q; /* remember which quote character must close the section */
+ break;
+ default:
+ stdchar:
+ *qq++ = *q;
+ break;
+ }
}
}
- *pp = 0;
+ /* --- Finished: store the continuation pointer, then terminate the word --- */
+
+done:
+ *pp = *q ? q : 0; /* null when the rest of the string has been used up */
+ *qq++ = 0; /* written after @*q@ is examined; @qq@ never runs ahead of @q@ */
return (p);
}
-/* --- @str_split@ --- *
+/* --- @str_qsplit@ --- *
*
* Arguments: @char *p@ = pointer to string
* @char *v[]@ = pointer to array to fill in
* @size_t c@ = count of strings to fill in
* @char **rest@ = where to store the remainder of the string
+ * @unsigned f@ = flags for @str_qword@
*
* Returns: Number of strings filled in.
*
* @rest@ is set to a null pointer.
*/
-size_t str_split(char *p, char *v[], size_t c, char **rest)
+size_t str_qsplit(char *p, char *v[], size_t c, char **rest, unsigned f)
{
size_t n = 0;
char *q;
- while (c && (q = str_getword(&p)) != 0) {
+ while (c && (q = str_qword(&p, f)) != 0) {
*v++ = q;
c--;
n++;
return (n);
}
+/* --- @str_getword@ --- *
+ *
+ * Arguments: @char **pp@ = address of pointer into string
+ *
+ * Returns: Pointer to the next space-separated word from the string,
+ * or null.
+ *
+ * Use: Parses off space-separated words from a string. This is a
+ * compatibility veneer over @str_qword@: it passes a flags
+ * word of zero, so @STRF_QUOTE@ is never set and quote and
+ * backslash characters are treated as ordinary characters.
+ */
+
+char *str_getword(char **pp)
+{
+ return (str_qword(pp, 0));
+}
+
+/* --- @str_split@ --- *
+ *
+ * Arguments: @char *p@ = pointer to string
+ * @char *v[]@ = pointer to array to fill in
+ * @size_t c@ = count of strings to fill in
+ * @char **rest@ = where to store the remainder of the string
+ *
+ * Returns: Number of strings filled in.
+ *
+ * Use: Fills an array with pointers to the individual words of a
+ * string. This is a compatibility veneer over @str_qsplit@:
+ * the arguments are passed through unchanged, together with a
+ * flags word of zero.
+ */
+
+size_t str_split(char *p, char *v[], size_t c, char **rest)
+{
+ return (str_qsplit(p, v, c, rest, 0));
+}
+
+/* --- @str_match@ --- *
+ *
+ * Arguments: @const char *p@ = pointer to pattern string
+ * @const char *s@ = string to compare with
+ *
+ * Returns: Nonzero if the pattern matches the whole string, zero if it
+ * doesn't.
+ *
+ * Use: Does simple wildcard matching. This is quite nasty and more
+ * than a little slow. Supports metacharacters `*', `?' and
+ * `['; a backslash makes the following pattern character
+ * literal. A bracket expression which is missing its closing
+ * `]' never matches.
+ */
+
+int str_match(const char *p, const char *s)
+{
+  for (;;) {
+    char pch = *p++, pche, sch;
+    int sense;
+
+    switch (pch) {
+
+      /* --- `?' consumes exactly one character of the string --- */
+
+      case '?':
+        if (!*s)
+          return (0);
+        s++;
+        break;
+
+      /* --- `*': try the rest of the pattern against every suffix --- *
+       *
+       * The empty suffix is tried as well, so that a further `*' in the
+       * pattern can still match nothing at the end of the string.
+       */
+
+      case '*':
+        if (!*p)
+          return (1);
+        for (;;) {
+          if (str_match(p, s))
+            return (1);
+          if (!*s)
+            return (0);
+          s++;
+        }
+
+      /* --- `[...]': match one character against a set --- */
+
+      case '[':
+        if (!*s)
+          return (0);
+        sch = *s++;
+        pch = *p++;
+        sense = 1;
+        if (pch == '^' || pch == '!') {
+          sense = !sense;
+          pch = *p++;
+        }
+
+        /* A `]' in first position is an ordinary member of the set. */
+        if (pch == ']') {
+          if (*p == '-' && p[1] && p[1] != ']') {
+            pche = p[1];
+            p += 2;
+            if (pch <= sch && sch <= pche)
+              goto class_match;
+          } else if (pch == sch)
+            goto class_match;
+          pch = *p++;
+        }
+
+        for (;; pch = *p++) {
+
+          /* The pattern ended inside the set. Stop here: @p@ has already
+           * stepped over the terminator, so carrying on (as a negated set
+           * used to) would read beyond the end of the pattern.
+           */
+          if (!pch)
+            return (0);
+          if (pch == ']')
+            goto class_nomatch;
+          if (*p == '-' && p[1] && p[1] != ']') {
+            pche = p[1];
+            p += 2;
+            if (pch <= sch && sch <= pche)
+              goto class_match;
+          } else if (pch == sch)
+            goto class_match;
+        }
+
+      class_match:
+        if (!sense)
+          return (0);
+
+        /* Skip the remainder of the set, up to and including the `]'. */
+        for (;;) {
+          pch = *p++;
+          if (!pch)
+            return (0);
+          if (pch == ']')
+            break;
+          if (*p == '-' && p[1] && p[1] != ']')
+            p += 2;
+        }
+        break;
+
+      class_nomatch:
+        if (sense)
+          return (0);
+        break;
+
+      /* --- Escaped and ordinary characters must match exactly --- */
+
+      case '\\':
+        pch = *p++;
+        /* fall through */
+      default:
+        if (pch != *s)
+          return (0);
+        if (!pch)
+          return (1);
+        s++;
+        break;
+    }
+  }
+}
+
+
/* --- @str_sanitize@ --- *
*
* Arguments: @char *d@ = destination buffer
/* -*-c-*-
*
- * $Id: str.h,v 1.3 1999/12/10 23:42:04 mdw Exp $
+ * $Id: str.h,v 1.4 2000/10/08 09:43:34 mdw Exp $
*
* Functions for hacking with strings
*
/*----- Revision history --------------------------------------------------*
*
* $Log: str.h,v $
+ * Revision 1.4 2000/10/08 09:43:34 mdw
+ * New quoted string handling and simple pattern matching.
+ *
* Revision 1.3 1999/12/10 23:42:04 mdw
* Change header file guard names.
*
/*----- Functions provided ------------------------------------------------*/
-/* --- @str_getword@ --- *
+/* --- @str_qword@ --- *
*
* Arguments: @char **pp@ = address of pointer into string
+ * @unsigned f@ = various flags
*
- * Returns: Pointer to the next space-separated word from the string,
- * or null.
+ * Returns: Pointer to the next space-separated possibly-quoted word from
+ * the string, or null.
*
- * Use: Parses off space-separated words from a string.
+ * Use: Fetches the next word from a string. If the flag
+ * @STRF_QUOTE@ is set, the `\' character acts as an escape, and
+ * single and double quotes protect whitespace.
*/
-extern char *str_getword(char **/*pp*/);
+#define STRF_QUOTE 1u
-/* --- @str_split@ --- *
+extern char *str_qword(char **/*pp*/, unsigned /*f*/);
+
+/* --- @str_qsplit@ --- *
*
* Arguments: @char *p@ = pointer to string
* @char *v[]@ = pointer to array to fill in
* @size_t c@ = count of strings to fill in
* @char **rest@ = where to store the remainder of the string
+ * @unsigned f@ = flags for @str_qword@
*
* Returns: Number of strings filled in.
*
* @rest@ is set to a null pointer.
*/
+extern size_t str_qsplit(char */*p*/, char */*v*/[], size_t /*c*/,
+ char **/*rest*/, unsigned /*f*/);
+
+/* --- @str_getword@ --- *
+ *
+ * Arguments: @char **pp@ = address of pointer into string
+ *
+ * Returns: Pointer to the next space-separated word from the string,
+ * or null.
+ *
+ * Use: Parses off space-separated words from a string. This is a
+ * compatibility veneer over @str_qword@.
+ */
+
+extern char *str_getword(char **/*pp*/);
+
+/* --- @str_split@ --- *
+ *
+ * Arguments: @char *p@ = pointer to string
+ * @char *v[]@ = pointer to array to fill in
+ * @size_t c@ = count of strings to fill in
+ * @char **rest@ = where to store the remainder of the string
+ *
+ * Returns: Number of strings filled in.
+ *
+ * Use: Fills an array with pointers to the individual words of a
+ * string. This is a compatibility veneer over @str_qsplit@.
+ */
+
extern size_t str_split(char */*p*/, char */*v*/[],
size_t /*c*/, char **/*rest*/);
+/* --- @str_match@ --- *
+ *
+ * Arguments: @const char *p@ = pointer to pattern string
+ * @const char *s@ = string to compare with
+ *
+ * Returns: Nonzero if the pattern matches the string.
+ *
+ * Use: Does simple wildcard matching. This is quite nasty and more
+ * than a little slow. Supports metacharacters `*', `?' and
+ * `['.
+ */
+
+extern int str_match(const char */*p*/, const char */*s*/);
+
/* --- @str_sanitize@ --- *
*
* Arguments: @char *d@ = destination buffer