X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ian/git?a=blobdiff_plain;f=doc%2Fman%2Fuwildmat.3;fp=doc%2Fman%2Fuwildmat.3;h=0000000000000000000000000000000000000000;hb=b7a32e2d73e3ab1add8208d3e157f7269a31ef4d;hp=bbdc88d7092cc12b697478ff1e29f5adfe29d47f;hpb=ac902a8299ff4469b356836f431ead31c3377377;p=inn-innduct.git diff --git a/doc/man/uwildmat.3 b/doc/man/uwildmat.3 deleted file mode 100644 index bbdc88d..0000000 --- a/doc/man/uwildmat.3 +++ /dev/null @@ -1,290 +0,0 @@ -.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32 -.\" -.\" Standard preamble: -.\" ======================================================================== -.de Sh \" Subsection heading -.br -.if t .Sp -.ne 5 -.PP -\fB\\$1\fR -.PP -.. -.de Sp \" Vertical space (when we can't use .PP) -.if t .sp .5v -.if n .sp -.. -.de Vb \" Begin verbatim text -.ft CW -.nf -.ne \\$1 -.. -.de Ve \" End verbatim text -.ft R -.fi -.. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. \*(C+ will -.\" give a nicer C++. Capital omega is used to do unbreakable dashes and -.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, -.\" nothing in troff, for use with C<>. -.tr \(*W- -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" -. ds C` "" -. ds C' "" -'br\} -.el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. ds R" '' -'br\} -.\" -.\" If the F register is turned on, we'll generate index entries on stderr for -.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index -.\" entries marked with X<> in POD. Of course, you'll have to process the -.\" output yourself in some meaningful fashion. 
-.if \nF \{\ -. de IX -. tm Index:\\$1\t\\n%\t"\\$2" -.. -. nr % 0 -. rr F -.\} -.\" -.\" For nroff, turn off justification. Always turn off hyphenation; it makes -.\" way too many mistakes in technical documents. -.hy 0 -.if n .na -.\" -.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). -.\" Fear. Run. Save yourself. No user-serviceable parts. -. \" fudge factors for nroff and troff -.if n \{\ -. ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds / -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -. \" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. 
ds Ae AE -.\} -.rm #[ #] #H #V #F C -.\" ======================================================================== -.\" -.IX Title "uwildmat 3" -.TH uwildmat 3 "2008-04-06" "INN 2.4.5" "InterNetNews Documentation" -.SH "NAME" -uwildmat, uwildmat_simple, uwildmat_poison \- Perform wildmat matching -.SH "SYNOPSIS" -.IX Header "SYNOPSIS" -\&\fB#include <libinn.h>\fR -.PP -\&\fBbool uwildmat(const char *\fR\fItext\fR\fB, const char *\fR\fIpattern\fR\fB);\fR -.PP -\&\fBbool uwildmat_simple(const char *\fR\fItext\fR\fB, const char *\fR\fIpattern\fR\fB);\fR -.PP -\&\fBenum uwildmat uwildmat_poison(const char *\fR\fItext\fR\fB, -const char *\fR\fIpattern\fR\fB);\fR -.SH "DESCRIPTION" -.IX Header "DESCRIPTION" -\&\fBuwildmat\fR compares \fItext\fR against the wildmat expression \fIpattern\fR, -returning true if and only if the expression matches the text. \f(CW\*(C`@\*(C'\fR has -no special meaning in \fIpattern\fR when passed to \fBuwildmat\fR. Both \fItext\fR -and \fIpattern\fR are assumed to be in the \s-1UTF\-8\s0 character encoding, although -malformed \s-1UTF\-8\s0 sequences are treated in a way that attempts to be mostly -compatible with single-octet character sets like \s-1ISO\s0 8859\-1. (In other -words, if you try to match \s-1ISO\s0 8859\-1 text with these routines everything -should work as expected unless the \s-1ISO\s0 8859\-1 text contains valid \s-1UTF\-8\s0 -sequences, which thankfully is somewhat rare.) -.PP -\&\fBuwildmat_simple\fR is identical to \fBuwildmat\fR except that neither \f(CW\*(C`!\*(C'\fR -nor \f(CW\*(C`,\*(C'\fR have any special meaning and \fIpattern\fR is always treated as a -single pattern. This function exists solely to support legacy interfaces -like \s-1NNTP\s0's \s-1XPAT\s0 command, and should be avoided when implementing new -features. 
-.PP -\&\fBuwildmat_poison\fR works similarly to \fBuwildmat\fR, except that \f(CW\*(C`@\*(C'\fR as the -first character of one of the patterns in the expression (see below) -\&\*(L"poisons\*(R" the match if it matches. \fBuwildmat_poison\fR returns -\&\fB\s-1UWILDMAT_MATCH\s0\fR if the expression matches the text, \fB\s-1UWILDMAT_FAIL\s0\fR if -it doesn't, and \fB\s-1UWILDMAT_POISON\s0\fR if the expression doesn't match because -a poisoned pattern matched the text. These enumeration constants are -defined in the \fBlibinn.h\fR header. -.SH "WILDMAT EXPRESSIONS" -.IX Header "WILDMAT EXPRESSIONS" -A wildmat expression follows rules similar to those of shell filename -wildcards but with some additions and changes. A wildmat \fIexpression\fR is -composed of one or more wildmat \fIpatterns\fR separated by commas. Each -character in the wildmat pattern matches a literal occurrence of that same -character in the text, with the exception of the following metacharacters: -.IP "?" 8 -Matches any single character (including a single \s-1UTF\-8\s0 multibyte -character, so \f(CW\*(C`?\*(C'\fR can match more than one byte). -.IP "*\&" 8 -Matches any sequence of zero or more characters. -.IP "\e" 8 -.IX Item "" -Turns off any special meaning of the following character; the following -character will match itself in the text. \f(CW\*(C`\e\*(C'\fR will escape any character, -including another backslash or a comma that otherwise would separate a -pattern from the next pattern in an expression. Note that \f(CW\*(C`\e\*(C'\fR is not -special inside a character range (no metacharacters are). -.IP "[...]" 8 -A character set, which matches any single character that falls within that -set. The presence of a character between the brackets adds that character -to the set; for example, \f(CW\*(C`[amv]\*(C'\fR specifies the set containing the -characters \f(CW\*(C`a\*(C'\fR, \f(CW\*(C`m\*(C'\fR, and \f(CW\*(C`v\*(C'\fR. 
A range of characters may be specified -using \f(CW\*(C`\-\*(C'\fR; for example, \f(CW\*(C`[0\-5abc]\*(C'\fR is equivalent to \f(CW\*(C`[012345abc]\*(C'\fR. The -order of characters is as defined in the \s-1UTF\-8\s0 character set, and if the -start character of such a range falls after the ending character of the -range in that ranking the results of attempting a match with that pattern -are undefined. -.Sp -In order to include a literal \f(CW\*(C`]\*(C'\fR character in the set, it must be the -first character of the set (possibly following \f(CW\*(C`^\*(C'\fR); for example, \f(CW\*(C`[]a]\*(C'\fR -matches either \f(CW\*(C`]\*(C'\fR or \f(CW\*(C`a\*(C'\fR. To include a literal \f(CW\*(C`\-\*(C'\fR character in the -set, it must be either the first or the last character of the set. -Backslashes have no special meaning inside a character set, nor do any -other of the wildmat metacharacters. -.IP "[^...]" 8 -A negated character set. Follows the same rules as a character set above, -but matches any character \fBnot\fR contained in the set. So, for example, -\&\f(CW\*(C`[^]\-]\*(C'\fR matches any character except \f(CW\*(C`]\*(C'\fR and \f(CW\*(C`\-\*(C'\fR. -.PP -In addition, \f(CW\*(C`!\*(C'\fR (and possibly \f(CW\*(C`@\*(C'\fR) have special meaning as the first -character of a pattern; see below. -.PP -When matching a wildmat expression against some text, each comma-separated -pattern is matched in order from left to right. In order to match, the -pattern must match the whole text; in regular expression terminology, it's -implicitly anchored at both the beginning and the end. For example, the -pattern \f(CW\*(C`a\*(C'\fR matches only the text \f(CW\*(C`a\*(C'\fR; it doesn't match \f(CW\*(C`ab\*(C'\fR or \f(CW\*(C`ba\*(C'\fR -or even \f(CW\*(C`aa\*(C'\fR. If none of the patterns match, the whole expression -doesn't match. 
Otherwise, whether the expression matches is determined -entirely by the rightmost matching pattern; the expression matches the -text if and only if the rightmost matching pattern is not negated. -.PP -For example, consider the text \f(CW\*(C`news.misc\*(C'\fR. The expression \f(CW\*(C`*\*(C'\fR matches -this text, of course, as does \f(CW\*(C`comp.*,news.*\*(C'\fR (because the second pattern -matches). \f(CW\*(C`news.*,!news.misc\*(C'\fR does not match this text because both -patterns match, meaning that the rightmost takes precedence, and the -rightmost matching pattern is negated. \f(CW\*(C`news.*,!news.misc,*.misc\*(C'\fR does -match this text, since the rightmost matching pattern is not negated. -.PP -Note that the expression \f(CW\*(C`!news.misc\*(C'\fR can't match anything. Either the -pattern doesn't match, in which case no patterns match and the expression -doesn't match, or the pattern does match, in which case because it's -negated the expression doesn't match. \f(CW\*(C`*,!news.misc\*(C'\fR, on the other hand, -is a useful pattern that matches anything except \f(CW\*(C`news.misc\*(C'\fR. -.PP -\&\f(CW\*(C`!\*(C'\fR has significance only as the first character of a pattern; anywhere -else in the pattern, it matches a literal \f(CW\*(C`!\*(C'\fR in the text like any other -non\-metacharacter. -.PP -If the \fBuwildmat_poison\fR interface is used, then \f(CW\*(C`@\*(C'\fR behaves the same as -\&\f(CW\*(C`!\*(C'\fR except that if an expression fails to match because the rightmost -matching pattern began with \f(CW\*(C`@\*(C'\fR, \fB\s-1UWILDMAT_POISON\s0\fR is returned instead of -\&\fB\s-1UWILDMAT_FAIL\s0\fR. -.PP -If the \fBuwildmat_simple\fR interface is used, the matching rules are the -same as above except that none of \f(CW\*(C`!\*(C'\fR, \f(CW\*(C`@\*(C'\fR, or \f(CW\*(C`,\*(C'\fR have any special -meaning at all and only match those literal characters. 
-.SH "BUGS" -.IX Header "BUGS" -All of these functions internally convert the passed arguments to const -unsigned char pointers. The only reason why they take regular char -pointers instead of unsigned char is for the convenience of \s-1INN\s0 and other -callers that may not be using unsigned char everywhere they should. In a -future revision, the public interface should be changed to just take -unsigned char pointers. -.SH "HISTORY" -.IX Header "HISTORY" -Written by Rich \f(CW$alz\fR in 1986, and posted to Usenet -several times since then, most notably in comp.sources.misc in -March, 1991. -.PP -Lars Mathiesen enhanced the multi-asterisk failure -mode in early 1991. -.PP -Rich and Lars increased the efficiency of star patterns and reposted it to -comp.sources.misc in April, 1991. -.PP -Robert Elz added minus sign and close bracket handling -in June, 1991. -.PP -Russ Allbery added support for comma-separated patterns -and the \f(CW\*(C`!\*(C'\fR and \f(CW\*(C`@\*(C'\fR metacharacters to the core wildmat routines in July, -2000. He also added support for \s-1UTF\-8\s0 characters, changed the default -behavior to assume that both the text and the pattern are in \s-1UTF\-8\s0, and -largely rewrote this documentation to expand and clarify the description -of how a wildmat expression matches. -.PP -Please note that the interfaces to these functions are named \fBuwildmat\fR -and the like rather than \fBwildmat\fR to distinguish them from the -\&\fBwildmat\fR function provided by Rich \f(CW$alz\fR's original implementation. -While this code is heavily based on Rich's original code, it has -substantial differences, including the extension to support \s-1UTF\-8\s0 -characters, and has noticeable functionality changes. Any bugs present in -it aren't Rich's fault. -.PP -$Id: uwildmat.3 7880 2008-06-16 20:37:13Z iulius $ -.SH "SEE ALSO" -.IX Header "SEE ALSO" -\&\fIgrep\fR\|(1), \fIfnmatch\fR\|(3), \fIregex\fR\|(3), \fIregexp\fR\|(3).