doc/man/uwildmat.3

   1 .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
   2 .\"
   3 .\" Standard preamble:
   4 .\" ========================================================================
   5 .de Sh \" Subsection heading
   6 .br
   7 .if t .Sp
   8 .ne 5
   9 .PP
  10 \fB\\$1\fR
  11 .PP
  12 ..
  13 .de Sp \" Vertical space (when we can't use .PP)
  14 .if t .sp .5v
  15 .if n .sp
  16 ..
  17 .de Vb \" Begin verbatim text
  18 .ft CW
  19 .nf
  20 .ne \\$1
  21 ..
  22 .de Ve \" End verbatim text
  23 .ft R
  24 .fi
  25 ..
  26 .\" Set up some character translations and predefined strings.  \*(-- will
  27 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
  28 .\" double quote, and \*(R" will give a right double quote.  \*(C+ will
  29 .\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
  30 .\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
  31 .\" nothing in troff, for use with C<>.
  32 .tr \(*W-
  33 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  34 .ie n \{\
  35 .    ds -- \(*W-
  36 .    ds PI pi
  37 .    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
  38 .    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
  39 .    ds L" ""
  40 .    ds R" ""
  41 .    ds C` ""
  42 .    ds C' ""
  43 'br\}
  44 .el\{\
  45 .    ds -- \|\(em\|
  46 .    ds PI \(*p
  47 .    ds L" ``
  48 .    ds R" ''
  49 'br\}
  50 .\"
  51 .\" If the F register is turned on, we'll generate index entries on stderr for
  52 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
  53 .\" entries marked with X<> in POD.  Of course, you'll have to process the
  54 .\" output yourself in some meaningful fashion.
  55 .if \nF \{\
  56 .    de IX
  57 .    tm Index:\\$1\t\\n%\t"\\$2"
  58 ..
  59 .    nr % 0
  60 .    rr F
  61 .\}
  62 .\"
  63 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
  64 .\" way too many mistakes in technical documents.
  65 .hy 0
  66 .if n .na
  67 .\"
  68 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
  69 .\" Fear.  Run.  Save yourself.  No user-serviceable parts.
  70 .    \" fudge factors for nroff and troff
  71 .if n \{\
  72 .    ds #H 0
  73 .    ds #V .8m
  74 .    ds #F .3m
  75 .    ds #[ \f1
  76 .    ds #] \fP
  77 .\}
  78 .if t \{\
  79 .    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
  80 .    ds #V .6m
  81 .    ds #F 0
  82 .    ds #[ \&
  83 .    ds #] \&
  84 .\}
  85 .    \" simple accents for nroff and troff
  86 .if n \{\
  87 .    ds ' \&
  88 .    ds ` \&
  89 .    ds ^ \&
  90 .    ds , \&
  91 .    ds ~ ~
  92 .    ds /
  93 .\}
  94 .if t \{\
  95 .    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
  96 .    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
  97 .    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
  98 .    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
  99 .    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 100 .    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 101 .\}
 102 .    \" troff and (daisy-wheel) nroff accents
 103 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 104 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 105 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 106 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 107 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 108 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 109 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 110 .ds ae a\h'-(\w'a'u*4/10)'e
 111 .ds Ae A\h'-(\w'A'u*4/10)'E
 112 .    \" corrections for vroff
 113 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 114 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 115 .    \" for low resolution devices (crt and lpr)
 116 .if \n(.H>23 .if \n(.V>19 \
 117 \{\
 118 .    ds : e
 119 .    ds 8 ss
 120 .    ds o a
 121 .    ds d- d\h'-1'\(ga
 122 .    ds D- D\h'-1'\(hy
 123 .    ds th \o'bp'
 124 .    ds Th \o'LP'
 125 .    ds ae ae
 126 .    ds Ae AE
 127 .\}
 128 .rm #[ #] #H #V #F C
 129 .\" ========================================================================
 130 .\"
 131 .IX Title "uwildmat 3"
 132 .TH uwildmat 3 "2008-04-06" "INN 2.4.5" "InterNetNews Documentation"
 133 .SH "NAME"
 134 uwildmat, uwildmat_simple, uwildmat_poison \- Perform wildmat matching
 135 .SH "SYNOPSIS"
 136 .IX Header "SYNOPSIS"
 137 \&\fB#include <libinn.h>\fR
 138 .PP
 139 \&\fBbool uwildmat(const char *\fR\fItext\fR\fB, const char *\fR\fIpattern\fR\fB);\fR
 140 .PP
 141 \&\fBbool uwildmat_simple(const char *\fR\fItext\fR\fB, const char *\fR\fIpattern\fR\fB);\fR
 142 .PP
 143 \&\fBenum uwildmat uwildmat_poison(const char *\fR\fItext\fR\fB,
 144 const char *\fR\fIpattern\fR\fB);\fR
 145 .SH "DESCRIPTION"
 146 .IX Header "DESCRIPTION"
 147 \&\fBuwildmat\fR compares \fItext\fR against the wildmat expression \fIpattern\fR,
 148 returning true if and only if the expression matches the text.  \f(CW\*(C`@\*(C'\fR has
 149 no special meaning in \fIpattern\fR when passed to \fBuwildmat\fR.  Both \fItext\fR
 150 and \fIpattern\fR are assumed to be in the \s-1UTF\-8\s0 character encoding, although
 151 malformed \s-1UTF\-8\s0 sequences are treated in a way that attempts to be mostly
 152 compatible with single-octet character sets like \s-1ISO\s0 8859\-1.  (In other
 153 words, if you try to match \s-1ISO\s0 8859\-1 text with these routines everything
 154 should work as expected unless the \s-1ISO\s0 8859\-1 text contains valid \s-1UTF\-8\s0
 155 sequences, which thankfully is somewhat rare.)
 156 .PP
 157 \&\fBuwildmat_simple\fR is identical to \fBuwildmat\fR except that neither \f(CW\*(C`!\*(C'\fR
 158 nor \f(CW\*(C`,\*(C'\fR has any special meaning and \fIpattern\fR is always treated as a
 159 single pattern.  This function exists solely to support legacy interfaces
 160 like \s-1NNTP\s0's \s-1XPAT\s0 command, and should be avoided when implementing new
 161 features.
 162 .PP
 163 \&\fBuwildmat_poison\fR works similarly to \fBuwildmat\fR, except that \f(CW\*(C`@\*(C'\fR as the
 164 first character of one of the patterns in the expression (see below)
 165 \&\*(L"poisons\*(R" the match if it matches.  \fBuwildmat_poison\fR returns
 166 \&\fB\s-1UWILDMAT_MATCH\s0\fR if the expression matches the text, \fB\s-1UWILDMAT_FAIL\s0\fR if
 167 it doesn't, and \fB\s-1UWILDMAT_POISON\s0\fR if the expression doesn't match because
 168 a poisoned pattern matched the text.  These enumeration constants are
 169 defined in the \fBlibinn.h\fR header.
 170 .SH "WILDMAT EXPRESSIONS"
 171 .IX Header "WILDMAT EXPRESSIONS"
 172 A wildmat expression follows rules similar to those of shell filename
 173 wildcards but with some additions and changes.  A wildmat \fIexpression\fR is
 174 composed of one or more wildmat \fIpatterns\fR separated by commas.  Each
 175 character in the wildmat pattern matches a literal occurrence of that same
 176 character in the text, with the exception of the following metacharacters:
 177 .IP "?" 8
 178 Matches any single character (including a single \s-1UTF\-8\s0 multibyte
 179 character, so \f(CW\*(C`?\*(C'\fR can match more than one byte).
 180 .IP "*\&" 8
 181 Matches any sequence of zero or more characters.
 182 .IP "\e" 8
 183 .IX Item ""
 184 Turns off any special meaning of the following character; the following
 185 character will match itself in the text.  \f(CW\*(C`\e\*(C'\fR will escape any character,
 186 including another backslash or a comma that otherwise would separate a
 187 pattern from the next pattern in an expression.  Note that \f(CW\*(C`\e\*(C'\fR is not
 188 special inside a character range (no metacharacters are).
 189 .IP "[...]" 8
 190 A character set, which matches any single character that falls within that
 191 set.  The presence of a character between the brackets adds that character
 192 to the set; for example, \f(CW\*(C`[amv]\*(C'\fR specifies the set containing the
 193 characters \f(CW\*(C`a\*(C'\fR, \f(CW\*(C`m\*(C'\fR, and \f(CW\*(C`v\*(C'\fR.  A range of characters may be specified
 194 using \f(CW\*(C`\-\*(C'\fR; for example, \f(CW\*(C`[0\-5abc]\*(C'\fR is equivalent to \f(CW\*(C`[012345abc]\*(C'\fR.  The
 195 order of characters is as defined in the \s-1UTF\-8\s0 character set, and if the
 196 start character of such a range falls after the ending character of the
 197 range in that ranking the results of attempting a match with that pattern
 198 are undefined.
 199 .Sp
 200 In order to include a literal \f(CW\*(C`]\*(C'\fR character in the set, it must be the
 201 first character of the set (possibly following \f(CW\*(C`^\*(C'\fR); for example, \f(CW\*(C`[]a]\*(C'\fR
 202 matches either \f(CW\*(C`]\*(C'\fR or \f(CW\*(C`a\*(C'\fR.  To include a literal \f(CW\*(C`\-\*(C'\fR character in the
 203 set, it must be either the first or the last character of the set.
 204 Backslashes have no special meaning inside a character set, nor do any
 205 of the other wildmat metacharacters.
 206 .IP "[^...]" 8
 207 A negated character set.  Follows the same rules as a character set above,
 208 but matches any character \fBnot\fR contained in the set.  So, for example,
 209 \&\f(CW\*(C`[^]\-]\*(C'\fR matches any character except \f(CW\*(C`]\*(C'\fR and \f(CW\*(C`\-\*(C'\fR.
 210 .PP
 211 In addition, \f(CW\*(C`!\*(C'\fR (and possibly \f(CW\*(C`@\*(C'\fR) have special meaning as the first
 212 character of a pattern; see below.
 213 .PP
 214 When matching a wildmat expression against some text, each comma-separated
 215 pattern is matched in order from left to right.  In order to match, the
 216 pattern must match the whole text; in regular expression terminology, it's
 217 implicitly anchored at both the beginning and the end.  For example, the
 218 pattern \f(CW\*(C`a\*(C'\fR matches only the text \f(CW\*(C`a\*(C'\fR; it doesn't match \f(CW\*(C`ab\*(C'\fR or \f(CW\*(C`ba\*(C'\fR
 219 or even \f(CW\*(C`aa\*(C'\fR.  If none of the patterns match, the whole expression
 220 doesn't match.  Otherwise, whether the expression matches is determined
 221 entirely by the rightmost matching pattern; the expression matches the
 222 text if and only if the rightmost matching pattern is not negated.
 223 .PP
 224 For example, consider the text \f(CW\*(C`news.misc\*(C'\fR.  The expression \f(CW\*(C`*\*(C'\fR matches
 225 this text, of course, as does \f(CW\*(C`comp.*,news.*\*(C'\fR (because the second pattern
 226 matches).  \f(CW\*(C`news.*,!news.misc\*(C'\fR does not match this text because both
 227 patterns match, meaning that the rightmost takes precedence, and the
 228 rightmost matching pattern is negated.  \f(CW\*(C`news.*,!news.misc,*.misc\*(C'\fR does
 229 match this text, since the rightmost matching pattern is not negated.
 230 .PP
 231 Note that the expression \f(CW\*(C`!news.misc\*(C'\fR can't match anything.  Either the
 232 pattern doesn't match, in which case no patterns match and the expression
 233 doesn't match, or the pattern does match, in which case because it's
 234 negated the expression doesn't match.  \f(CW\*(C`*,!news.misc\*(C'\fR, on the other hand,
 235 is a useful pattern that matches anything except \f(CW\*(C`news.misc\*(C'\fR.
 236 .PP
 237 \&\f(CW\*(C`!\*(C'\fR has significance only as the first character of a pattern; anywhere
 238 else in the pattern, it matches a literal \f(CW\*(C`!\*(C'\fR in the text like any other
 239 non\-metacharacter.
 240 .PP
 241 If the \fBuwildmat_poison\fR interface is used, then \f(CW\*(C`@\*(C'\fR behaves the same as
 242 \&\f(CW\*(C`!\*(C'\fR except that if an expression fails to match because the rightmost
 243 matching pattern began with \f(CW\*(C`@\*(C'\fR, \fB\s-1UWILDMAT_POISON\s0\fR is returned instead of
 244 \&\fB\s-1UWILDMAT_FAIL\s0\fR.
 245 .PP
 246 If the \fBuwildmat_simple\fR interface is used, the matching rules are the
 247 same as above except that none of \f(CW\*(C`!\*(C'\fR, \f(CW\*(C`@\*(C'\fR, or \f(CW\*(C`,\*(C'\fR have any special
 248 meaning at all and only match those literal characters.
 249 .SH "BUGS"
 250 .IX Header "BUGS"
 251 All of these functions internally convert the passed arguments to const
 252 unsigned char pointers.  The only reason why they take regular char
 253 pointers instead of unsigned char is for the convenience of \s-1INN\s0 and other
 254 callers that may not be using unsigned char everywhere they should.  In a
 255 future revision, the public interface should be changed to just take
 256 unsigned char pointers.
 257 .SH "HISTORY"
 258 .IX Header "HISTORY"
 259 Written by Rich \f(CW$alz\fR <rsalz@uunet.uu.net> in 1986, and posted to Usenet
 260 several times since then, most notably in comp.sources.misc in
 261 March, 1991.
 262 .PP
 263 Lars Mathiesen <thorinn@diku.dk> enhanced the multi-asterisk failure
 264 mode in early 1991.
 265 .PP
 266 Rich and Lars increased the efficiency of star patterns and reposted it to
 267 comp.sources.misc in April, 1991.
 268 .PP
 269 Robert Elz <kre@munnari.oz.au> added minus sign and close bracket handling
 270 in June, 1991.
 271 .PP
 272 Russ Allbery <rra@stanford.edu> added support for comma-separated patterns
 273 and the \f(CW\*(C`!\*(C'\fR and \f(CW\*(C`@\*(C'\fR metacharacters to the core wildmat routines in July,
 274 2000.  He also added support for \s-1UTF\-8\s0 characters, changed the default
 275 behavior to assume that both the text and the pattern are in \s-1UTF\-8\s0, and
 276 largely rewrote this documentation to expand and clarify the description
 277 of how a wildmat expression matches.
 278 .PP
 279 Please note that the interfaces to these functions are named \fBuwildmat\fR
 280 and the like rather than \fBwildmat\fR to distinguish them from the
 281 \&\fBwildmat\fR function provided by Rich \f(CW$alz\fR's original implementation.
 282 While this code is heavily based on Rich's original code, it has
 283 substantial differences, including the extension to support \s-1UTF\-8\s0
 284 characters, and has noticeable functionality changes.  Any bugs present in
 285 it aren't Rich's fault.
 286 .PP
 287 $Id: uwildmat.3 7880 2008-06-16 20:37:13Z iulius $
 288 .SH "SEE ALSO"
 289 .IX Header "SEE ALSO"
 290 \&\fIgrep\fR\|(1), \fIfnmatch\fR\|(3), \fIregex\fR\|(3), \fIregexp\fR\|(3).