24 .TH url 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
26 url \- manipulation of form-urlencoded strings
33 .B "#include <mLib/url.h>"
46 .B "#define URLF_STRICT ..."
47 .B "#define URLF_LAX ..."
48 .B "#define URLF_SEMI ..."
50 .BI "void url_initenc(url_ectx *" ctx );
51 .ta \w'\fBvoid url_enc('u
52 .BI "void url_enc(url_ectx *" ctx ", dstr *" d ,
53 .BI " const char *" name ", const char *" value );
55 .BI "void url_initdec(url_dctx *" ctx ", const char *" p );
56 .BI "int url_dec(url_dctx *" ctx ", dstr *" n ", dstr *" v );
61 read and write `form-urlencoded' data, as specified in RFC1866. The
62 encoding represents a sequence of name/value pairs where both the name
63 and value are arbitrary binary strings (although the format is optimized
64 for textual data). An encoded string contains no nonprintable
65 characters or whitespace. This interface is capable of decoding any
66 urlencoded string; however, it can currently only
68 names and values which do not contain null bytes, because the encoding
69 interface uses standard C strings.
71 Encoding a sequence of name/value pairs is achieved using the
73 function. It requires as input an
74 .IR "encoding context" ,
75 represented as an object of type
77 This must be initialized before use by passing it to the function
81 encodes one name/value pair, appending the encoded output to a dynamic
86 You can set flags in the encoding context's
91 Be strict about escaping non-alphanumeric characters. Without this,
92 potentially unsafe characters such as
96 will be left unescaped, which makes encoded filenames (for example) more
100 Be very lax about non-alphanumeric characters. Everything except
101 obviously-unsafe characters like
110 to separate name/value pairs, rather than the ampersand
113 Decoding a sequence of name/value pairs is performed using the
115 function. It requires as input a
116 .IR "decoding context" ,
117 represented as an object of type
119 This must be initialized before use by passing it to the function
121 along with the address of the urlencoded string to decode. The string
122 is not modified during decoding. Each call to
124 extracts a name/value pair. The name and value are written to the
129 so you probably want to reset them before each call. If there are no
130 more name/value pairs to read,
132 returns zero; otherwise it returns a nonzero value.
134 You can set flags in the decoding context's
141 to separate name/value pairs,
145 Without this flag, the semicolon is considered an `ordinary' character
146 which can appear unescaped as part of names and values. (Note the
147 difference from the same flag's meaning when encoding. When encoding,
150 the use of the semicolon, and when decoding, it
154 The example code below demonstrates converting between a symbol table
155 and a urlencoded representation. The code is untested.
159 #include <mLib/alloc.h>
160 #include <mLib/dstr.h>
161 #include <mLib/sym.h>
162 #include <mLib/url.h>
169 void decode(sym_table *t, const char *p)
172 dstr n = DSTR_INIT, v = DSTR_INIT;
176 for (url_initdec(&c, p); url_dec(&c, &n, &v); ) {
177 vv = sym_find(t, n.buf, -1, sizeof(*vv), &f);
179 vv->v = xstrdup(v.buf);
183 dstr_destroy(&n); dstr_destroy(&v);
186 void encode(sym_table *t, dstr *d)
193 for (sym_mkiter(&i, t); (v = sym_next(&i)) != 0; )
194 url_enc(&c, d, SYM_NAME(v), v->v);
200 Mark Wooding, <mdw@distorted.org.uk>.