Commit | Line | Data |
---|---|---|
236f657b MW |
1 | .\" -*-nroff-*- |
2 | .TH codec 3 "9 January 2009" "Straylight/Edgeware" "mLib utilities library" | |
3 | .SH NAME | |
4 | codec \- binary encoding and decoding | |
5 | .\" @codec_class | |
6 | .\" @codec_strerror | |
7 | .\" @null_codec_class | |
8 | .\" @base64_class | |
9 | .\" @file64_class | |
10 | .\" @base64url_class | |
11 | .\" @base32_class | |
12 | .\" @base32hex_class | |
13 | .\" @hex_class | |
14 | .SH SYNOPSIS | |
15 | .nf | |
16 | .B "#include <mLib/codec.h>" | |
17 | .B "#include <mLib/base64.h>" | |
18 | .B "#include <mLib/base32.h>" | |
19 | .B "#include <mLib/hex.h>" | |
20 | ||
4729aa69 MW |
21 | .B "#define CDCF_LOWERC ..." |
22 | .B "#define CDCF_IGNCASE ..." | |
23 | .B "#define CDCF_NOEQPAD ..." | |
24 | .B "#define CDCF_IGNEQPAD ..." | |
25 | .B "#define CDCF_IGNEQMID ..." | |
26 | .B "#define CDCF_IGNZPAD ..." | |
27 | .B "#define CDCF_IGNNEWL ..." | |
28 | .B "#define CDCF_IGNINVCH ..." | |
29 | .B "#define CDCF_IGNSPC ..." | |
30 | .B "#define CDCF_IGNJUNK ..." | |
31 | ||
32 | .B "enum {" | |
33 | .B "\h'4n'CDCERR_OK = ...," | |
34 | .B "\h'4n'CDCERR_INVCH = ...," | |
35 | .B "\h'4n'CDCERR_INVEQPAD = ...," | |
36 | .B "\h'4n'CDCERR_INVZPAD = ..." | |
37 | .B "};" | |
38 | ||
39 | .B "typedef struct {" | |
40 | .B "\h'4n'const char *name;" | |
41 | .ds mT \fBcodec *(*encoder)( | |
42 | .BI "\h'4n'\*(mTunsigned " flags , | |
43 | .BI "\h'4n+\w'\*(mT'u'const char *" indent ", unsigned " maxline ); | |
44 | .BI "\h'4n'codec *(*decoder)(unsigned " flags ); | |
45 | .B "\h'4n'...\&" | |
46 | .B "} codec_class;" | |
47 | ||
48 | .B "typedef struct {" | |
49 | .B "\h'4n'const codec_ops *ops;" | |
50 | .B "} codec;" | |
51 | ||
52 | .B "typedef struct {" | |
53 | .B "\h'4n'const codec_class *c;" | |
54 | .BI "\h'4n'int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ); | |
55 | .BI "\h'4n'void (*destroy)(codec *" c ); | |
56 | .B "} codec_ops;" | |
57 | ||
236f657b MW |
58 | .B "codec_class null_codec_class;" |
59 | .B "codec_class base64_class, file64_class, base64url_class;" | |
60 | .B "codec_class base32_class, base32hex_class;" | |
61 | .B "codec_class hex_class;" | |
62 | ||
63 | .BI "const char *codec_strerror(int " err ");" | |
64 | .fi | |
65 | .SH DESCRIPTION | |
66 | The | |
67 | .B codec | |
68 | system provides an object-based interface to functions which encode | |
69 | binary data as plain text and decode the result to recover the original | |
70 | binary data. The interface makes it easy to support multiple encodings | |
71 | and select an appropriate one at runtime. | |
72 | .SS "The codec_class structure" | |
73 | The | |
74 | .B codec_class | |
75 | structure represents a particular encoding format. The structure has | |
76 | the following members. | |
77 | .TP | |
78 | .B "const char *name" | |
79 | The name of the class, as a null-terminated string. The name should not | |
80 | contain whitespace characters. | |
81 | .TP | |
82 | .BI "codec *(*encoder)(unsigned " flags ", const char *" indent ", unsigned " maxline ")" | |
83 | Pointer to a function which constructs a new encoder object, of type | |
84 | .BR codec . | |
85 | The | |
86 | .I flags | |
87 | configure the behaviour of the object; the | |
88 | .I indent | |
89 | string is written to separate lines of output; the integer | |
90 | .I maxline | |
91 | is the maximum length of line to be produced, or zero to forbid line | |
92 | breaking. | |
93 | .TP | |
94 | .BI "codec *(*decoder)(unsigned " flags ")" | |
95 | Pointer to a function which constructs a new decoder object, also of | |
96 | type | |
97 | .BR codec . | |
98 | The | |
99 | .I flags | |
100 | configure the behaviour of the object. | |
101 | .PP | |
102 | The | |
103 | .I flags | |
104 | to the | |
105 | .B encoder | |
106 | and | |
107 | .B decoder | |
108 | functions have the following meanings. | |
109 | .TP | |
110 | .B CDCF_LOWERC | |
111 | For codecs which produce output using a single alphabetic case (e.g., | |
112 | .BR base32 , | |
113 | .BR hex ), | |
114 | emit and accept only lower case; the default is to emit and accept only | |
115 | upper case, for compatibility with RFC4648. If the codec usually | |
116 | produces mixed-case output, then this flag is ignored. | |
117 | .TP | |
118 | .B CDCF_IGNCASE | |
119 | For codecs which produce output using a single alphabetic case, ignore | |
120 | the case of the input when decoding. If the codec usually produces | |
121 | mixed-case output, then this flag is ignored. | |
122 | .TP | |
123 | .B CDCF_NOEQPAD | |
124 | For codecs which usually pad their output (e.g., | |
125 | .BR base64 , | |
126 | .BR base32 ), | |
127 | do not emit or accept padding characters. If the codec does not usually | |
128 | produce padding, or the padding is not redundant, then this flag is | |
129 | ignored. | |
130 | .TP | |
131 | .B CDCF_IGNEQPAD | |
132 | For codecs which usually pad their output, do not treat incorrect (e.g., | |
133 | missing or excessive) padding as an error when decoding. If the codec | |
134 | does not usually produce padding, or the padding is required for | |
135 | unambiguous decoding, then this flag is ignored. | |
136 | .TP | |
137 | .B CDCF_IGNEQMID | |
138 | For codecs which usually pad their output, ignore padding characters | |
139 | wherever they may appear when decoding. Usually padding characters | |
140 | indicate the end of the input, and further input characters are | |
141 | considered erroneous. If the codec does not usually produce padding, or | |
142 | it is impossible to resume decoding correctly having seen padding | |
143 | characters, then this flag is ignored. | |
144 | .TP | |
145 | .B CDCF_IGNZPAD | |
146 | For codecs which need to pad their input, ignore unusual padding bits | |
147 | when decoding. (This is not at all the same thing as the padding | |
148 | characters controlled by the flags above: they deal with padding the | |
149 | length of the encoding | |
150 | .I output | |
151 | up to a suitable multiple of characters; this option deals with padding | |
152 | of the | |
153 | .I input | |
154 | prior to encoding.) If the codec does not add padding bits, or specific | |
155 | values are required for unambiguous decoding, then this flag is ignored. | |
156 | .TP | |
157 | .B CDCF_IGNNEWL | |
158 | Ignore newline (and carriage-return) characters when decoding: the | |
159 | default for RFC4648 codecs is to reject newline characters. If these | |
160 | characters are significant in the encoding, then this flag is ignored. | |
161 | .TP | |
09fbf4d0 MW |
162 | .B CDCF_IGNSPC |
163 | Ignore whitespace characters (other than newlines) when decoding: the | |
164 | default for RFC4648 codecs is to reject whitespace characters. If these | |
165 | characters are significant in the encoding, then this flag is ignored. | |
166 | .TP | |
236f657b MW |
167 | .B CDCF_IGNINVCH |
168 | Ignore any other invalid characters appearing in the input when | |
169 | decoding. | |
170 | .TP | |
171 | .B CDCF_IGNJUNK | |
172 | Ignore all `junk' in the input. This should suppress almost all | |
173 | decoding errors. | |
174 | .PP | |
175 | If you do not set any of the | |
c3dd6b29 | 176 | .BR CDCF_IGN ...\& |
236f657b MW |
177 | flags, a decoder should only accept the exact encoding that the |
178 | corresponding encoder would produce (with | |
179 | .I maxline | |
180 | = 0 to inhibit line-breaking). | |
181 | .SS "The codec and codec_ops structures" | |
182 | The | |
183 | .B codec | |
184 | structure represents the state of an encoder or decoder, as returned by | |
185 | the | |
186 | .B encoder | |
187 | and | |
188 | .B decoder | |
189 | functions described above. It contains a single member. | |
190 | .TP | |
191 | .B "const codec_ops *ops" | |
192 | Pointer to a | |
193 | .B codec_ops | |
194 | structure which contains operations and metadata for use with the | |
195 | encoder or decoder. | |
196 | .PP | |
197 | The | |
198 | .B codec_ops | |
199 | structure contains the following members. | |
200 | .TP | |
201 | .B "const codec_class *c" | |
202 | Pointer back to the | |
203 | .B codec_class | |
204 | which was used to construct the | |
205 | .B codec | |
206 | object. | |
207 | .TP | |
208 | .BI "int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ")" | |
209 | Encode or decode, using the codec | |
63ba7202 | 210 | .IR c , |
236f657b MW |
211 | the data in the buffer at address |
212 | .I p | |
213 | and continuing for | |
214 | .I sz | |
215 | bytes, appending the output to the dynamic string | |
216 | .I d | |
217 | (see | |
218 | .BR dstr (3)). | |
219 | If the operation was successful, the function returns zero; otherwise it | |
220 | returns a nonzero error code, as described below. | |
221 | .TP | |
222 | .BI "void (*destroy)(codec *" c ")" | |
223 | Destroy the codec object | |
224 | .IR c , | |
225 | freeing any resources it may hold. | |
226 | .PP | |
227 | A codec may buffer its input (e.g., if it needs to see more in order to | |
228 | decide what output to produce next); it may also need to take special | |
229 | action at the end of the input (e.g., flushing buffers, and applying | |
230 | padding). To signal the codec that there is no more input, call the | |
231 | .B code | |
232 | function with a null | |
233 | .I p | |
234 | pointer. It will then write any final output to | |
235 | .IR d . | |
236 | .PP | |
237 | The following error conditions may be reported. | |
238 | .TP | |
239 | .B CDCERR_INVCH | |
240 | An invalid character was encountered while decoding. This includes | |
241 | encountering padding characters if padding is disabled using the | |
242 | .B CDCF_NOEQPAD | |
243 | flag. | |
244 | .TP | |
245 | .B CDCERR_INVEQPAD | |
246 | Invalid padding characters (e.g., wrong characters, or too few, too | |
247 | many, or none at all) were found during decoding. This may also | |
248 | indicate that the input is truncated, even if the codec does not usually | |
249 | perform output padding. | |
250 | .TP | |
251 | .B CDCERR_INVZPAD | |
252 | Invalid padding bits were found during decoding. | |
253 | .PP | |
254 | The | |
255 | .B codec_strerror | |
256 | function converts these error codes to brief, (moderately) | |
257 | human-readable strings. | |
258 | .SS "Provided codecs" | |
259 | The library provides a number of standard codecs. | |
260 | .TP | |
261 | .B base64 | |
262 | Implements Base64 encoding, as defined by RFC4648. Output is | |
263 | mixed-case, so the | |
264 | .B CDCF_LOWERC | |
265 | and | |
266 | .B CDCF_IGNCASE | |
267 | flags are ignored. | |
268 | .TP | |
269 | .B file64 | |
270 | Implements a variant of the Base64 encoding which uses | |
271 | .RB ` % ' | |
272 | in place of | |
273 | .RB ` / ', | |
274 | so that its output is suitable for use as a Unix filename. | |
275 | .TP | |
276 | .B base64url | |
277 | Implements the filename- and URL-safe variant of Base64 encoding, as | |
278 | defined by RFC4648. | |
279 | .TP | |
280 | .B base32 | |
281 | Implements Base32 encoding, as defined by RFC4648. Output is in upper | |
282 | case by default. | |
283 | .TP | |
284 | .B base32hex | |
285 | Implements the extended-hex variant of Base32, as defined by RFC4648. | |
286 | This encoding has the property that the encoding preserves the ordering | |
287 | of messages if padding is suppressed. | |
288 | .TP | |
289 | .B hex | |
290 | Implements hex encoding, defined by RFC4648 under the name Base16. For | |
291 | compatibility with that specification, output is in upper case by | |
292 | default. | |
293 | .SH "SEE ALSO" | |
294 | .BR bincode (1), | |
295 | .BR dstr (3), | |
296 | .BR mLib (3). | |
297 | .SH AUTHOR | |
298 | Mark Wooding, <mdw@distorted.org.uk> |