Commit | Line | Data |
---|---|---|
236f657b MW |
1 | .\" -*-nroff-*- |
2 | .TH codec 3 "9 January 2009" "Straylight/Edgeware" "mLib utilities library" | |
3 | .SH NAME | |
4 | codec \- binary encoding and decoding | |
5 | .\" @codec_class | |
6 | .\" @codec_strerror | |
7 | .\" @null_codec_class | |
8 | .\" @base64_class | |
9 | .\" @file64_class | |
10 | .\" @base64url_class | |
11 | .\" @base32_class | |
12 | .\" @base32hex_class | |
13 | .\" @hex_class | |
14 | .SH SYNOPSIS | |
15 | .nf | |
16 | .B "#include <mLib/codec.h>" | |
17 | .B "#include <mLib/base64.h>" | |
18 | .B "#include <mLib/base32.h>" | |
19 | .B "#include <mLib/hex.h>" | |
d056fbdf | 20 | .PP |
4729aa69 MW |
21 | .B "#define CDCF_LOWERC ..." |
22 | .B "#define CDCF_IGNCASE ..." | |
23 | .B "#define CDCF_NOEQPAD ..." | |
24 | .B "#define CDCF_IGNEQPAD ..." | |
25 | .B "#define CDCF_IGNEQMID ..." | |
26 | .B "#define CDCF_IGNZPAD ..." | |
27 | .B "#define CDCF_IGNNEWL ..." | |
28 | .B "#define CDCF_IGNINVCH ..." | |
29 | .B "#define CDCF_IGNSPC ..." | |
30 | .B "#define CDCF_IGNJUNK ..." | |
d056fbdf | 31 | .PP |
adec5584 | 32 | .ta 2n |
4729aa69 | 33 | .B "enum {" |
adec5584 MW |
34 | .B " CDCERR_OK = ...," |
35 | .B " CDCERR_INVCH = ...," | |
36 | .B " CDCERR_INVEQPAD = ...," | |
37 | .B " CDCERR_INVZPAD = ..." | |
4729aa69 | 38 | .B "};" |
d056fbdf | 39 | .PP |
4729aa69 | 40 | .B "typedef struct {" |
adec5584 MW |
41 | .B " const char *name;" |
42 | .ta 2n +\w'\fBcodec *(*encoder)('u | |
43 | .BI " codec *(*encoder)(unsigned " flags , | |
44 | .BI " const char *" indent ", unsigned " maxline ); | |
45 | .BI " codec *(*decoder)(unsigned " flags ); | |
46 | .B " ...\&" | |
4729aa69 | 47 | .B "} codec_class;" |
d056fbdf | 48 | .PP |
4729aa69 | 49 | .B "typedef struct {" |
adec5584 | 50 | .B " const codec_ops *ops;" |
4729aa69 | 51 | .B "} codec;" |
d056fbdf | 52 | .PP |
4729aa69 | 53 | .B "typedef struct {" |
adec5584 MW |
54 | .B " const codec_class *c;" |
55 | .BI " int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ); | |
56 | .BI " void (*destroy)(codec *" c ); | |
4729aa69 | 57 | .B "} codec_ops;" |
d056fbdf | 58 | .PP |
236f657b MW |
59 | .B "codec_class null_codec_class;" |
60 | .B "codec_class base64_class, file64_class, base64url_class;" | |
61 | .B "codec_class base32_class, base32hex_class;" | |
62 | .B "codec_class hex_class;" | |
d056fbdf | 63 | .PP |
236f657b MW |
64 | .BI "const char *codec_strerror(int " err ");" |
65 | .fi | |
66 | .SH DESCRIPTION | |
67 | The | |
68 | .B codec | |
69 | system provides an object-based interface to functions which encode | |
70 | binary data as plain text and decode the result to recover the original | |
71 | binary data. The interface makes it easy to support multiple encodings | |
72 | and select an appropriate one at runtime. | |
73 | .SS "The codec_class structure" | |
74 | The | |
75 | .B codec_class | |
76 | structure represents a particular encoding format. The structure has | |
77 | the following members. | |
78 | .TP | |
79 | .B "const char *name" | |
80 | The name of the class, as a null-terminated string. The name should not | |
81 | contain whitespace characters. | |
82 | .TP | |
83 | .BI "codec *(*encoder)(unsigned " flags ", const char *" indent ", unsigned " maxline ")" | |
84 | Pointer to a function which constructs a new encoder object, of type | |
85 | .BR codec . | |
86 | The | |
87 | .I flags | |
88 | configure the behaviour of the object; the | |
89 | .I indent | |
90 | string is written to separate lines of output; the integer | |
91 | .I maxline | |
92 | is the maximum length of line to be produced, or zero to forbid line | |
93 | breaking. | |
94 | .TP | |
95 | .BI "codec *(*decoder)(unsigned " flags ")" | |
96 | Pointer to a function which constructs a new decoder object, also of | |
97 | type | |
98 | .BR codec . | |
99 | The | |
100 | .I flags | |
101 | configure the behaviour of the object. | |
102 | .PP | |
103 | The | |
104 | .I flags | |
105 | to the | |
106 | .B encoder | |
107 | and | |
108 | .B decoder | |
109 | functions have the following meanings. | |
110 | .TP | |
111 | .B CDCF_LOWERC | |
112 | For codecs which produce output using a single alphabetic case (e.g., | |
113 | .BR base32 , | |
114 | .BR hex ), | |
115 | emit and accept only lower case; the default is to emit and accept only | |
116 | upper case, for compatibility with RFC4648. If the codec usually | |
117 | produces mixed-case output, then this flag is ignored. | |
118 | .TP | |
119 | .B CDCF_IGNCASE | |
120 | For codecs which produce output using a single alphabetic case, ignore | |
121 | the case of the input when decoding. If the codec usually produces | |
122 | mixed-case output, then this flag is ignored. | |
123 | .TP | |
124 | .B CDCF_NOEQPAD | |
125 | For codecs which usually pad their output (e.g., | |
126 | .BR base64 , | |
127 | .BR base32 ), | |
128 | do not emit or accept padding characters. If the codec does not usually | |
129 | produce padding, or the padding is not redundant, then this flag is | |
130 | ignored. | |
131 | .TP | |
132 | .B CDCF_IGNEQPAD | |
133 | For codecs which usually pad their output, do not treat incorrect (e.g., | |
134 | missing or excessive) padding as an error when decoding. If the codec | |
135 | does not usually produce padding, or the padding is required for | |
136 | unambiguous decoding, then this flag is ignored. | |
137 | .TP | |
138 | .B CDCF_IGNEQMID | |
139 | For codecs which usually pad their output, ignore padding characters | |
140 | wherever they may appear when decoding. Usually padding characters | |
141 | indicate the end of the input, and further input characters are | |
142 | considered erroneous. If the codec does not usually produce padding, or | |
143 | it is impossible to resume decoding correctly having seen padding | |
144 | characters, then this flag is ignored. | |
145 | .TP | |
146 | .B CDCF_IGNZPAD | |
147 | For codecs which need to pad their input, ignore unusual padding bits | |
148 | when decoding. (This is not at all the same thing as the padding | |
149 | characters controlled by the flags above: they deal with padding the | |
150 | length of the encoding | |
151 | .I output | |
152 | up to a suitable multiple of characters; this option deals with padding | |
153 | of the | |
154 | .I input | |
155 | prior to encoding.) If the codec does not add padding bits, or specific | |
156 | values are required for unambiguous decoding, then this flag is ignored. | |
157 | .TP | |
158 | .B CDCF_IGNNEWL | |
159 | Ignore newline (and carriage-return) characters when decoding: the | |
160 | default for RFC4648 codecs is to reject newline characters. If these | |
161 | characters are significant in the encoding, then this flag is ignored. | |
162 | .TP | |
09fbf4d0 MW |
163 | .B CDCF_IGNSPC |
164 | Ignore whitespace characters (other than newlines) when decoding: the | |
165 | default for RFC4648 codecs is to reject whitespace characters. If these | |
166 | characters are significant in the encoding, then this flag is ignored. | |
167 | .TP | |
236f657b MW |
168 | .B CDCF_IGNINVCH |
169 | Ignore any other invalid characters appearing in the input when | |
170 | decoding. | |
171 | .TP | |
172 | .B CDCF_IGNJUNK | |
173 | Ignore all `junk' in the input. This should suppress almost all | |
174 | decoding errors. | |
175 | .PP | |
176 | If you do not set any of the | |
c3dd6b29 | 177 | .BR CDCF_IGN ...\& |
236f657b MW |
178 | flags, a decoder should only accept the exact encoding that the |
179 | corresponding encoder would produce (with | |
180 | .I maxline | |
181 | = 0 to inhibit line-breaking). | |
182 | .SS "The codec and codec_ops structures" | |
183 | The | |
184 | .B codec | |
185 | structure represents the state of an encoder or decoder, as returned by | |
186 | the | |
187 | .B encoder | |
188 | and | |
189 | .B decoder | |
190 | functions described above; it contains a single member. | |
191 | .TP | |
192 | .B "const codec_ops *ops" | |
193 | Pointer to a | |
194 | .B codec_ops | |
195 | structure which contains operations and metadata for use with the | |
196 | encoder or decoder. | |
197 | .PP | |
198 | The | |
199 | .B codec_ops | |
200 | structure contains the following members. | |
201 | .TP | |
202 | .B "const codec_class *c" | |
203 | Pointer back to the | |
204 | .B codec_class | |
205 | which was used to construct the | |
206 | .B codec | |
207 | object. | |
208 | .TP | |
209 | .BI "int (*code)(codec *" c ", const void *" p ", size_t " sz ", dstr *" d ")" | |
210 | Encode or decode, using the codec | |
63ba7202 | 211 | .IR c , |
236f657b MW |
212 | the data in the buffer at address |
213 | .I p | |
214 | and continuing for | |
215 | .I sz | |
216 | bytes, appending the output to the dynamic string | |
217 | .I d | |
218 | (see | |
219 | .BR dstr (3)). | |
220 | If the operation was successful, the function returns zero; otherwise it | |
221 | returns a nonzero error code, as described below. | |
222 | .TP | |
223 | .BI "void (*destroy)(codec *" c ")" | |
224 | Destroy the codec object | |
225 | .IR c , | |
226 | freeing any resources it may hold. | |
227 | .PP | |
228 | A codec may buffer its input (e.g., if it needs to see more in order to | |
229 | decide what output to produce next); it may also need to take special | |
230 | action at the end of the input (e.g., flushing buffers, and applying | |
231 | padding). To signal the codec that there is no more input, call the | |
232 | .B code | |
233 | function with a null | |
234 | .I p | |
235 | pointer. It will then write any final output to | |
236 | .IR d . | |
237 | .PP | |
238 | The following error conditions may be reported. | |
239 | .TP | |
240 | .B CDCERR_INVCH | |
241 | An invalid character was encountered while decoding. This includes | |
242 | encountering padding characters if padding is disabled using the | |
243 | .B CDCF_NOEQPAD | |
244 | flag. | |
245 | .TP | |
246 | .B CDCERR_INVEQPAD | |
247 | Invalid padding characters (e.g., wrong characters, or too few, too | |
248 | many, or none at all) were found during decoding. This may also | |
249 | indicate that the input is truncated, even if the codec does not usually | |
250 | perform output padding. | |
251 | .TP | |
252 | .B CDCERR_INVZPAD | |
253 | Invalid padding bits were found during decoding. | |
254 | .PP | |
255 | The | |
256 | .B codec_strerror | |
257 | function converts these error codes to brief, (moderately) | |
258 | human-readable strings. | |
259 | .SS "Provided codecs" | |
260 | The library provides a number of standard codecs. | |
261 | .TP | |
262 | .B base64 | |
263 | Implements Base64 encoding, as defined by RFC4648. Output is | |
264 | mixed-case, so the | |
265 | .B CDCF_LOWERC | |
266 | and | |
267 | .B CDCF_IGNCASE | |
268 | flags are ignored. | |
269 | .TP | |
270 | .B file64 | |
271 | Implements a variant of the Base64 encoding which uses | |
272 | .RB ` % ' | |
273 | in place of | |
274 | .RB ` / ', | |
275 | so that its output is suitable for use as a Unix filename. | |
276 | .TP | |
277 | .B base64url | |
278 | Implements the filename- and URL-safe variant of Base64 encoding, as | |
279 | defined by RFC4648. | |
280 | .TP | |
281 | .B base32 | |
282 | Implements Base32 encoding, as defined by RFC4648. Output is in upper | |
283 | case by default. | |
284 | .TP | |
285 | .B base32hex | |
286 | Implements the extended-hex variant of Base32, as defined by RFC4648. | |
287 | This encoding has the property that the encoding preserves the ordering | |
288 | of messages if padding is suppressed. | |
289 | .TP | |
290 | .B hex | |
291 | Implements hex encoding, defined by RFC4648 under the name Base16. For | |
292 | compatibility with that specification, output is in upper case by | |
293 | default. | |
294 | .SH "SEE ALSO" | |
295 | .BR bincode (1), | |
296 | .BR dstr (3), | |
297 | .BR mLib (3). | |
298 | .SH AUTHOR | |
299 | Mark Wooding, <mdw@distorted.org.uk> |