chiark / gitweb /
codec/{base32,hex}.h: Include `codec.h'.
[mLib] / codec / baseconv.c
CommitLineData
236f657b
MW
1/* -*-c-*-
2 *
3 * Binary base-conversion encoding and decoding (base64, base32, etc.)
4 *
5 * (c) 1997 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <ctype.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34
35#include "alloc.h"
36#include "codec.h"
37#include "dstr.h"
38#include "sub.h"
39
40#include "base64.h"
41#include "base32.h"
42#include "hex.h"
43
44/*----- Important tables --------------------------------------------------*/
45
/* --- Magic constants --- *
 *
 * Negative markers stored in the decode maps alongside the (nonnegative)
 * digit values.
 */

#define NV (-1)                         /* Not a valid character */
#define PC (-2)                         /* The padding character */
#define NL (-3)                         /* A newline character */
51
52/* --- Base64 --- */
53
/* Standard alphabet: digits 62 and 63 are `+' and `/'. */
static const char encodemap_base64[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";

/* Filename-friendly alphabet: digit 63 is `%' rather than `/'. */
static const char encodemap_file64[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+%";

/* URL-friendly alphabet: digits 62 and 63 are `-' and `_'. */
static const char encodemap_base64url[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789-_";
64
65static const signed char decodemap_base64[] = {
66 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */
67 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
68 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, NV, 63, /* 2x */
69 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
70 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
71 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
72 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
73 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
74}, decodemap_file64[] = {
75 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */
76 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
77 NV, NV, NV, NV, NV, 63, NV, NV, NV, NV, NV, 62, NV, NV, NV, NV, /* 2x */
78 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
79 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
80 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
81 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
82 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
83}, decodemap_base64url[] = {
84 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */
85 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
86 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, /* 2x */
87 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
88 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
89 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, 63, /* 5x */
90 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
91 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
92};
93
94/* --- Base32 --- */
95
/* Standard base32 alphabet: letters first, then the digits `2' to `7'. */
static const char encodemap_base32[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

/* `Extended hex' base32 alphabet: digits first, then `A' to `V'. */
static const char encodemap_base32hex[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
99
100static const signed char decodemap_base32[] = {
101 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */
102 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
103 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
104 NV, NV, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, PC, NV, NV, /* 3x */
105 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
106 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
107 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
108 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
109}, decodemap_base32hex[] = {
110 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */
111 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
112 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
113 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, PC, NV, NV, /* 3x */
114 NV, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 4x */
115 25, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
116 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
117 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
118};
119
120/* --- Hex --- */
121
/* Hex digits, uppercase. */
static const char encodemap_hex[] = "0123456789ABCDEF";
124
125static const signed char decodemap_hex[] = {
126 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NL, NV, NV, NL, NV, NV, /* 0x */
127 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
128 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
129 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, NV, NV, NV, /* 3x */
130 NV, 10, 11, 12, 13, 14, 15, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 4x */
131 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
132 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
133 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
134};
135
136/*----- Base conversion macros --------------------------------------------*/
137
/* --- @BASECONV@ --- *
 *
 * Arguments:   @x@ = an input digit of width @iwd@ bits
 *              @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @put@ = function or macro to output a digit
 *
 * Use:         Inserts the bits of @x@ into an accumulator.  As digits @y@
 *              of width @owd@ become ready, @put(y)@ is invoked to emit
 *              them.
 *
 *              The expansion reads and updates two variables which must
 *              be in scope at the point of use: the accumulator @a@ and
 *              the count @nb@ of bits in it not yet emitted.  Every
 *              argument is parenthesized in the expansion, so @x@ may be
 *              an arbitrary expression; it is evaluated exactly once.
 */

#define BASECONV(x, iwd, owd, put) do { \
  a = (a << (iwd)) | (x); \
  nb += (iwd); \
  while (nb >= (owd)) { \
    nb -= (owd); \
    put((a >> nb) & ((1 << (owd)) - 1)); \
  } \
} while (0)
157
/* --- @BASECONV_FLUSH@ --- *
 *
 * Arguments:   @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @put@ = function or macro to output a digit
 *
 * Use:         Emits the final, partial digit left in the shift register,
 *              if there is one.  Zero bits are shifted in on the right,
 *              @iwd@ at a time, until at least @owd@ bits are present;
 *              then one digit of width @owd@ is passed to @put@.  Does
 *              nothing if the register is empty.  Works on the variables
 *              @a@ and @nb@ in the enclosing scope.
 */

#define BASECONV_FLUSH(iwd, owd, put) do { \
  if (nb != 0) { \
    for (; nb < (owd); nb += (iwd)) a <<= (iwd); \
    nb -= (owd); \
    put((a >> nb) & ((1 << (owd)) - 1)); \
  } \
} while (0)
177
/* --- @BASECONV_PAD@ --- *
 *
 * Arguments:   @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @pad@ = function or macro to output padding
 *
 * Use:         Invokes @pad@ once for each output digit needed to bring
 *              the shift register back into alignment: while bits remain,
 *              a full output digit's worth is replaced by one @pad@, and
 *              a shortfall is made up by notionally reading another
 *              @iwd@ bits.  Works on the variable @nb@ in the enclosing
 *              scope, which is zero afterwards.
 */

#define BASECONV_PAD(iwd, owd, pad) do { \
  while (nb) { \
    if (nb < (owd)) \
      nb += (iwd); \
    else { \
      pad; \
      nb -= (owd); \
    } \
  } \
} while (0)
195
/* Padding macro for encodings which never pad: expands to an empty
 * statement and evaluates none of its arguments.
 */
#define NULL_PAD(iwd, owd, pad) do { } while (0)
197
198/*----- Lists of things to make -------------------------------------------*/
199
/* One entry per codec class: its name (also the suffix of its encode and
 * decode maps), the context type it runs on, and that context's
 * accumulator member.
 */
#define CODECS(X) \
  /* NAME,      CTXN,   ACC */ \
  X(base64,     base64, acc) \
  X(file64,     base64, acc) \
  X(base64url,  base64, acc) \
  X(base32,     base32, accl) \
  X(base32hex,  base32, accl) \
  X(hex,        hex,    acc)

/* One entry per context type: its name, the width in bits of one encoded
 * digit, and the name of its accumulator member.
 */
#define CTXS(X) \
  /* CTXN,      WD,     ACC */ \
  X(base64,     6,      acc) \
  X(base32,     5,      accl) \
  X(hex,        4,      acc)
214
/* Per-context settings, looked up by token-pasting the context name:
 *
 *   * @PADDING@ is the macro used to pad the encoder's final output;
 *   * @FLAGMASK@ and @FLAGXOR@ are applied, in that order, to the flags
 *     stored in a context; and
 *   * @OLDFLAGS@ are the flags given to contexts set up through the old
 *     @CTXN_init@ interface.
 */

#define base64_PADDING  BASECONV_PAD
#define base64_FLAGMASK (~(CDCF_LOWERC | CDCF_IGNCASE))
#define base64_FLAGXOR  (0)
#define base64_OLDFLAGS (CDCF_IGNJUNK)

#define base32_PADDING  BASECONV_PAD
#define base32_FLAGMASK (~0)
#define base32_FLAGXOR  (0)
#define base32_OLDFLAGS (CDCF_IGNJUNK)

#define hex_PADDING     NULL_PAD
#define hex_FLAGMASK    (~0)
#define hex_FLAGXOR     (0)
#define hex_OLDFLAGS    (CDCF_IGNJUNK | CDCF_LOWERC)
229
230/*----- Data structures ---------------------------------------------------*/
231
232#define OBJ(ctxn, wd, acc) \
233 \
234typedef struct ctxn##_codec { \
235 codec c; \
236 ctxn##_ctx ctx; \
237 const char *encodemap; \
238 const signed char *decodemap; \
239} ctxn##_codec;
240
241CTXS(OBJ)
242
243/*----- State packing -----------------------------------------------------*
244 *
245 * These macros convert between the state required by the new encoding and
246 * decoding core and the old externally-visible context structures. It's
247 * unpleasant, I know; maybe we can drop the old interface later.
248 */
249
/* Decoder states, as stored in the packed accumulator word. */
enum {
  ST_MAIN = 0,                          /* Main decoding state */
  ST_PAD = 1,                           /* Decoding trailing padding */
  ST_END = 2                            /* Finished decoding */
};
255
/* --- @STATE_UNPACK@ --- *
 *
 * Arguments:   @acc@ = name of the accumulator member of @*ctx@
 *
 * Use:         Declares and initializes the working state from the context
 *              @ctx@ in scope: the shift register @a@ (bits 0--15 of the
 *              accumulator word), the pending bit count @nb@ (bits
 *              16--23), the decoder state @st@ (bits 24--31), and the
 *              flags @f@ (taken from the @qsz@ member).
 *
 *              The expansion is a run of declarations with no final
 *              semicolon: write @STATE_UNPACK(acc);@ among the
 *              declarations at the top of a block.  Leaving the
 *              terminator to the caller avoids a stray null statement
 *              ahead of the declarations which follow.
 */

#define STATE_UNPACK(acc) \
  unsigned long a = (ctx->acc >> 0) & 0xffff; \
  unsigned nb = (ctx->acc >> 16) & 0xff; \
  unsigned st = (ctx->acc >> 24) & 0xff; \
  unsigned f = ctx->qsz
261
/* --- @STATE_PACK@ --- *
 *
 * Arguments:   @acc@ = name of the accumulator member of @*ctx@
 *
 * Use:         Stores the working state back in the context @ctx@: the
 *              low 16 bits of @a@ go in bits 0--15 of the accumulator
 *              word, the low 8 bits of @nb@ in bits 16--23, and the low
 *              8 bits of @st@ in bits 24--31.  The flags are not written
 *              back.
 */

#define STATE_PACK(acc) do { \
  unsigned long packed_ = a & 0xffff; \
  packed_ |= ((unsigned long)nb & 0xff) << 16; \
  packed_ |= ((unsigned long)st & 0xff) << 24; \
  ctx->acc = packed_; \
} while (0)
267
268/*----- Main encoder and decoder ------------------------------------------*/
269
/* --- Output helpers --- *
 *
 * These expand in the scope of the encoder and decoder functions and use
 * their locals: the destination @d@, the context @ctx@, the flags @f@,
 * the encoding map @encodemap@, and the line-length bookkeeping @lnlen@
 * and @maxln@.
 */

/* Run @stuff@, which should write one character, counting it against the
 * current line.  If a line limit is set and the line is already full, the
 * context's indent string is written first and the count restarts.
 */
#define WRAP(stuff) do { \
  if (maxln != 0 && lnlen >= maxln) { \
    dstr_puts(d, ctx->indent); \
    lnlen = 0; \
  } \
  stuff \
  lnlen++; \
} while (0)

/* Write the character for digit @x@, lowercased if @CDCF_LOWERC@ is set,
 * with line wrapping.
 */
#define PUTWRAP(x) WRAP({ \
  char ch = (f & CDCF_LOWERC) \
    ? tolower((unsigned char)encodemap[x]) \
    : encodemap[x]; \
  DPUTC(d, ch); \
})

/* Write one padding character, with line wrapping. */
#define PADWRAP WRAP({ DPUTC(d, '='); })

/* Write the byte @x@ as is, with no line wrapping. */
#define PUTRAW(x) DPUTC(d, (x))
288
289#define ENCODER(ctxn, wd, acc) \
290 \
291/* --- @CTXN_doencode@ --- * \
292 * \
293 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
294 * @const char *encodemap@ = pointer to encoding map \
295 * @const unsigned char *p@ = pointer to a source buffer \
296 * @size_t sz@ = size of the source buffer \
297 * @dstr *d@ = pointer to destination string \
298 * \
299 * Returns: Zero on success, or @CDCERR_@ error code. \
300 * \
301 * Use: Main encoder function. \
302 */ \
303 \
304static int ctxn##_doencode(ctxn##_ctx *ctx, const char *encodemap, \
305 const unsigned char *p, size_t sz, dstr *d) \
306{ \
307 STATE_UNPACK(acc); \
308 const unsigned char *l = p + sz; \
309 unsigned lnlen = ctx->lnlen, maxln = ctx->maxline; \
310 \
311 if (p) { \
312 while (p < l) BASECONV(*p++, 8, wd, PUTWRAP); \
313 } else { \
314 BASECONV_FLUSH(8, wd, PUTWRAP); \
315 if (!(f & CDCF_NOEQPAD)) ctxn##_PADDING(8, wd, PADWRAP); \
316 } \
317 \
318 STATE_PACK(acc); \
319 ctx->lnlen = lnlen; \
320 return (0); \
321} \
322 \
323/* --- @CTXN_dodecode@ --- * \
324 * \
325 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
326 * @const signed char *decodemap@ = pointer to decode map \
327 * @const char *p@ = pointer to a source buffer \
328 * @size_t sz@ = size of the source buffer \
329 * @dstr *d@ = pointer to destination string \
330 * \
331 * Returns: Zero on success, or @CDCERR_@ error code. \
332 * \
333 * Use: Main decoder function. \
334 */ \
335 \
336static int ctxn##_dodecode(ctxn##_ctx *ctx, \
337 const signed char *decodemap, \
338 const unsigned char *p, size_t sz, dstr *d) \
339{ \
340 STATE_UNPACK(acc); \
341 const unsigned char *l = p + sz; \
342 int ch; \
343 int x; \
344 \
345 if (p) { \
346 while (p < l) { \
347 ch = *p++; \
348 switch (f & (CDCF_LOWERC | CDCF_IGNCASE)) { \
349 case 0: \
350 break; \
351 case CDCF_LOWERC: \
352 if (isupper(ch)) goto badch; \
353 default: \
354 ch = toupper(ch); \
355 } \
356 x = decodemap[ch]; \
357 switch (x) { \
358 case NV: \
359 badch: \
360 if (!(f & CDCF_IGNINVCH)) return (CDCERR_INVCH); \
361 break; \
362 case PC: \
363 if (f & CDCF_IGNEQMID) break; \
364 if (f & CDCF_NOEQPAD) goto badch; \
365 if (st == ST_MAIN && \
366 !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \
367 return (CDCERR_INVZPAD); \
368 st = ST_PAD; \
369 if (!(f & CDCF_IGNEQPAD)) { \
370 if (!nb) return (CDCERR_INVEQPAD); \
371 nb = (nb + wd)%8; \
372 st = ST_PAD; \
373 } \
374 break; \
375 case NL: \
376 if (f & CDCF_IGNNEWL) break; \
377 return (CDCERR_INVCH); \
378 default: \
379 if (st != ST_MAIN) \
380 return (CDCERR_INVEQPAD); \
381 BASECONV(x, wd, 8, PUTRAW); \
382 break; \
383 } \
384 } \
385 } else { \
386 if (st == ST_MAIN && \
387 !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \
388 return (CDCERR_INVZPAD); \
389 if (!(f & (CDCF_IGNEQPAD | CDCF_IGNEQMID | CDCF_NOEQPAD)) && nb) \
390 return (CDCERR_INVEQPAD); \
391 } \
392 \
393 STATE_PACK(acc); \
394 return (0); \
395}
396
397CTXS(ENCODER)
398
399/*----- Codec implementation ----------------------------------------------*/
400
401#define OPS(ctxn, wd, acc) \
402 \
403static int ctxn##_enc(codec *c, const void *p, size_t sz, dstr *d) \
404{ \
405 ctxn##_codec *bc = (ctxn##_codec *)c; \
406 return (ctxn##_doencode(&bc->ctx, bc->encodemap, p, sz, d)); \
407} \
408 \
409static int ctxn##_dec(codec *c, const void *p, size_t sz, dstr *d) \
410{ \
411 ctxn##_codec *bc = (ctxn##_codec *)c; \
412 return (ctxn##_dodecode(&bc->ctx, bc->decodemap, p, sz, d)); \
413} \
414 \
415static void ctxn##_destroy(codec *c) \
416{ \
417 ctxn##_codec *bc = (ctxn##_codec *)c; \
418 if (bc->ctx.indent) xfree((/*unconst*/ char *)bc->ctx.indent); \
419 DESTROY(bc); \
420} \
421 \
422static codec *ctxn##_docreate(unsigned flags, \
423 const char *indent, unsigned maxline, \
424 const codec_ops *ops, \
425 const char *encodemap, \
426 const signed char *decodemap) \
427{ \
428 ctxn##_codec *bc = CREATE(ctxn##_codec); \
429 bc->c.ops = ops; \
430 bc->ctx.acc = 0; \
431 bc->ctx.qsz = (flags & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
432 bc->ctx.lnlen = 0; \
433 bc->ctx.indent = indent ? xstrdup(indent) : 0; \
434 bc->ctx.maxline = maxline; \
435 bc->encodemap = encodemap; \
436 bc->decodemap = decodemap; \
437 return (&bc->c); \
438}
439
440CTXS(OPS)
441
442#define CLASS(name, ctxn, acc) \
443 \
444static const codec_ops \
445 name##_encode_ops = { &name##_class, ctxn##_enc, ctxn##_destroy }, \
446 name##_decode_ops = { &name##_class, ctxn##_dec, ctxn##_destroy }; \
447 \
448static codec *name##_encoder(unsigned flags, \
449 const char *indent, unsigned maxline) \
450{ \
451 return ctxn##_docreate(flags, indent, maxline, \
452 &name##_encode_ops, \
453 encodemap_##name, \
454 decodemap_##name); \
455} \
456 \
457static codec *name##_decoder(unsigned flags) \
458{ \
459 return ctxn##_docreate(flags, 0, 0, \
460 &name##_decode_ops, \
461 encodemap_##name, \
462 decodemap_##name); \
463} \
464 \
465const codec_class name##_class = \
466 { #name, name##_encoder, name##_decoder };
467
468CODECS(CLASS)
469
470/*----- Compatibility veneers ---------------------------------------------*/
471
472#define COMPAT(ctxn, wd, acc) \
473 \
474void ctxn##_encode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
475 { ctxn##_doencode(ctx, encodemap_##ctxn, p, sz, d); } \
476 \
477void ctxn##_decode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
478 { ctxn##_dodecode(ctx, decodemap_##ctxn, p, sz, d); } \
479 \
480void ctxn##_init(ctxn##_ctx *ctx) \
481{ \
482 ctx->acc = 0; \
483 ctx->qsz = (ctxn##_OLDFLAGS & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
484 ctx->lnlen = 0; \
485 ctx->indent = "\n"; \
486 ctx->maxline = 72; \
487}
488
489CTXS(COMPAT)
490
491/*----- That's all, folks -------------------------------------------------*/