chiark / gitweb /
codec/url.c (encode): Fix bungled cast in previous refactoring.
[mLib] / codec / baseconv.c
CommitLineData
236f657b
MW
/* -*-c-*-
 *
 * Binary base-conversion encoding and decoding (base64, base32, etc.)
 *
 * (c) 1997 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * mLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with mLib; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA.
 */

/*----- Header files ------------------------------------------------------*/
29
30#include <ctype.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34
35#include "alloc.h"
36#include "codec.h"
37#include "dstr.h"
38#include "sub.h"
39
40#include "base64.h"
41#include "base32.h"
42#include "hex.h"
43
/*----- Important tables --------------------------------------------------*/

/* --- Magic constants --- *
 *
 * Negative markers stored in the decoding maps in place of a digit value.
 * Genuine digit values are never negative, so a marker can't be mistaken
 * for a digit.  The values are parenthesized so that they stay intact in
 * any expression context.
 */

#define NV (-1)                         /* Not valid */
#define PC (-2)                         /* Padding character */
#define NL (-3)                         /* Newline character */
#define SP (-4)                         /* Space character */

52
53/* --- Base64 --- */
54
55static const char
56 encodemap_base64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
57 "abcdefghijklmnopqrstuvwxyz"
58 "0123456789+/" },
59 encodemap_file64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
60 "abcdefghijklmnopqrstuvwxyz"
61 "0123456789+%" },
62 encodemap_base64url[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
63 "abcdefghijklmnopqrstuvwxyz"
64 "0123456789-_" };
65
66static const signed char decodemap_base64[] = {
09fbf4d0 67 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 68 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 69 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, NV, 63, /* 2x */
236f657b
MW
70 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
71 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
72 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
73 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
74 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
75}, decodemap_file64[] = {
09fbf4d0 76 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 77 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 78 SP, NV, NV, NV, NV, 63, NV, NV, NV, NV, NV, 62, NV, NV, NV, NV, /* 2x */
236f657b
MW
79 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
80 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
81 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
82 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
83 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
84}, decodemap_base64url[] = {
09fbf4d0 85 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 86 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 87 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, /* 2x */
236f657b
MW
88 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
89 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
90 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, 63, /* 5x */
91 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
92 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
93};
94
95/* --- Base32 --- */
96
97static const char
98 encodemap_base32[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" },
99 encodemap_base32hex[] = { "0123456789ABCDEFGHIJKLMNOPQRSTUV" };
100
101static const signed char decodemap_base32[] = {
09fbf4d0 102 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 103 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 104 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
236f657b
MW
105 NV, NV, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, PC, NV, NV, /* 3x */
106 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
107 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
108 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
109 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
110}, decodemap_base32hex[] = {
09fbf4d0 111 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 112 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 113 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
236f657b
MW
114 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, PC, NV, NV, /* 3x */
115 NV, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 4x */
116 25, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
117 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
118 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
119};
120
121/* --- Hex --- */
122
123static const char
124 encodemap_hex[] = { "0123456789ABCDEF" };
125
126static const signed char decodemap_hex[] = {
09fbf4d0 127 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
236f657b 128 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
09fbf4d0 129 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
236f657b
MW
130 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, NV, NV, NV, /* 3x */
131 NV, 10, 11, 12, 13, 14, 15, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 4x */
132 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
133 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
134 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
135};
136
/*----- Base conversion macros --------------------------------------------*/

/* --- @BASECONV@ --- *
 *
 * Arguments:   @x@ = an input digit of width @iwd@ bits
 *              @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @put@ = function or macro to output a digit
 *
 * Use:         Inserts the bits of @x@ into an accumulator.  As digits @y@
 *              of width @owd@ become ready, @put(y)@ is invoked to emit
 *              them.
 *
 *              The macro operates on two variables which must be in scope
 *              where it is used: @a@, the accumulator, and @nb@, the
 *              number of bits in @a@ not yet emitted.  The arguments are
 *              parenthesized in the expansion, so arbitrary expressions
 *              may be passed; @x@ is evaluated exactly once.
 */

#define BASECONV(x, iwd, owd, put) do {                                 \
  a = (a << (iwd)) | (x);                                               \
  nb += (iwd);                                                          \
  while (nb >= (owd)) {                                                 \
    nb -= (owd);                                                        \
    put((a >> nb) & ((1 << (owd)) - 1));                                \
  }                                                                     \
} while (0)
158
/* --- @BASECONV_FLUSH@ --- *
 *
 * Arguments:   @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @put@ = function or macro to output a digit
 *
 * Use:         Flushes remaining digits from the base-conversion shift
 *              register.  The bits in the shift register are padded on the
 *              right with zeros.  Digits of width @owd@ are emitted by
 *              invoking @put@.
 *
 *              Nothing is emitted if the shift register is empty.  The
 *              variables @a@ and @nb@ must be in scope, as for @BASECONV@.
 *              Afterwards, @nb@ counts the padding bits still notionally
 *              in the register, which is what @BASECONV_PAD@ wants.
 */

#define BASECONV_FLUSH(iwd, owd, put) do {                              \
  if (nb) {                                                             \
    while (nb < (owd)) { a <<= (iwd); nb += (iwd); }                    \
    nb -= (owd);                                                        \
    put((a >> nb) & ((1 << (owd)) - 1));                                \
  }                                                                     \
} while (0)
178
/* --- @BASECONV_PAD@ --- *
 *
 * Arguments:   @iwd@ = input digit width in bits
 *              @owd@ = output digit width in bits
 *              @pad@ = function or macro to output padding
 *
 * Use:         Invokes @pad@ sufficiently often to realign the shift
 *              register.
 *
 *              The variable @nb@ must be in scope.  One @pad@ is issued
 *              for each @owd@ bits removed from @nb@; whenever fewer than
 *              @owd@ bits remain but @nb@ is nonzero, a further @iwd@ bits
 *              are added.  The macro finishes when @nb@ reaches zero.
 *              Note that @pad@ is a statement (without its semicolon), not
 *              a function name.
 */

#define BASECONV_PAD(iwd, owd, pad) do {                                \
  for (;;) {                                                            \
    while (nb >= (owd)) { pad; nb -= (owd); }                           \
    if (!nb) break;                                                     \
    nb += (iwd);                                                        \
  }                                                                     \
} while (0)
196
/* --- @NULL_PAD@ --- *
 *
 * Arguments:   @iwd@, @owd@, @pad@ = as for @BASECONV_PAD@; all ignored
 *
 * Use:         Does nothing.  Takes the place of @BASECONV_PAD@ for
 *              encodings which have no padding.
 */

#define NULL_PAD(iwd, owd, pad) do ; while (0)
198
/*----- Lists of things to make -------------------------------------------*/

/* --- @CODECS@ --- *
 *
 * Arguments:   @_@ = macro to invoke for each codec
 *
 * Use:         Invokes @_(name, ctxn, acc)@ once for each codec class,
 *              where @name@ is the codec's name, @ctxn@ is the name of the
 *              context type family which implements it, and @acc@ is the
 *              name of the accumulator member in that context.
 */

#define CODECS(_)                                                       \
  /* NAME,      CTXN,           ACC */                                  \
  _(base64,     base64,         acc)                                    \
  _(file64,     base64,         acc)                                    \
  _(base64url,  base64,         acc)                                    \
  _(base32,     base32,         accl)                                   \
  _(base32hex,  base32,         accl)                                   \
  _(hex,        hex,            acc)
209
/* --- @CTXS@ --- *
 *
 * Arguments:   @_@ = macro to invoke for each context type
 *
 * Use:         Invokes @_(ctxn, wd, acc)@ once for each context type
 *              family, where @ctxn@ is the family name, @wd@ is the width
 *              of an encoded digit in bits, and @acc@ is the name of the
 *              accumulator member in the context.
 */

#define CTXS(_)                                                         \
  /* CTXN,      WD,     ACC */                                          \
  _(base64,     6,      acc)                                            \
  _(base32,     5,      accl)                                           \
  _(hex,        4,      acc)
215
/* --- Per-context settings --- *
 *
 * For each context type family @CTXN@:
 *
 *   * @CTXN_PADDING@ is the macro used by the encoder to emit trailing
 *     padding;
 *   * @CTXN_FLAGMASK@ and @CTXN_FLAGXOR@ are applied, in that order, to
 *     the flags stored in a context; and
 *   * @CTXN_OLDFLAGS@ are the flags used by the compatibility @CTXN_init@
 *     function.
 *
 * The expressions are parenthesized so that they can be combined with
 * other operators safely.
 */

#define base64_PADDING BASECONV_PAD
#define base64_FLAGMASK (~(CDCF_LOWERC | CDCF_IGNCASE))
#define base64_FLAGXOR 0
#define base64_OLDFLAGS CDCF_IGNJUNK

#define base32_PADDING BASECONV_PAD
#define base32_FLAGMASK (~0)
#define base32_FLAGXOR 0
#define base32_OLDFLAGS CDCF_IGNJUNK

#define hex_PADDING NULL_PAD
#define hex_FLAGMASK (~0)
#define hex_FLAGXOR 0
#define hex_OLDFLAGS (CDCF_IGNJUNK | CDCF_LOWERC)
230
231/*----- Data structures ---------------------------------------------------*/
232
233#define OBJ(ctxn, wd, acc) \
234 \
235typedef struct ctxn##_codec { \
236 codec c; \
237 ctxn##_ctx ctx; \
238 const char *encodemap; \
239 const signed char *decodemap; \
240} ctxn##_codec;
241
242CTXS(OBJ)
243
/*----- State packing -----------------------------------------------------*
 *
 * These macros convert between the state required by the new encoding and
 * decoding core and the old externally-visible context structures.  It's
 * unpleasant, I know; maybe we can drop the old interface later.
 */

/* --- Decoder states --- *
 *
 * The state is stored in eight bits of the context's accumulator word, so
 * the values must fit in a byte.
 */

enum {
  ST_MAIN,                              /* Main decoding state */
  ST_PAD,                               /* Decoding trailing padding */
  ST_END                                /* Finished decoding */
};
256
/* --- @STATE_UNPACK@ --- *
 *
 * Arguments:   @acc@ = name of the accumulator member of the context
 *
 * Use:         Declares and initializes the working variables from the
 *              context pointed to by @ctx@: the accumulator @a@ from bits
 *              0--15 of the accumulator member, the bit count @nb@ from
 *              bits 16--23, the state @st@ from bits 24--31, and the flags
 *              @f@ from the @qsz@ member.
 *
 *              The expansion is a sequence of declarations without a
 *              final semicolon: write @STATE_UNPACK(acc);@ among the other
 *              declarations at the top of a block.  (Leaving the semicolon
 *              to the caller avoids planting an empty statement ahead of
 *              the caller's remaining declarations.)
 */

#define STATE_UNPACK(acc)                                               \
  unsigned long a = (ctx->acc >> 0) & 0xffff;                           \
  unsigned nb = (ctx->acc >> 16) & 0xff;                                \
  unsigned st = (ctx->acc >> 24) & 0xff;                                \
  unsigned f = ctx->qsz

/* --- @STATE_PACK@ --- *
 *
 * Arguments:   @acc@ = name of the accumulator member of the context
 *
 * Use:         Stores the working variables @a@, @nb@ and @st@ back into
 *              the context pointed to by @ctx@, using the layout described
 *              for @STATE_UNPACK@.  Only the low 16 bits of @a@ and the
 *              low 8 bits of @nb@ and @st@ are kept.  The flags @f@ are
 *              not written back.
 */

#define STATE_PACK(acc) do {                                            \
  ctx->acc = (((a & 0xffff) << 0) |                                     \
              (((unsigned long)nb & 0xff) << 16) |                      \
              (((unsigned long)st & 0xff) << 24));                      \
} while (0)
268
/*----- Main encoder and decoder ------------------------------------------*/

/* --- @WRAP@ --- *
 *
 * Arguments:   @stuff@ = a statement which outputs one character
 *
 * Use:         Outputs one character with line wrapping.  If a maximum
 *              line length @maxln@ is set (i.e., nonzero) and the current
 *              line already holds @lnlen >= maxln@ characters, the
 *              context's @indent@ string is written to @d@ first and the
 *              line length is reset.  Then @stuff@ is executed and the
 *              line length is bumped.
 *
 *              The variables @ctx@, @d@, @maxln@ and @lnlen@ must be in
 *              scope.
 */

#define WRAP(stuff) do {                                                \
  if (maxln && lnlen >= maxln) {                                        \
    dstr_puts(d, ctx->indent);                                          \
    lnlen = 0;                                                          \
  }                                                                     \
  stuff                                                                 \
  lnlen++;                                                              \
} while (0)
279
/* --- @PUTWRAP@, @PADWRAP@, @PUTRAW@ --- *
 *
 * Arguments:   @x@ = a digit value (@PUTWRAP@) or a character (@PUTRAW@)
 *
 * Use:         Output primitives for the encoder and decoder.
 *
 *              @PUTWRAP@ looks the digit @x@ up in @encodemap@, converts
 *              the character to lower case if @CDCF_LOWERC@ is set in the
 *              flags @f@, and writes it to @d@ with line wrapping.
 *
 *              @PADWRAP@ writes a padding character `=' to @d@ with line
 *              wrapping.
 *
 *              @PUTRAW@ writes @x@ to @d@ as it stands, without wrapping.
 */

#define PUTWRAP(x) WRAP({                                               \
  char ch = encodemap[x];                                               \
  if (f & CDCF_LOWERC) ch = tolower((unsigned char)ch);                 \
  DPUTC(d, ch);                                                         \
})

#define PADWRAP WRAP({ DPUTC(d, '='); })

#define PUTRAW(x) DPUTC(d, x)
289
290#define ENCODER(ctxn, wd, acc) \
291 \
292/* --- @CTXN_doencode@ --- * \
293 * \
294 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
295 * @const char *encodemap@ = pointer to encoding map \
296 * @const unsigned char *p@ = pointer to a source buffer \
297 * @size_t sz@ = size of the source buffer \
298 * @dstr *d@ = pointer to destination string \
299 * \
300 * Returns: Zero on success, or @CDCERR_@ error code. \
301 * \
302 * Use: Main encoder function. \
303 */ \
304 \
305static int ctxn##_doencode(ctxn##_ctx *ctx, const char *encodemap, \
306 const unsigned char *p, size_t sz, dstr *d) \
307{ \
308 STATE_UNPACK(acc); \
309 const unsigned char *l = p + sz; \
310 unsigned lnlen = ctx->lnlen, maxln = ctx->maxline; \
311 \
312 if (p) { \
313 while (p < l) BASECONV(*p++, 8, wd, PUTWRAP); \
314 } else { \
315 BASECONV_FLUSH(8, wd, PUTWRAP); \
316 if (!(f & CDCF_NOEQPAD)) ctxn##_PADDING(8, wd, PADWRAP); \
317 } \
318 \
319 STATE_PACK(acc); \
320 ctx->lnlen = lnlen; \
321 return (0); \
322} \
323 \
324/* --- @CTXN_dodecode@ --- * \
325 * \
326 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
327 * @const signed char *decodemap@ = pointer to decode map \
328 * @const char *p@ = pointer to a source buffer \
329 * @size_t sz@ = size of the source buffer \
330 * @dstr *d@ = pointer to destination string \
331 * \
332 * Returns: Zero on success, or @CDCERR_@ error code. \
333 * \
334 * Use: Main decoder function. \
335 */ \
336 \
337static int ctxn##_dodecode(ctxn##_ctx *ctx, \
338 const signed char *decodemap, \
339 const unsigned char *p, size_t sz, dstr *d) \
340{ \
341 STATE_UNPACK(acc); \
342 const unsigned char *l = p + sz; \
343 int ch; \
344 int x; \
345 \
346 if (p) { \
347 while (p < l) { \
348 ch = *p++; \
349 switch (f & (CDCF_LOWERC | CDCF_IGNCASE)) { \
350 case 0: \
351 break; \
352 case CDCF_LOWERC: \
353 if (isupper(ch)) goto badch; \
354 default: \
355 ch = toupper(ch); \
356 } \
357 x = decodemap[ch]; \
358 switch (x) { \
359 case NV: \
360 badch: \
361 if (!(f & CDCF_IGNINVCH)) return (CDCERR_INVCH); \
362 break; \
363 case PC: \
364 if (f & CDCF_IGNEQMID) break; \
365 if (f & CDCF_NOEQPAD) goto badch; \
366 if (st == ST_MAIN && \
367 !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \
368 return (CDCERR_INVZPAD); \
369 st = ST_PAD; \
370 if (!(f & CDCF_IGNEQPAD)) { \
371 if (!nb) return (CDCERR_INVEQPAD); \
372 nb = (nb + wd)%8; \
373 st = ST_PAD; \
374 } \
375 break; \
376 case NL: \
377 if (f & CDCF_IGNNEWL) break; \
378 return (CDCERR_INVCH); \
09fbf4d0
MW
379 case SP: \
380 if (f & CDCF_IGNSPC) break; \
381 return (CDCERR_INVCH); \
236f657b 382 default: \
09fbf4d0 383 if (st != ST_MAIN) return (CDCERR_INVEQPAD); \
236f657b
MW
384 BASECONV(x, wd, 8, PUTRAW); \
385 break; \
386 } \
387 } \
388 } else { \
389 if (st == ST_MAIN && \
390 !(f & CDCF_IGNZPAD) && (a & ((1 << nb) - 1))) \
391 return (CDCERR_INVZPAD); \
392 if (!(f & (CDCF_IGNEQPAD | CDCF_IGNEQMID | CDCF_NOEQPAD)) && nb) \
393 return (CDCERR_INVEQPAD); \
394 } \
395 \
396 STATE_PACK(acc); \
397 return (0); \
398}
399
400CTXS(ENCODER)
401
402/*----- Codec implementation ----------------------------------------------*/
403
404#define OPS(ctxn, wd, acc) \
405 \
406static int ctxn##_enc(codec *c, const void *p, size_t sz, dstr *d) \
407{ \
408 ctxn##_codec *bc = (ctxn##_codec *)c; \
409 return (ctxn##_doencode(&bc->ctx, bc->encodemap, p, sz, d)); \
410} \
411 \
412static int ctxn##_dec(codec *c, const void *p, size_t sz, dstr *d) \
413{ \
414 ctxn##_codec *bc = (ctxn##_codec *)c; \
415 return (ctxn##_dodecode(&bc->ctx, bc->decodemap, p, sz, d)); \
416} \
417 \
418static void ctxn##_destroy(codec *c) \
419{ \
420 ctxn##_codec *bc = (ctxn##_codec *)c; \
421 if (bc->ctx.indent) xfree((/*unconst*/ char *)bc->ctx.indent); \
422 DESTROY(bc); \
423} \
424 \
425static codec *ctxn##_docreate(unsigned flags, \
426 const char *indent, unsigned maxline, \
427 const codec_ops *ops, \
428 const char *encodemap, \
429 const signed char *decodemap) \
430{ \
431 ctxn##_codec *bc = CREATE(ctxn##_codec); \
432 bc->c.ops = ops; \
433 bc->ctx.acc = 0; \
434 bc->ctx.qsz = (flags & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
435 bc->ctx.lnlen = 0; \
436 bc->ctx.indent = indent ? xstrdup(indent) : 0; \
437 bc->ctx.maxline = maxline; \
438 bc->encodemap = encodemap; \
439 bc->decodemap = decodemap; \
440 return (&bc->c); \
441}
442
443CTXS(OPS)
444
445#define CLASS(name, ctxn, acc) \
446 \
447static const codec_ops \
448 name##_encode_ops = { &name##_class, ctxn##_enc, ctxn##_destroy }, \
449 name##_decode_ops = { &name##_class, ctxn##_dec, ctxn##_destroy }; \
450 \
451static codec *name##_encoder(unsigned flags, \
452 const char *indent, unsigned maxline) \
453{ \
454 return ctxn##_docreate(flags, indent, maxline, \
455 &name##_encode_ops, \
456 encodemap_##name, \
457 decodemap_##name); \
458} \
459 \
460static codec *name##_decoder(unsigned flags) \
461{ \
462 return ctxn##_docreate(flags, 0, 0, \
463 &name##_decode_ops, \
464 encodemap_##name, \
465 decodemap_##name); \
466} \
467 \
468const codec_class name##_class = \
469 { #name, name##_encoder, name##_decoder };
470
471CODECS(CLASS)
472
473/*----- Compatibility veneers ---------------------------------------------*/
474
475#define COMPAT(ctxn, wd, acc) \
476 \
477void ctxn##_encode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
478 { ctxn##_doencode(ctx, encodemap_##ctxn, p, sz, d); } \
479 \
480void ctxn##_decode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
481 { ctxn##_dodecode(ctx, decodemap_##ctxn, p, sz, d); } \
482 \
483void ctxn##_init(ctxn##_ctx *ctx) \
484{ \
485 ctx->acc = 0; \
486 ctx->qsz = (ctxn##_OLDFLAGS & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
487 ctx->lnlen = 0; \
488 ctx->indent = "\n"; \
489 ctx->maxline = 72; \
490}
491
492CTXS(COMPAT)
493
/*----- That's all, folks -------------------------------------------------*/