3 * Binary base-conversion encoding and decoding (base64, base32, etc.)
5 * (c) 1997 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of the mLib utilities library.
12 * mLib is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
17 * mLib is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
28 /*----- Header files ------------------------------------------------------*/
43 /*----- Important tables --------------------------------------------------*/
45 /* --- Magic constants --- */
/* Sentinel entries for the decode maps in this file.  The maps are arrays
 * of @signed char@ in which digit values are non-negative; these negative
 * codes mark the character codes which are not digits and say which class
 * each one falls into.
 */
47 #define NV -1 /* Not valid */
48 #define PC -2 /* Padding character */
49 #define NL -3 /* Newline character */
50 #define SP -4 /* Space character */
55 encodemap_base64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
56 "abcdefghijklmnopqrstuvwxyz"
58 encodemap_file64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
59 "abcdefghijklmnopqrstuvwxyz"
61 encodemap_base64url[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
62 "abcdefghijklmnopqrstuvwxyz"
/* Decode maps for the three 64-digit alphabets.  Each map has one entry
 * per 7-bit character code, sixteen entries to a row; the comment at the
 * end of a row gives the high nibble of the codes in it.  An entry is
 * either the value of the digit or one of the negative magic constants:
 * codes 0x09, 0x0c and 0x20 are @SP@, codes 0x0a and 0x0d are @NL@, code
 * 0x3d is @PC@, and anything else which is not a digit is @NV@.
 *
 * The entries for digits 0--61 are the same in all three maps.  Only the
 * last two digits move: 62 and 63 are at codes 0x2b and 0x2f in
 * @decodemap_base64@, at 0x2b and 0x25 in @decodemap_file64@, and at 0x2d
 * and 0x5f in @decodemap_base64url@.
 */
65 static const signed char decodemap_base64[] = {
66 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
67 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
68 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, NV, 63, /* 2x */
69 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
70 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
71 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
72 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
73 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
74 }, decodemap_file64[] = {
75 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
76 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
77 SP, NV, NV, NV, NV, 63, NV, NV, NV, NV, NV, 62, NV, NV, NV, NV, /* 2x */
78 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
79 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
80 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
81 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
82 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
83 }, decodemap_base64url[] = {
84 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
85 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
86 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, 62, NV, NV, /* 2x */
87 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NV, NV, NV, PC, NV, NV, /* 3x */
88 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
89 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, 63, /* 5x */
90 NV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ,37, 38, 39, 40, /* 6x */
91 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NV, NV, NV, NV, NV /* 7x */
97 encodemap_base32[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" },
98 encodemap_base32hex[] = { "0123456789ABCDEFGHIJKLMNOPQRSTUV" };
/* Decode maps for the two 32-digit alphabets, laid out one entry per 7-bit
 * character code with sixteen entries to a row.  In @decodemap_base32@,
 * codes 0x41--0x5a carry digits 0--25 and codes 0x32--0x37 carry digits
 * 26--31.  In @decodemap_base32hex@, codes 0x30--0x39 carry digits 0--9 and
 * codes 0x41--0x56 carry digits 10--31.  Both maps mark 0x09, 0x0c and
 * 0x20 as @SP@, 0x0a and 0x0d as @NL@, and 0x3d as @PC@.  Rows 6x and 7x
 * hold no digits at all, so these tables on their own recognize only one
 * case of letter.
 */
100 static const signed char decodemap_base32[] = {
101 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
102 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
103 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
104 NV, NV, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, PC, NV, NV, /* 3x */
105 NV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 4x */
106 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NV, NV, NV, NV, NV, /* 5x */
107 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
108 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
109 }, decodemap_base32hex[] = {
110 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
111 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
112 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
113 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, PC, NV, NV, /* 3x */
114 NV, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 4x */
115 25, 26, 27, 28, 29, 30, 31, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
116 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
117 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
123 encodemap_hex[] = { "0123456789ABCDEF" };
/* Decode map for hex, one entry per 7-bit character code with sixteen
 * entries to a row.  Codes 0x30--0x39 carry digits 0--9 and codes
 * 0x41--0x46 carry digits 10--15.  Codes 0x09, 0x0c and 0x20 are @SP@ and
 * codes 0x0a and 0x0d are @NL@.  Unlike the other decode maps there is no
 * @PC@ entry: code 0x3d is @NV@ here.
 */
125 static const signed char decodemap_hex[] = {
126 NV, NV, NV, NV, NV, NV, NV, NV, NV, SP, NL, NV, SP, NL, NV, NV, /* 0x */
127 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 1x */
128 SP, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 2x */
129 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NV, NV, NV, NV, NV, NV, /* 3x */
130 NV, 10, 11, 12, 13, 14, 15, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 4x */
131 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 5x */
132 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 6x */
133 NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, NV, /* 7x */
136 /*----- Base conversion macros --------------------------------------------*/
138 /* --- @BASECONV@ --- *
140 * Arguments: @x@ = an input digit of width @iwd@ bits
141 * @iwd@ = input digit width in bits
142 * @owd@ = output digit width in bits
143 * @put@ = function or macro to output a digit
145 * Use: Inserts the bits of @x@ into an accumulator. As digits @y@
146 * of width @owd@ become ready, @put(y)@ is invoked to emit them.
149 #define BASECONV(x, iwd, owd, put) do { \
/* Shift the accumulator @a@ left by @iwd@ and put @x@ in at the bottom. */ \
150 a = (a << iwd) | x; \
/* Keep emitting for as long as @nb@ is at least @owd@. */ \
152 while (nb >= owd) { \
/* The digit passed to @put@ is the @owd@-bit field of @a@ above bit @nb@. */ \
154 put((a >> nb) & ((1 << owd) - 1)); \
158 /* --- @BASECONV_FLUSH@ --- *
160 * Arguments: @iwd@ = input digit width in bits
161 * @owd@ = output digit width in bits
162 * @put@ = function or macro to output a digit
164 * Use: Flushes remaining digits from the base-conversion shift
165 * register. The bits in the shift register are padded on the
166 * right with zeros. Digits of width @owd@ are emitted by
170 #define BASECONV_FLUSH(iwd, owd, put) do { \
/* Shift zero bits into @a@, @iwd@ at a time, until @nb@ reaches @owd@. */ \
172 while (nb < owd) { a <<= iwd; nb += iwd; } \
/* The digit passed to @put@ is the @owd@-bit field of @a@ above bit @nb@. */ \
174 put((a >> nb) & ((1 << owd) - 1)); \
178 /* --- @BASECONV_PAD@ --- *
180 * Arguments: @iwd@ = input digit width in bits
181 * @owd@ = output digit width in bits
182 * @pad@ = function or macro to output padding
184 * Use: Invokes @pad@ sufficiently often to realign the shift
188 #define BASECONV_PAD(iwd, owd, pad) do { \
/* Run @pad@ once for each @owd@ bits that can be taken off @nb@. */ \
190 while (nb >= owd) { pad; nb -= owd; } \
/* Padding macro which expands to an empty statement.  It accepts the same
 * three arguments as @BASECONV_PAD@ and ignores all of them; @hex_PADDING@
 * is defined as this macro.
 */
196 #define NULL_PAD(iwd, owd, pad) do ; while (0)
198 /*----- Lists of things to make -------------------------------------------*/
201 /* NAME, CTXN, ACC */ \
202 _(base64, base64, acc) \
203 _(file64, base64, acc) \
204 _(base64url, base64, acc) \
205 _(base32, base32, accl) \
206 _(base32hex, base32, accl) \
210 /* CTXN, WD, ACC */ \
/* Per-context settings.  The generating macros in this file find them by
 * pasting the context name onto the front of one of these suffixes:
 *
 *   @_PADDING@    the macro the encoder invokes to emit padding, which it
 *                 does only when @CDCF_NOEQPAD@ is clear;
 *   @_FLAGMASK@   ANDed with a set of flags before they are stored in the
 *                 context;
 *   @_FLAGXOR@    XORed into the masked flags;
 *   @_OLDFLAGS@   the flags which @CTXN_init@ passes through the mask and
 *                 XOR to set up an old-style context.
 */
/* base64: @CDCF_LOWERC@ and @CDCF_IGNCASE@ are always masked off. */
215 #define base64_PADDING BASECONV_PAD
216 #define base64_FLAGMASK ~(CDCF_LOWERC | CDCF_IGNCASE)
217 #define base64_FLAGXOR 0
218 #define base64_OLDFLAGS CDCF_IGNJUNK
/* base32: every flag is let through unchanged. */
220 #define base32_PADDING BASECONV_PAD
221 #define base32_FLAGMASK ~0
222 #define base32_FLAGXOR 0
223 #define base32_OLDFLAGS CDCF_IGNJUNK
/* hex: every flag is let through unchanged; the padding macro is empty. */
225 #define hex_PADDING NULL_PAD
226 #define hex_FLAGMASK ~0
227 #define hex_FLAGXOR 0
228 #define hex_OLDFLAGS (CDCF_IGNJUNK | CDCF_LOWERC)
230 /*----- Data structures ---------------------------------------------------*/
/* Expands to the definition of @struct CTXN_codec@ for one context type.
 * Among its members are @encodemap@ and @decodemap@, the pointers to the
 * character maps which the codec's operations pass on to the core
 * encoder and decoder.
 */
232 #define OBJ(ctxn, wd, acc) \
234 typedef struct ctxn##_codec { \
237 const char *encodemap; \
238 const signed char *decodemap; \
243 /*----- State packing -----------------------------------------------------*
245 * These macros convert between the state required by the new encoding and
246 * decoding core and the old externally-visible context structures. It's
247 * unpleasant, I know; maybe we can drop the old interface later.
251 ST_MAIN, /* Main decoding state */
252 ST_PAD, /* Decoding trailing padding */
253 ST_END /* Finished decoding */
/* Declares the working variables of the core functions and loads them from
 * the context @ctx@.  The context member named by @acc@ is one packed
 * word: bits 0--15 give the accumulator @a@, bits 16--23 give @nb@ and bits
 * 24--31 give the state @st@.  The flags @f@ come from @ctx->qsz@.  The
 * expansion is a run of declarations, so the macro has to be used where
 * declarations are allowed.
 */
256 #define STATE_UNPACK(acc) \
257 unsigned long a = (ctx->acc >> 0) & 0xffff; \
258 unsigned nb = (ctx->acc >> 16) & 0xff; \
259 unsigned st = (ctx->acc >> 24) & 0xff; \
260 unsigned f = ctx->qsz;
/* Stores the working variables back into the context member named by
 * @acc@: the low 16 bits of @a@ go in bits 0--15, the low 8 bits of @nb@ in
 * bits 16--23 and the low 8 bits of @st@ in bits 24--31, which is the layout
 * that @STATE_UNPACK@ reads.
 */
262 #define STATE_PACK(acc) do { \
263 ctx->acc = (((a & 0xffff) << 0) | \
264 (((unsigned long)nb & 0xff) << 16) | \
265 (((unsigned long)st & 0xff) << 24)); \
268 /*----- Main encoder and decoder ------------------------------------------*/
/* Wraps the output action @stuff@ in line-length handling.  When @maxln@
 * is nonzero and @lnlen@ has reached it, the context's indent string is
 * appended to the output string @d@.
 */
270 #define WRAP(stuff) do { \
271 if (maxln && lnlen >= maxln) { \
272 dstr_puts(d, ctx->indent); \
/* Output action for an encoded digit @x@, run inside @WRAP@.  The digit is
 * looked up in @encodemap@, and the resulting character is put through
 * @TOLOWER@ when @CDCF_LOWERC@ is set in the flags @f@.
 */
279 #define PUTWRAP(x) WRAP({ \
280 char ch = encodemap[x]; \
281 if (f & CDCF_LOWERC) ch = TOLOWER(ch); \
/* Output action for padding, run inside @WRAP@: appends `=' to @d@. */
285 #define PADWRAP WRAP({ DPUTC(d, '='); })
/* Output action which appends @x@ to @d@ directly, without going through
 * @WRAP@.  The decoder passes this to @BASECONV@.
 */
287 #define PUTRAW(x) DPUTC(d, x)
/* Expands to the core conversion code for one context type @ctxn@ whose
 * encoded digits are @wd@ bits wide.
 */
289 #define ENCODER(ctxn, wd, acc) \
291 /* --- @CTXN_doencode@ --- * \
293 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
294 * @const char *encodemap@ = pointer to encoding map \
295 * @const unsigned char *p@ = pointer to a source buffer \
296 * @size_t sz@ = size of the source buffer \
297 * @dstr *d@ = pointer to destination string \
299 * Returns: Zero on success, or @CDCERR_@ error code. \
301 * Use: Main encoder function. \
* Source bytes are fed to @BASECONV@ as 8-bit input \
* digits, and each @wd@-bit output digit is written \
* through @PUTWRAP@.  Flushing is done by \
* @BASECONV_FLUSH@, followed by the context type's \
* padding macro unless @CDCF_NOEQPAD@ is set.  The \
* running line length is taken from @ctx->lnlen@ \
* and stored back there afterwards. \
304 static int ctxn##_doencode(ctxn##_ctx *ctx, const char *encodemap, \
305 const unsigned char *p, size_t sz, dstr *d) \
308 const unsigned char *l = p + sz; \
309 unsigned lnlen = ctx->lnlen, maxln = ctx->maxline; \
312 while (p < l) BASECONV(*p++, 8, wd, PUTWRAP); \
314 BASECONV_FLUSH(8, wd, PUTWRAP); \
315 if (!(f & CDCF_NOEQPAD)) ctxn##_PADDING(8, wd, PADWRAP); \
319 ctx->lnlen = lnlen; \
323 /* --- @CTXN_dodecode@ --- * \
325 * Arguments: @CTXN_ctx *ctx@ = pointer to a context block \
326 * @const signed char *decodemap@ = pointer to decode map \
327 * @const unsigned char *p@ = pointer to a source buffer \
328 * @size_t sz@ = size of the source buffer \
329 * @dstr *d@ = pointer to destination string \
331 * Returns: Zero on success, or @CDCERR_@ error code. \
333 * Use: Main decoder function. \
* Digit values are fed to @BASECONV@ as @wd@-bit \
* input digits, and the bytes which come out are \
* appended to @d@ by @PUTRAW@.  A digit which \
* arrives when the state is no longer @ST_MAIN@ is \
* rejected with @CDCERR_INVEQPAD@. \
* \
* A rejected character gives @CDCERR_INVCH@ unless \
* the flag guarding that particular check is set: \
* @CDCF_IGNINVCH@, @CDCF_IGNNEWL@ or @CDCF_IGNSPC@ \
* (the last two presumably for the @NL@ and @SP@ \
* character classes).  A padding character is \
* skipped if @CDCF_IGNEQMID@ is set, and is treated \
* as a bad character if @CDCF_NOEQPAD@ is set. \
* \
* While still in @ST_MAIN@, and unless \
* @CDCF_IGNZPAD@ is set, @CDCERR_INVZPAD@ is \
* returned if @nb@ is a nonzero multiple of @wd@ or \
* any of the low @nb@ bits of the accumulator are \
* set.  The same test is made in two places. \
336 static int ctxn##_dodecode(ctxn##_ctx *ctx, \
337 const signed char *decodemap, \
338 const unsigned char *p, size_t sz, dstr *d) \
341 const unsigned char *l = p + sz; \
348 switch (f & (CDCF_LOWERC | CDCF_IGNCASE)) { \
352 if (ISUPPER(ch)) goto badch; \
360 if (!(f & CDCF_IGNINVCH)) return (CDCERR_INVCH); \
363 if (f & CDCF_IGNEQMID) break; \
364 if (f & CDCF_NOEQPAD) goto badch; \
365 if (st == ST_MAIN && !(f & CDCF_IGNZPAD) && \
366 ((nb && !(nb%wd)) || (a & ((1 << nb) - 1)))) \
367 return (CDCERR_INVZPAD); \
369 if (!(f & CDCF_IGNEQPAD)) { \
370 if (!nb) return (CDCERR_INVEQPAD); \
376 if (f & CDCF_IGNNEWL) break; \
377 return (CDCERR_INVCH); \
379 if (f & CDCF_IGNSPC) break; \
380 return (CDCERR_INVCH); \
382 if (st != ST_MAIN) return (CDCERR_INVEQPAD); \
383 BASECONV(x, wd, 8, PUTRAW); \
388 if (st == ST_MAIN && !(f & CDCF_IGNZPAD) && \
389 ((nb && !(nb%wd)) || (a & ((1 << nb) - 1)))) \
390 return (CDCERR_INVZPAD); \
391 if (!(f & (CDCF_IGNEQPAD | CDCF_IGNEQMID | CDCF_NOEQPAD)) && nb) \
392 return (CDCERR_INVEQPAD); \
401 /*----- Codec implementation ----------------------------------------------*/
/* Expands to the codec operations for one context type @ctxn@.
 *
 *   @CTXN_enc@, @CTXN_dec@   treat the @codec@ pointer as a @CTXN_codec@
 *                 and call the core encoder or decoder on its context,
 *                 passing the map stored in the codec; the core function's
 *                 result is returned unchanged.
 *   @CTXN_destroy@   hands the context's indent string to @xfree@ if the
 *                 pointer is not null.
 *   @CTXN_docreate@  makes a new @CTXN_codec@ with @CREATE@ and fills it
 *                 in: the flags are put through the context type's mask
 *                 and XOR and stored in @ctx.qsz@; the indent string is
 *                 the result of @xstrdup@ on @indent@, or null if none was
 *                 given; the line limit and the two maps are stored as
 *                 supplied.
 */
403 #define OPS(ctxn, wd, acc) \
405 static int ctxn##_enc(codec *c, const void *p, size_t sz, dstr *d) \
407 ctxn##_codec *bc = (ctxn##_codec *)c; \
408 return (ctxn##_doencode(&bc->ctx, bc->encodemap, p, sz, d)); \
411 static int ctxn##_dec(codec *c, const void *p, size_t sz, dstr *d) \
413 ctxn##_codec *bc = (ctxn##_codec *)c; \
414 return (ctxn##_dodecode(&bc->ctx, bc->decodemap, p, sz, d)); \
417 static void ctxn##_destroy(codec *c) \
419 ctxn##_codec *bc = (ctxn##_codec *)c; \
420 if (bc->ctx.indent) xfree(UNCONST(char, bc->ctx.indent)); \
424 static codec *ctxn##_docreate(unsigned flags, \
425 const char *indent, unsigned maxline, \
426 const codec_ops *ops, \
427 const char *encodemap, \
428 const signed char *decodemap) \
430 ctxn##_codec *bc = CREATE(ctxn##_codec); \
433 bc->ctx.qsz = (flags & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
435 bc->ctx.indent = indent ? xstrdup(indent) : 0; \
436 bc->ctx.maxline = maxline; \
437 bc->encodemap = encodemap; \
438 bc->decodemap = decodemap; \
/* Expands to the codec class @NAME_class@ and the pieces it refers to.
 * The two operations tables point back at the class and share
 * @CTXN_destroy@; they differ only in using @CTXN_enc@ or @CTXN_dec@.
 * @NAME_encoder@ and @NAME_decoder@ both call @CTXN_docreate@ with the
 * matching operations table; the decoder passes zero for both the indent
 * string and the maximum line length.  The class object itself is the
 * only name here without @static@, and its first member is the class name
 * as a string.
 */
444 #define CLASS(name, ctxn, acc) \
446 static const codec_ops \
447 name##_encode_ops = { &name##_class, ctxn##_enc, ctxn##_destroy }, \
448 name##_decode_ops = { &name##_class, ctxn##_dec, ctxn##_destroy }; \
450 static codec *name##_encoder(unsigned flags, \
451 const char *indent, unsigned maxline) \
453 return ctxn##_docreate(flags, indent, maxline, \
454 &name##_encode_ops, \
459 static codec *name##_decoder(unsigned flags) \
461 return ctxn##_docreate(flags, 0, 0, \
462 &name##_decode_ops, \
467 const codec_class name##_class = \
468 { #name, name##_encoder, name##_decoder };
472 /*----- Compatibility veneers ---------------------------------------------*/
/* Expands to the old-style entry points for one context type @ctxn@.
 * @CTXN_encode@ and @CTXN_decode@ call the core encoder and decoder with
 * the maps named after the context type (@encodemap_CTXN@ and
 * @decodemap_CTXN@); they return nothing, so the core function's result
 * code is discarded.  @CTXN_init@ sets the context's flags from
 * @CTXN_OLDFLAGS@, put through the context type's mask and XOR, and points
 * its indent string at a string literal holding a single newline.
 */
474 #define COMPAT(ctxn, wd, acc) \
476 void ctxn##_encode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
477 { ctxn##_doencode(ctx, encodemap_##ctxn, p, sz, d); } \
479 void ctxn##_decode(ctxn##_ctx *ctx, const void *p, size_t sz, dstr *d) \
480 { ctxn##_dodecode(ctx, decodemap_##ctxn, p, sz, d); } \
482 void ctxn##_init(ctxn##_ctx *ctx) \
485 ctx->qsz = (ctxn##_OLDFLAGS & ctxn##_FLAGMASK) ^ ctxn##_FLAGXOR; \
487 ctx->indent = "\n"; \
493 /*----- That's all, folks -------------------------------------------------*/