From 7e4708e456b17131f28ae7f1ff7b5c7f86e2f803 Mon Sep 17 00:00:00 2001 Message-Id: <7e4708e456b17131f28ae7f1ff7b5c7f86e2f803.1713895947.git.mdw@distorted.org.uk> From: Mark Wooding Date: Tue, 14 Mar 2006 16:41:41 +0000 Subject: [PATCH] url: Allow various `safe' characters unquoted in URL strings. Organization: Straylight/Edgeware From: Mark Wooding Allow `.', `-', `_' and `/', because they're commonly used in filenames, and it's nice for them to be displayed readably. The `~' character isn't actually safe but we allow it anyway. --- man/url.3 | 21 +++++++++++++++++++++ url.c | 36 +++++++++++++++++++++++++++--------- url.h | 2 ++ 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/man/url.3 b/man/url.3 index 5a96403..919ce93 100644 --- a/man/url.3 +++ b/man/url.3 @@ -57,6 +57,27 @@ string (see .BR dstr (3) for details). .PP +You can set flags in the encoding context's +.B f +member: +.TP +.B URLF_STRICT +Be strict about escaping non-alphanumeric characters. Without this, +potentially unsafe characters such as +.RB ` / ' +and +.RB ` ~ ' +will be left unescaped, which makes encoded filenames (for example) more +readable. +.TP +.B URLF_LAX +Be very lax about non-alphanumeric characters. Everything except +obviously-unsafe characters like +.RB ` & ' +and +.RB ` = ' +is left unescaped. +.PP Decoding a sequence of name/value pairs is performed using the .B url_dec function. It requires as input a diff --git a/url.c b/url.c index d859bfa..f7e11b3 100644 --- a/url.c +++ b/url.c @@ -52,7 +52,8 @@ void url_initenc(url_ectx *ctx) { ctx->f = 0; } /* --- @encode@ --- * * - * Arguments: @dstr *d@ = pointer to output string + * Arguments: @url_ectx *ctx@ = encoding context + * @dstr *d@ = pointer to output string * @const char *p@ = pointer to thing to encode * * Returns: --- @@ -60,7 +61,7 @@ void url_initenc(url_ectx *ctx) { ctx->f = 0; } * Use: Encodes the input string into the output string. 
*/ -static void encode(dstr *d, const char *p) +static void encode(url_ectx *ctx, dstr *d, const char *p) { while (*p) { switch (*p) { @@ -68,11 +69,28 @@ static void encode(dstr *d, const char *p) DPUTC(d, '+'); break; default: - if (isalnum((unsigned char)*p)) - DPUTC(d, *p); - else - dstr_putf(d, "%%%02x", *p); - break; + if ((ctx->f & URLF_LAX) || isalnum((unsigned char)*p)) + goto safe; + else + goto unsafe; + case '/': + case '~': + if (ctx->f & URLF_STRICT) + goto unsafe; + case '-': + case '.': + case '_': + safe: + DPUTC(d, *p); + break; + unsafe: + case '+': + case '%': + case '=': + case '&': + case ';': + dstr_putf(d, "%%%02x", *p); + break; } p++; } @@ -95,9 +113,9 @@ void url_enc(url_ectx *ctx, dstr *d, const char *name, const char *value) { if (ctx->f & URLF_SEP) DPUTC(d, '&'); - encode(d, name); + encode(ctx, d, name); DPUTC(d, '='); - encode(d, value); + encode(ctx, d, value); DPUTZ(d); ctx->f |= URLF_SEP; } diff --git a/url.h b/url.h index c351343..be38bdc 100644 --- a/url.h +++ b/url.h @@ -47,6 +47,8 @@ typedef struct url_ectx { } url_ectx; #define URLF_SEP 1u +#define URLF_STRICT 2u +#define URLF_LAX 4u typedef struct url_dctx { const char *p; -- [mdw]