From 5b708e0cac7e269355f5b38480ad0256e5831a0d Mon Sep 17 00:00:00 2001
Message-Id: <5b708e0cac7e269355f5b38480ad0256e5831a0d.1714080893.git.mdw@distorted.org.uk>
From: Mark Wooding
Date: Sun, 4 May 2008 21:22:11 +0100
Subject: [PATCH] Shift many cgi_ functions into lib/, and add a test for them.
 The multipart/form-data code is not yet exercised, and the CGI program
 does not use the new code.
Organization: Straylight/Edgeware

From: Richard Kettlewell

---
 .bzrignore      |    1 +
 configure.ac    |    2 +-
 lib/Makefile.am |    7 +-
 lib/cgi.c       |  343 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/cgi.h       |   46 +++++++
 lib/t-cgi.c     |  102 ++++++++++++++
 server/cgi.h    |    6 +-
 7 files changed, 502 insertions(+), 5 deletions(-)
 create mode 100644 lib/cgi.c
 create mode 100644 lib/cgi.h
 create mode 100644 lib/t-cgi.c

diff --git a/.bzrignore b/.bzrignore
index 8b39f2e..ae0a2c9 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -173,3 +173,4 @@ lib/t-vector
 lib/t-words
 lib/t-wstat
 lib/t-macros
+lib/t-cgi
diff --git a/configure.ac b/configure.ac
index 1dabfe5..175add4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -417,7 +417,7 @@ if test $ac_cv_type_long_long = yes; then
     AC_DEFINE([DECLARES_ATOLL],[1],[define if <stdlib.h> declares atoll])
   fi
 fi
-AC_CHECK_FUNCS([ioctl nl_langinfo strsignal],[:],[
+AC_CHECK_FUNCS([ioctl nl_langinfo strsignal setenv unsetenv],[:],[
   missing_functions="$missing_functions $ac_func"
 ])
 # fsync will do if fdatasync not available
diff --git a/lib/Makefile.am b/lib/Makefile.am
index eae7288..9c9826f 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -22,7 +22,7 @@ TESTS=t-addr t-basen t-bits t-cache t-casefold t-cookies \
 	t-filepart t-hash t-heap t-hex t-kvp t-mime t-printf		\
 	t-regsub t-selection t-signame t-sink t-split t-syscalls	\
 	t-trackname t-unicode t-url t-utf8 t-vector t-words t-wstat	\
-	t-macros
+	t-macros t-cgi
 
 noinst_LIBRARIES=libdisorder.a
 include_HEADERS=disorder.h
@@ -42,6 +42,7 @@ libdisorder_a_SOURCES=charset.c charset.h \
 	base64.c base64.h				\
 	bits.c bits.h					\
 	cache.c cache.h					\
+	cgi.c cgi.h					\
 	client.c client.h				\
 	client-common.c client-common.h			\
 	configuration.c configuration.h			\
@@ -142,6 +143,10 @@ t_casefold_SOURCES=t-casefold.c test.c test.h
 t_casefold_LDADD=libdisorder.a $(LIBPCRE) $(LIBICONV) $(LIBGC)
 t_casefold_DEPENDENCIES=libdisorder.a
 
+t_cgi_SOURCES=t-cgi.c test.c test.h
+t_cgi_LDADD=libdisorder.a $(LIBPCRE) $(LIBICONV) $(LIBGC)
+t_cgi_DEPENDENCIES=libdisorder.a
+
 t_cookies_SOURCES=t-cookies.c test.c test.h
 t_cookies_LDADD=libdisorder.a $(LIBPCRE) $(LIBICONV) $(LIBGC)
 t_cookies_DEPENDENCIES=libdisorder.a
diff --git a/lib/cgi.c b/lib/cgi.c
new file mode 100644
index 0000000..9b4762f
--- /dev/null
+++ b/lib/cgi.c
@@ -0,0 +1,343 @@
+/*
+ * This file is part of DisOrder.
+ * Copyright (C) 2004, 2005, 2007, 2008 Richard Kettlewell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+/** @file lib/cgi.c
+ * @brief CGI tools
+ */
+
+#include <config.h>
+#include "types.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "cgi.h"
+#include "mem.h"
+#include "log.h"
+#include "vector.h"
+#include "hash.h"
+#include "kvp.h"
+#include "mime.h"
+#include "unicode.h"
+#include "sink.h"
+
+/** @brief Hash of arguments */
+static hash *cgi_args;
+
+/** @brief Get CGI arguments from a GET request's query string */
+static struct kvp *cgi__init_get(void) {
+  const char *q;
+
+  if((q = getenv("QUERY_STRING")))
+    return kvp_urldecode(q, strlen(q));
+  error(0, "QUERY_STRING not set, assuming empty");
+  return NULL;
+}
+
+/** @brief Read the HTTP request body as a 0-terminated string */
+static void cgi__input(char **ptrp, size_t *np) {
+  const char *cl;
+  char *q;
+  size_t n, m = 0;
+  int r;
+
+  if(!(cl = getenv("CONTENT_LENGTH")))
+    fatal(0, "CONTENT_LENGTH not set");
+  n = atol(cl);
+  /* We check for overflow and also limit the input to 16MB. Lower
+   * would probably do.  */
+  if(!(n+1) || n > 16 * 1024 * 1024)
+    fatal(0, "input is much too large");
+  q = xmalloc_noptr(n + 1);
+  while(m < n) {
+    r = read(0, q + m, n - m);
+    if(r > 0)
+      m += r;
+    else if(r == 0)
+      fatal(0, "unexpected end of file reading request body");
+    else switch(errno) {
+    case EINTR: break;
+    default: fatal(errno, "error reading request body");
+    }
+  }
+  if(memchr(q, 0, n))
+    fatal(0, "null character in request body");
+  q[n] = 0;                             /* last of the n + 1 bytes allocated */
+  *ptrp = q;
+  if(np)
+    *np = n;
+}
+
+/** @brief Called for each part header field (see cgi__part_callback()) */
+static int cgi__field_callback(const char *name, const char *value,
+                               void *u) {
+  char *disposition, *pname, *pvalue;
+  char **namep = u;
+
+  if(!strcmp(name, "content-disposition")) {
+    if(mime_rfc2388_content_disposition(value,
+                                        &disposition,
+                                        &pname,
+                                        &pvalue))
+      fatal(0, "error parsing Content-Disposition field");
+    if(!strcmp(disposition, "form-data")
+       && pname
+       && !strcmp(pname, "name")) {
+      if(*namep)
+        fatal(0, "duplicate Content-Disposition field");
+      *namep = pvalue;
+    }
+  }
+  return 0;
+}
+
+/** @brief Called for each part (see cgi__init_multipart()) */
+static int cgi__part_callback(const char *s,
+                              void *u) {
+  char *name = 0;
+  struct kvp *k, **head = u;
+
+  if(!(s = mime_parse(s, cgi__field_callback, &name)))
+    fatal(0, "error parsing part header");
+  if(!name)
+    fatal(0, "no name found");
+  k = xmalloc(sizeof *k);
+  k->next = *head;
+  k->name = name;
+  k->value = s;
+  *head = k;
+  return 0;
+}
+
+/** @brief Initialize CGI arguments from a multipart/form-data request body */
+static struct kvp *cgi__init_multipart(const char *boundary) {
+  char *q;
+  struct kvp *head = 0;
+
+  cgi__input(&q, 0);
+  if(mime_multipart(q, cgi__part_callback, boundary, &head))
+    fatal(0, "invalid multipart object");
+  return head;
+}
+
+/** @brief Initialize CGI arguments from a POST request */
+static struct kvp *cgi__init_post(void) {
+  const char *ct, *boundary;
+  char *q, *type;
+  size_t n;
+  struct kvp *k;
+
+  if(!(ct = getenv("CONTENT_TYPE")))
+    ct = "application/x-www-form-urlencoded";
+  if(mime_content_type(ct, &type, &k))
+    fatal(0, "invalid content type '%s'", ct);
+  if(!strcmp(type, "application/x-www-form-urlencoded")) {
+    cgi__input(&q, &n);
+    return kvp_urldecode(q, n);
+  }
+  if(!strcmp(type, "multipart/form-data")) {
+    if(!(boundary = kvp_get(k, "boundary")))
+      fatal(0, "no boundary parameter found");
+    return cgi__init_multipart(boundary);
+  }
+  fatal(0, "unrecognized content type '%s'", type);
+}
+
+/** @brief Initialize CGI arguments
+ *
+ * Must be called before other cgi_ functions are used.
+ *
+ * This function can be called more than once, in which case it
+ * revisits the environment and (perhaps) standard input. This is
+ * only intended to be used for testing, actual CGI applications
+ * should call it exactly once.
+ */
+void cgi_init(void) {
+  const char *p;
+  struct kvp *k;
+
+  cgi_args = hash_new(sizeof (char *));
+  if(!(p = getenv("REQUEST_METHOD")))
+    error(0, "REQUEST_METHOD not set, assuming GET");
+  if(!p || !strcmp(p, "GET"))
+    k = cgi__init_get();
+  else if(!strcmp(p, "POST"))
+    k = cgi__init_post();
+  else
+    fatal(0, "unknown request method %s", p);
+  /* Validate the arguments and put them in a hash */
+  for(; k; k = k->next) {
+    if(!utf8_valid(k->name, strlen(k->name))
+       || !utf8_valid(k->value, strlen(k->value)))
+      error(0, "invalid UTF-8 sequence in cgi argument %s", k->name);
+    else
+      hash_add(cgi_args, k->name, &k->value, HASH_INSERT_OR_REPLACE);
+    /* We just drop bogus arguments. */
+  }
+}
+
+/** @brief Get a CGI argument by name
+ *
+ * cgi_init() must be called first. Names and values are all valid
+ * UTF-8 strings (and this is enforced at initialization time).
+ */
+const char *cgi_get(const char *name) {
+  const char **v = hash_find(cgi_args, name);
+
+  return v ? *v : NULL;
+}
+
+/** @brief Add SGML-style quoting
+ * @param src String to quote (UTF-8)
+ * @return Quoted string
+ *
+ * Quotes characters for insertion into HTML output. Anything that is
+ * not a printable ASCII character will be converted to a numeric
+ * character reference, as will '"', '&', '<' and '>' (since those
+ * have special meanings).
+ *
+ * Quoting everything down to ASCII means we don't care what the
+ * content encoding really is (as long as it's not anything insane
+ * like EBCDIC).
+ */
+char *cgi_sgmlquote(const char *src) {
+  uint32_t *ucs, c;
+  int n;
+  struct dynstr d[1];
+  struct sink *s;
+
+  if(!(ucs = utf8_to_utf32(src, strlen(src), 0)))
+    exit(1);
+  dynstr_init(d);
+  s = sink_dynstr(d);
+  n = 1;
+  /* format the string */
+  while((c = *ucs++)) {
+    switch(c) {
+    default:
+      if(c > 126 || c < 32) {
+      case '"':
+      case '&':
+      case '<':
+      case '>':
+        /* For simplicity we always use numeric character references
+         * even if a named reference is available. */
+        sink_printf(s, "&#%"PRIu32";", c);
+        break;
+      } else
+        sink_writec(s, (char)c);
+    }
+  }
+  dynstr_terminate(d);
+  return d->vec;
+}
+
+/** @brief Write a CGI attribute
+ * @param output Where to send output
+ * @param name Attribute name
+ * @param value Attribute value
+ */
+void cgi_attr(struct sink *output, const char *name, const char *value) {
+  /* Try to avoid needless quoting */
+  if(!value[strspn(value, "abcdefghijklmnopqrstuvwxyz"
+                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                   "0123456789")])
+    sink_printf(output, "%s=%s", name, value);
+  else
+    sink_printf(output, "%s=\"%s\"", name, cgi_sgmlquote(value));
+}
+
+/** @brief Write an open tag
+ * @param output Where to send output
+ * @param name Element name
+ * @param ... Attribute name/value pairs
+ *
+ * The name/value pair list is terminated by a single (char *)0.
+ */
+void cgi_opentag(struct sink *output, const char *name, ...) {
+  va_list ap;
+  const char *n, *v;
+
+  sink_printf(output, "<%s", name);
+  va_start(ap, name);
+  while((n = va_arg(ap, const char *))) {
+    sink_printf(output, " ");
+    v = va_arg(ap, const char *);
+    if(v)
+      cgi_attr(output, n, v);
+    else
+      sink_printf(output, "%s", n);     /* n is data, never a format string */
+  }
+  va_end(ap);
+  sink_printf(output, ">");
+}
+
+/** @brief Write a close tag
+ * @param output Where to send output
+ * @param name Element name
+ */
+void cgi_closetag(struct sink *output, const char *name) {
+  sink_printf(output, "</%s>", name);
+}
+
+/** @brief Construct a URL
+ * @param url Base URL
+ * @param ... Name/value pairs for constructed query string
+ * @return Constructed URL
+ *
+ * The name/value pair list is terminated by a single (char *)0.
+ */
+char *cgi_makeurl(const char *url, ...) {
+  va_list ap;
+  struct kvp *kvp, *k, **kk = &kvp;
+  struct dynstr d;
+  const char *n, *v;
+
+  dynstr_init(&d);
+  dynstr_append_string(&d, url);
+  va_start(ap, url);
+  while((n = va_arg(ap, const char *))) {
+    v = va_arg(ap, const char *);
+    *kk = k = xmalloc(sizeof *k);
+    kk = &k->next;
+    k->name = n;
+    k->value = v;
+  }
+  va_end(ap);
+  *kk = 0;
+  if(kvp) {
+    dynstr_append(&d, '?');
+    dynstr_append_string(&d, kvp_urlencode(kvp, 0));
+  }
+  dynstr_terminate(&d);
+  return d.vec;
+}
+
+/*
+Local Variables:
+c-basic-offset:2
+comment-column:40
+fill-column:79
+indent-tabs-mode:nil
+End:
+*/
diff --git a/lib/cgi.h b/lib/cgi.h
new file mode 100644
index 0000000..1ad4167
--- /dev/null
+++ b/lib/cgi.h
@@ -0,0 +1,46 @@
+/*
+ * This file is part of DisOrder.
+ * Copyright (C) 2004, 2005, 2007, 2008 Richard Kettlewell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+/** @file lib/cgi.h
+ * @brief CGI tools
+ */
+
+#ifndef CGI_H
+#define CGI_H
+
+struct sink;
+
+void cgi_init(void);
+const char *cgi_get(const char *name);
+char *cgi_sgmlquote(const char *src);
+void cgi_attr(struct sink *output, const char *name, const char *value);
+void cgi_opentag(struct sink *output, const char *name, ...);
+void cgi_closetag(struct sink *output, const char *name);
+char *cgi_makeurl(const char *url, ...);
+
+#endif
+
+/*
+Local Variables:
+c-basic-offset:2
+comment-column:40
+fill-column:79
+indent-tabs-mode:nil
+End:
+*/
diff --git a/lib/t-cgi.c b/lib/t-cgi.c
new file mode 100644
index 0000000..a055c67
--- /dev/null
+++ b/lib/t-cgi.c
@@ -0,0 +1,102 @@
+/*
+ * This file is part of DisOrder.
+ * Copyright (C) 2008 Richard Kettlewell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+#include "test.h"
+#include "cgi.h"
+
+static void input_from(const char *s) {
+  FILE *fp = tmpfile();
+  char buffer[64];
+
+  if(fputs(s, fp) < 0 || fflush(fp) < 0)
+    fatal(errno, "writing to temporary file");
+  rewind(fp);
+  xdup2(fileno(fp), 0);
+  lseek(0, 0/*offset*/, SEEK_SET);
+  snprintf(buffer, sizeof buffer, "%zu", strlen(s));
+  setenv("CONTENT_LENGTH", buffer, 1);
+}
+
+static void test_cgi(void) {
+  struct dynstr d[1];
+
+  setenv("REQUEST_METHOD", "GET", 1);
+  setenv("QUERY_STRING", "foo=bar&a=b+c&c=x%7ey", 1);
+  cgi_init();
+  check_string(cgi_get("foo"), "bar");
+  check_string(cgi_get("a"), "b c");
+  check_string(cgi_get("c"), "x~y");
+
+  setenv("REQUEST_METHOD", "POST", 1);
+  unsetenv("QUERY_STRING");
+  input_from("foo=xbar&a=xb+c&c=xx%7ey");
+  cgi_init();
+  check_string(cgi_get("foo"), "xbar");
+  check_string(cgi_get("a"), "xb c");
+  check_string(cgi_get("c"), "xx~y");
+
+  /* TODO multipart/form-data */
+
+  check_string(cgi_sgmlquote("foobar"), "foobar");
+  check_string(cgi_sgmlquote("<wibble>"), "&#60;wibble&#62;");
+  check_string(cgi_sgmlquote("\"&\""), "&#34;&#38;&#34;");
+  check_string(cgi_sgmlquote("\xC2\xA3"), "&#163;");
+
+  dynstr_init(d);
+  cgi_opentag(sink_dynstr(d), "element",
+              "foo", "bar",
+              "foo", "has space",
+              "foo", "has \"quotes\"",
+              (char *)NULL);
+  dynstr_terminate(d);
+  check_string(d->vec, "<element foo=bar foo=\"has space\" foo=\"has &#34;quotes&#34;\">");
+
+  dynstr_init(d);
+  cgi_opentag(sink_dynstr(d), "element",
+              "foo", (char *)NULL,
+              (char *)NULL);
+  dynstr_terminate(d);
+  check_string(d->vec, "<element foo>");
+
+  dynstr_init(d);
+  cgi_closetag(sink_dynstr(d), "element");
+  dynstr_terminate(d);
+  check_string(d->vec, "</element>");
+
+  check_string(cgi_makeurl("http://example.com/", (char *)NULL),
+               "http://example.com/");
+  check_string(cgi_makeurl("http://example.com/",
+                           "foo", "bar",
+                           "a", "b c",
+                           "d", "f=g+h",
+                           (char *)NULL),
+               "http://example.com/?foo=bar&a=b%20c&d=f%3dg%2bh");
+
+}
+
+TEST(cgi);
+
+/*
+Local Variables:
+c-basic-offset:2
+comment-column:40
+fill-column:79
+indent-tabs-mode:nil
+End:
+*/
diff --git a/server/cgi.h b/server/cgi.h
index 8a76d31..9fe27be 100644
--- a/server/cgi.h
+++ b/server/cgi.h
@@ -18,8 +18,8 @@
  * USA
  */
 
-#ifndef CGI_H
-#define CGI_H
+#ifndef SERVER_CGI_H
+#define SERVER_CGI_H
 
 extern struct kvp *cgi_args;
 
@@ -104,7 +104,7 @@ const char *cgi_transform(const char *type,
 void cgi_set_option(const char *name, const char *value);
 /* set an option */
 
-#endif /* CGI_H */
+#endif /* SERVER_CGI_H */
 
 /*
 Local Variables:
-- 
[mdw]