From: Mark Wooding Date: Sat, 2 Dec 2017 21:44:38 +0000 (+0000) Subject: libtests: Include the Unicode test files directly. X-Git-Tag: 5.2~40 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/commitdiff_plain/34fb8c61ac9d00d64a82facbfc16113cc5b6cfd1 libtests: Include the Unicode test files directly. Rather than fetch the files using `wget' at test time, fire up `gzip' to decompress them from local copies. The files compress really rather well, so this is an overall saving in disk space relative to the previous version -- especially since we now share the test files among all build trees rather than having a separate copy in each. On the other hand, they're moderately large things to have in the source distribution, though small compared to the `images/' tree. Of course, the main reason for doing this is to completely eliminate the need for external network connectivity during a build. The copyright notice, at https://www.unicode.org/copyright.html, appears to be compatible with the GPL (which is good, because I think we'd have had a problem using these files even if we didn't distribute them). I've included the copyright notice as COPYING.unicode-tests, in order to comply with requirement (a). Should it be necessary to update the copies of the test files, there's a (slightly hairy) make target `update-unicode-tests' which can be invoked by hand to do this. --- diff --git a/README b/README index 5daf7b7..80e8d17 100644 --- a/README +++ b/README @@ -282,6 +282,8 @@ Portions extracted from MPG321, http://mpg321.sourceforge.net/ Copyright (C) 2000-2001 Robert Leslie Portions Copyright (C) 1997-2006 Free Software Foundation, Inc. Portions Copyright (C) 2000 Red Hat, Inc., Jonathan Blandford +Unicode test files Copyright (C) 1991-2017 Unicode Inc.; see + libtests/COPYING.unicode-tests for details. Binaries may derive extra copyright owners through linkage (binary distributors are expected to do their own legwork) diff --git a/libtests/COPYING.unicode-tests b/libtests/COPYING.unicode-tests new file mode 100644 index 0000000..b456a04 --- /dev/null +++ b/libtests/COPYING.unicode-tests @@ -0,0 +1,38 @@ +[The Unicode test files GraphemeBreakTest.txt, NormalizationTest.txt, +and WordBreakTest.txt, included in this directory, are copyright (c) +1991--2017 Unicode Inc., and subject to the license conditions below, as +published at https://www.unicode.org/copyright.html. These files are +used for testing, but are not required at runtime. In particular, they +are not included in binary packages. -- [mdw]] + +Copyright © 1991-2017 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in http://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. diff --git a/libtests/GraphemeBreakTest.txt.gz b/libtests/GraphemeBreakTest.txt.gz new file mode 100644 index 0000000..128d91f Binary files /dev/null and b/libtests/GraphemeBreakTest.txt.gz differ diff --git a/libtests/Makefile.am b/libtests/Makefile.am index e4a9eb0..83a7f97 100644 --- a/libtests/Makefile.am +++ b/libtests/Makefile.am @@ -56,6 +56,7 @@ t_split_SOURCES=t-split.c test.c test.h t_syscalls_SOURCES=t-syscalls.c test.c test.h t_trackname_SOURCES=t-trackname.c test.c test.h t_unicode_SOURCES=t-unicode.c test.c test.h +t_unicode_CFLAGS=$(AM_CFLAGS) -DSRCDIR=\"$(srcdir)\" t_url_SOURCES=t-url.c test.c test.h t_utf8_SOURCES=t-utf8.c test.c test.h t_vector_SOURCES=t-vector.c test.c test.h @@ -74,9 +75,22 @@ before-check: make-coverage-reports: check cd ../lib && ${GCOV} *.c | ${PYTHON} ../scripts/format-gcov-report --html . *.c -EXTRA_DIST=t-macros-1.tmpl t-macros-2 +UNICODE_TEST_FILES=GraphemeBreakTest NormalizationTest WordBreakTest +UNICODE_TEST_BASE_URL=http://www.unicode.org/Public/6.0.0/ucd/ +GraphemeBreakTest_URL=$(UNICODE_TEST_BASE_URL)/auxiliary/GraphemeBreakTest.txt +NormalizationTest_URL=$(UNICODE_TEST_BASE_URL)/NormalizationTest.txt +WordBreakTest_URL=$(UNICODE_TEST_BASE_URL)/auxiliary/WordBreakTest.txt +update-unicode-tests: + set -e; \ + for t in $(foreach t,$(UNICODE_TEST_FILES),$t:$($t_URL)); do \ + f=$${t%%:*} u=$${t#*:}; \ + echo $$f $$u; \ + rm -f $$f.new $$f.new.gz; wget -O$$f.new $$u; \ + gzip -9cv $$f.new >$$f.new.gz; \ + mv -f $$f.new.gz $(srcdir)/$$f.txt.gz; rm -f $$f.new; \ + done -CLEANFILES=*.gcda *.gcov *.gcno *.c.html index.html +EXTRA_DIST=t-macros-1.tmpl t-macros-2 \ + COPYING.unicode-tests $(addsuffix .txt.gz, $(UNICODE_TEST_FILES)) -DISTCLEANFILES=GraphemeBreakTest.txt NormalizationTest.txt \ - WordBreakTest.txt +CLEANFILES=*.gcda *.gcov *.gcno *.c.html index.html diff --git a/libtests/NormalizationTest.txt.gz b/libtests/NormalizationTest.txt.gz new file mode 100644 index 0000000..6524991 Binary files /dev/null and b/libtests/NormalizationTest.txt.gz differ diff --git a/libtests/WordBreakTest.txt.gz b/libtests/WordBreakTest.txt.gz new file mode 100644 index 0000000..72c0193 Binary files /dev/null and b/libtests/WordBreakTest.txt.gz differ diff --git a/libtests/t-unicode.c b/libtests/t-unicode.c index 2a199e3..6ab20f5 100644 --- a/libtests/t-unicode.c +++ b/libtests/t-unicode.c @@ -17,30 +17,30 @@ */ #include "test.h" +#ifndef SRCDIR +# define SRCDIR "." +#endif + /** @brief Open a Unicode test file */ static FILE *open_unicode_test(const char *path) { - const char *base; FILE *fp; char buffer[1024]; - int w; - if((base = strrchr(path, '/'))) - ++base; - else - base = path; - if(!(fp = fopen(base, "r"))) { - snprintf(buffer, sizeof buffer, - "wget http://www.unicode.org/Public/6.0.0/ucd/%s", path); - if((w = system(buffer))) - disorder_fatal(0, "%s: %s", buffer, wstat(w)); - if(chmod(base, 0444) < 0) - disorder_fatal(errno, "chmod %s", base); - if(!(fp = fopen(base, "r"))) - disorder_fatal(errno, "%s", base); - } + snprintf(buffer, sizeof buffer, "gzip -dc " SRCDIR "/%s.gz", path); + if(!(fp = popen(buffer, "r"))) + disorder_fatal(errno, "decompressing %s", path); return fp; } +/** @brief Close a Unicode test file */ +static void close_unicode_test(const char *path, FILE *fp) +{ + int w; + + if((w = pclose(fp))) + disorder_fatal(0, "decompressing %s: %s", path, wstat(w)); +} + /** @brief Run breaking tests for utf32_grapheme_boundary() etc */ static void breaktest(const char *path, int (*breakfn)(const uint32_t *, size_t, size_t)) { @@ -94,7 +94,7 @@ static void breaktest(const char *path, } xfree(l); } - fclose(fp); + close_unicode_test(path, fp); } /** @brief Tests for @ref lib/unicode.h */ @@ -179,9 +179,9 @@ static void test_unicode(void) { } xfree(l); } - fclose(fp); - breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary); - breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary); + close_unicode_test("NormalizationTest.txt", fp); + breaktest("GraphemeBreakTest.txt", utf32_is_grapheme_boundary); + breaktest("WordBreakTest.txt", utf32_is_word_boundary); insist(utf32_combining_class(0x40000) == 0); insist(utf32_combining_class(0xE0000) == 0); }