chiark / gitweb /
libtests: Include the Unicode test files directly.
author Mark Wooding <mdw@distorted.org.uk>
Sat, 2 Dec 2017 21:44:38 +0000 (21:44 +0000)
committer Mark Wooding <mdw@distorted.org.uk>
Sat, 2 Dec 2017 22:18:21 +0000 (22:18 +0000)
Rather than fetch the files using `wget' at test time, fire up `gzip' to
decompress them from local copies.  The files compress really rather
well, so this is an overall saving in disk space relative to the
previous version -- especially since we now share the test files among
all build trees rather than having a separate copy in each.  On the
other hand, they're moderately large things to have in the source
distribution, though small compared to the `images/' tree.

Of course, the main reason for doing this is to completely eliminate the
need for external network connectivity during a build.

The copyright notice, at https://www.unicode.org/copyright.html, appears
to be compatible with the GPL (which is good, because I think we'd have
had a problem using these files even if we didn't distribute them).
I've included the copyright notice as COPYING.unicode-tests, in order to
comply with requirement (a).

Should it be necessary to update the copies of the test files, there's a
(slightly hairy) make target `update-unicode-tests' which can be invoked
by hand to do this.

README
libtests/COPYING.unicode-tests [new file with mode: 0644]
libtests/GraphemeBreakTest.txt.gz [new file with mode: 0644]
libtests/Makefile.am
libtests/NormalizationTest.txt.gz [new file with mode: 0644]
libtests/WordBreakTest.txt.gz [new file with mode: 0644]
libtests/t-unicode.c

diff --git a/README b/README
index 5daf7b7..80e8d17 100644 (file)
--- a/README
+++ b/README
@@ -282,6 +282,8 @@ Portions extracted from MPG321, http://mpg321.sourceforge.net/
   Copyright (C) 2000-2001 Robert Leslie
 Portions Copyright (C) 1997-2006 Free Software Foundation, Inc.
 Portions Copyright (C) 2000 Red Hat, Inc., Jonathan Blandford <jrb@redhat.com>
+Unicode test files Copyright (C) 1991-2017 Unicode Inc.; see
+  libtests/COPYING.unicode-tests for details.
 Binaries may derive extra copyright owners through linkage (binary distributors
 are expected to do their own legwork)
 
diff --git a/libtests/COPYING.unicode-tests b/libtests/COPYING.unicode-tests
new file mode 100644 (file)
index 0000000..b456a04
--- /dev/null
+++ b/libtests/COPYING.unicode-tests
@@ -0,0 +1,38 @@
+[The Unicode test files GraphemeBreakTest.txt, NormalizationTest.txt,
+and WordBreakTest.txt, included in this directory, are copyright (c)
+1991--2017 Unicode Inc., and subject to the license conditions below, as
+published at https://www.unicode.org/copyright.html.  These files are
+used for testing, but are not required at runtime.  In particular, they
+are not included in binary packages.  -- [mdw]]
+
+Copyright © 1991-2017 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
diff --git a/libtests/GraphemeBreakTest.txt.gz b/libtests/GraphemeBreakTest.txt.gz
new file mode 100644 (file)
index 0000000..128d91f
Binary files /dev/null and b/libtests/GraphemeBreakTest.txt.gz differ
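diff --git a/libtests/Makefile.am b/libtests/Makefile.am
index e4a9eb0..83a7f97 100644 (file)
--- a/libtests/Makefile.am
+++ b/libtests/Makefile.am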
@@ -56,6 +56,7 @@ t_split_SOURCES=t-split.c test.c test.h
 t_syscalls_SOURCES=t-syscalls.c test.c test.h
 t_trackname_SOURCES=t-trackname.c test.c test.h
 t_unicode_SOURCES=t-unicode.c test.c test.h
+t_unicode_CFLAGS=$(AM_CFLAGS) -DSRCDIR=\"$(srcdir)\"
 t_url_SOURCES=t-url.c test.c test.h
 t_utf8_SOURCES=t-utf8.c test.c test.h
 t_vector_SOURCES=t-vector.c test.c test.h
@@ -74,9 +75,22 @@ before-check:
 make-coverage-reports: check
        cd ../lib && ${GCOV} *.c | ${PYTHON} ../scripts/format-gcov-report --html . *.c
 
-EXTRA_DIST=t-macros-1.tmpl t-macros-2
+UNICODE_TEST_FILES=GraphemeBreakTest NormalizationTest WordBreakTest
+UNICODE_TEST_BASE_URL=http://www.unicode.org/Public/6.0.0/ucd/
+GraphemeBreakTest_URL=$(UNICODE_TEST_BASE_URL)/auxiliary/GraphemeBreakTest.txt
+NormalizationTest_URL=$(UNICODE_TEST_BASE_URL)/NormalizationTest.txt
+WordBreakTest_URL=$(UNICODE_TEST_BASE_URL)/auxiliary/WordBreakTest.txt
+update-unicode-tests:
+       set -e; \
+       for t in $(foreach t,$(UNICODE_TEST_FILES),$t:$($t_URL)); do \
+         f=$${t%%:*} u=$${t#*:}; \
+         echo $$f $$u; \
+         rm -f $$f.new $$f.new.gz; wget -O$$f.new $$u; \
+         gzip -9cv $$f.new >$$f.new.gz; \
+         mv -f $$f.new.gz $(srcdir)/$$f.txt.gz; rm -f $$f.new; \
+       done
 
-CLEANFILES=*.gcda *.gcov *.gcno *.c.html index.html
+EXTRA_DIST=t-macros-1.tmpl t-macros-2 \
+       COPYING.unicode-tests $(addsuffix .txt.gz, $(UNICODE_TEST_FILES))
 
-DISTCLEANFILES=GraphemeBreakTest.txt NormalizationTest.txt     \
-              WordBreakTest.txt
+CLEANFILES=*.gcda *.gcov *.gcno *.c.html index.html
diff --git a/libtests/NormalizationTest.txt.gz b/libtests/NormalizationTest.txt.gz
new file mode 100644 (file)
index 0000000..6524991
Binary files /dev/null and b/libtests/NormalizationTest.txt.gz differ
diff --git a/libtests/WordBreakTest.txt.gz b/libtests/WordBreakTest.txt.gz
new file mode 100644 (file)
index 0000000..72c0193
Binary files /dev/null and b/libtests/WordBreakTest.txt.gz differ
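diff --git a/libtests/t-unicode.c b/libtests/t-unicode.c
index 2a199e3..6ab20f5 100644 (file)
--- a/libtests/t-unicode.c
+++ b/libtests/t-unicode.c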
  */
 #include "test.h"
 
+#ifndef SRCDIR
+# define SRCDIR "."
+#endif
+
 /** @brief Open a Unicode test file */
 static FILE *open_unicode_test(const char *path) {
-  const char *base;
   FILE *fp;
   char buffer[1024];
-  int w;
 
-  if((base = strrchr(path, '/')))
-    ++base;
-  else
-    base = path;
-  if(!(fp = fopen(base, "r"))) {
-    snprintf(buffer, sizeof buffer,
-             "wget http://www.unicode.org/Public/6.0.0/ucd/%s", path);
-    if((w = system(buffer)))
-      disorder_fatal(0, "%s: %s", buffer, wstat(w));
-    if(chmod(base, 0444) < 0)
-      disorder_fatal(errno, "chmod %s", base);
-    if(!(fp = fopen(base, "r")))
-      disorder_fatal(errno, "%s", base);
-  }
+  snprintf(buffer, sizeof buffer, "gzip -dc " SRCDIR "/%s.gz", path);
+  if(!(fp = popen(buffer, "r")))
+    disorder_fatal(errno, "decompressing %s", path);
   return fp;
 }
 
+/** @brief Close a Unicode test file */
+static void close_unicode_test(const char *path, FILE *fp)
+{
+  int w;
+
+  if((w = pclose(fp)))
+    disorder_fatal(0, "decompressing %s: %s", path, wstat(w));
+}
+
 /** @brief Run breaking tests for utf32_grapheme_boundary() etc */
 static void breaktest(const char *path,
                       int (*breakfn)(const uint32_t *, size_t, size_t)) {
@@ -94,7 +94,7 @@ static void breaktest(const char *path,
     }
     xfree(l);
   }
-  fclose(fp);
+  close_unicode_test(path, fp);
 }
 
 /** @brief Tests for @ref lib/unicode.h */
@@ -179,9 +179,9 @@ static void test_unicode(void) {
     }
     xfree(l);
   }
-  fclose(fp);
-  breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
-  breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
+  close_unicode_test("NormalizationTest.txt", fp);
+  breaktest("GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
+  breaktest("WordBreakTest.txt", utf32_is_word_boundary);
   insist(utf32_combining_class(0x40000) == 0);
   insist(utf32_combining_class(0xE0000) == 0);
 }