chiark / gitweb /
ownsource: do not include files more than once in srcbomb
[hippotat.git] / hippotatlib / ownsource.py
index 218661a..8ecc7f2 100644 (file)
@@ -1,4 +1,27 @@
-# Automatic source code provision (AGPL compliance)
+# -*- python -*-
+#
+# Hippotat - Asinine IP Over HTTP program
+# hippotatlib/ownsource.py - Automatic source code provision (AGPL compliance)
+#
+# Copyright 2017 Ian Jackson
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version, with the "CAF Login
+# Exception" as published by Ian Jackson (version 2, or at your option
+# any later version) as an Additional Permission.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public
+# License and the CAF Login Exception along with this program, in the
+# file AGPLv3+CAFv2.  If not, email Ian Jackson
+# <ijackson@chiark.greenend.org.uk>.
+
 
 import os
 import sys
@@ -6,23 +29,31 @@ import fnmatch
 import stat
 import subprocess
 import tempfile
+import shutil
+
+try: import debian.deb822
+except ImportError: pass
 
 class SourceShipmentPreparer():
   def __init__(s, destdir):
     # caller may modify, and should read after calling generate()
-    s.output_name = 'srcbomb.tar.gz'
+    s.output_names = ['srcbomb.tar.gz', 'fullsrcbomb.tar']
+    s.output_paths = [None,None] # alternatively caller may read this
     # defaults, caller can modify after creation
+    s.logger = lambda m: print('SourceShipmentPreparer',m)
     s.src_filter = s.src_filter_glob
     s.src_package_globs = ['!/usr/local/*', '/usr*']
     s.src_filter_globs = ['!/etc/*']
     s.src_likeparent = s.src_likeparent_git
+    s.src_direxcludes = s.src_direxcludes_git
     s.report_from_packages = s.report_from_packages_debian
     s.cwd = os.getcwd()
     s.find_rune_base = "find -type f -perm -004 \! -path '*/tmp/*'"
-    s.excludes = ['*~', '*.bak', '*.tmp', '#*#',
+    s.ignores = ['*~', '*.bak', '*.tmp', '#*#', '__pycache__',
                   '[0-9][0-9][0-9][0-9]-src.cpio']
     s.rune_shell = ['/bin/bash', '-ec']
     s.show_pathnames = True
+    s.download_packages = True
     s.rune_cpio = r'''
             set -o pipefail
            (
@@ -34,9 +65,10 @@ class SourceShipmentPreparer():
             )
     '''
     s.rune_portmanteau = r'''
-            outfile=$1; shift
-            rm -f "$outfile"
-            GZIP=-1 tar zcf "$outfile" "$@"
+            GZIP=-1 tar zcf - "$@"
+    '''
+    s.rune_portmanteau_uncompressed = r'''
+            tar cf - "$@"
     '''
     s.manifest_name='0000-MANIFEST.txt'
     # private
@@ -45,6 +77,8 @@ class SourceShipmentPreparer():
     s._manifest = []
     s._dirmap = { }
     s._package_files = { } # map filename => infol
+    s._packages_path = os.path.join(s._destdir, 'packages')
+    s._package_sources = []
 
   def thing_matches_globs(s, thing, globs):
     for pat in globs:
@@ -57,6 +91,19 @@ class SourceShipmentPreparer():
   def src_filter_glob(s, src): # default s.src_filter
     return s.thing_matches_globs(src, s.src_filter_globs)
 
+  def src_direxcludes_git(s, d): # default s.src_direxcludes
+    # Returns the patterns in d/.gitignore, one string per line, minus
+    # blank lines and '#' comments; [] if d has no .gitignore.
+    try:
+      excl = open(os.path.join(d, '.gitignore'))
+    except FileNotFoundError:
+      return []
+    with excl: # close promptly; strip() drops the newline and padding
+      r = [l.strip() for l in excl]
+    # NOTE: patterns are passed on verbatim; gitignore-only syntax such
+    # as a leading '/' or '!' is not interpreted here.
+    return [l for l in r if l and not l.startswith('#')]
+
   def src_likeparent_git(s, src):
     try:
       os.stat(os.path.join(src, '.git/.'))
@@ -101,9 +148,12 @@ class SourceShipmentPreparer():
 
   def srcdir_find_rune(s, d):
     script = s.find_rune_base
-    for excl in s.excludes + [s.output_name, s.manifest_name]:
+    ignores = s.ignores + s.output_names + [s.manifest_name] # fresh list
+    ignores += s.src_direxcludes(d) # plus patterns specific to directory d
+    for excl in ignores: # each pattern yields two find(1) tests
       assert("'" not in excl)
-      script += r" \! -name '%s'" % excl
+      script += r" \! -name '%s'"     % excl # entry's own name matches
+      script += r" \! -path '*/%s/*'" % excl # or a parent directory's does
     script += ' -print0'
     return script
 
@@ -137,6 +187,9 @@ class SourceShipmentPreparer():
     s._dirmap[d] = name
     fh = s.open_output_fh(name, 'wb')
 
+    s.logger('packing up into %s: %s (because %s)' %
+             (name, d, ' '.join(infol)))
+
     subprocess.run(s.rune_shell + [total_rune],
                    cwd=d,
                    stdin=subprocess.DEVNULL,
@@ -179,7 +232,7 @@ class SourceShipmentPreparer():
     assert(dpkg_S.wait() == 0)
     dpkg_show_in.seek(0)
     cmdl = ['xargs','-r','dpkg-query',
-            r'-f${binary:Package}\t${Package}\t${Architecture}\t${Version}\t${source:Package}\t${source:Version}\n',
+            r'-f${binary:Package}\t${Package}\t${Architecture}\t${Version}\t${source:Package}\t${source:Version}\t${source:Upstream-Version}\n',
             '--show','--']
     dpkg_show = subprocess.Popen(cmdl,
                                  cwd='/',
@@ -189,23 +242,45 @@ class SourceShipmentPreparer():
                                  close_fds=False)
     for l in dpkg_show.stdout:
       l = l.strip(b'\n').decode('utf-8')
-      (pk,p,a,v,sp,sv) = l.split('\t')
+      (pk,p,a,v,sp,sv,suv) = l.split('\t')
       pkginfos[pk]['binary'] = p
       pkginfos[pk]['arch'] = a
       pkginfos[pk]['version'] = v
       pkginfos[pk]['source'] = sp
       pkginfos[pk]['sourceversion'] = sv
+      pkginfos[pk]['sourceupstreamversion'] = suv # not sv: 7th field
     assert(dpkg_show.wait() == 0)
     for pk in sorted(pkginfos.keys()):
       pi = pkginfos[pk]
       debfname = '%s_%s_%s.deb' % (pi['binary'], pi['version'], pi['arch'])
       dscfname = '%s_%s.dsc' % (pi['source'], pi['sourceversion'])
       s.manifest_append_absentfile(dscfname, [debfname])
+      s.logger('mentioning %s and %s because %s' %
+               (dscfname, debfname, pi['files'][0]))
       for fname in pi['files']:
         infol = files[fname]
         if s.show_pathnames: infol = infol + ['loaded='+fname]
         s.manifest_append_absentfile(' \t' + debfname, infol)
 
+      if s.download_packages:
+        try: os.mkdir(s._packages_path)
+        except FileExistsError: pass
+
+        cmdl = ['apt-get','--download-only','source',
+                '%s=%s' % (pi['source'], pi['sourceversion'])]
+        subprocess.run(cmdl,
+                       cwd=s._packages_path,
+                       stdin=subprocess.DEVNULL,
+                       stdout=sys.stdout,
+                       stderr=sys.stderr,
+                       restore_signals=True,
+                       check=True)
+
+        s._package_sources.append(dscfname)
+        dsc = debian.deb822.Dsc(open(s._packages_path + '/' + dscfname))
+        for indsc in dsc['Files']:
+          s._package_sources.append(indsc['name'])
+
   def thing_ought_packaged(s, fname):
     return s.thing_matches_globs(fname, s.src_package_globs)
 
@@ -244,30 +319,51 @@ class SourceShipmentPreparer():
       s.src_indir(fname, infol)
 
   def srcs_allitems(s, dirs=sys.path):
+    s.logger('allitems')
     s.src_argv0(sys.argv[0], ['argv[0]'])
     for d in sys.path:
       s.src_syspath(d, ['sys.path'])
     for m in sys.modules.values():
       s.src_module(m, ['sys.modules'])
     s.report_from_packages(s._package_files)
+    s.logger('allitems done')
+
+  def _mk_portmanteau(s, ix, rune, cwd, files):
+    # Runs shell fragment `rune' in cwd with files as "$@"; its stdout
+    # becomes <destdir>/output_names[ix], recorded in output_paths[ix].
+    output_name = s.output_names[ix]
+    s.logger('making portmanteau %s' % output_name)
+    output_path = os.path.join(s._destdir, output_name)
+    with open(output_path, 'wb') as output_fh: # do not leak the fd
+      subprocess.run(s.rune_shell + [ rune, 'x' ] + files, # 'x' is $0
+                     cwd=cwd, stdin=subprocess.DEVNULL, stdout=output_fh,
+                     restore_signals=True, check=True)
+    s.output_paths[ix] = output_path
 
-  def mk_portmanteau(s):
-    cmdl = s.rune_shell + [ s.rune_portmanteau, 'x',
-                            s.output_name, s.manifest_name ]
+  def mk_inner_portmanteau(s): # writes the manifest, then archives it + files
+    outputs = [s.manifest_name] # archive members, manifest first
+    outputs_done = { } # fnames already present in outputs
     mfh = s.open_output_fh(s.manifest_name,'w')
     for me in s._manifest:
       try: fname = me['file']
       except KeyError: fname = me.get('file_print','')
-      else: cmdl.append(fname)
+      else: # entry has a real 'file': archive it, but only once
+        try: outputs_done[fname] # lookup succeeds if already queued
+        except KeyError:
+          outputs.append(fname)
+          outputs_done[fname] = 1
       print('%s\t%s' % (fname, me['info']), file=mfh)
     mfh.close()
-    subprocess.run(cmdl,
-                   cwd=s._destdir,
-                   stdin=subprocess.DEVNULL,
-                   stdout=sys.stderr,
-                   restore_signals=True,
-                   check=True)
+
+    s._mk_portmanteau(0, s.rune_portmanteau, # index 0 in s.output_names
+                      s._destdir, outputs)
+
+  def mk_packages_portmanteau(s): # archives s._package_sources, output 1
+    s._mk_portmanteau(1, s.rune_portmanteau_uncompressed,
+                      s._packages_path, s._package_sources)
 
   def generate(s):
     s.srcs_allitems()
-    s.mk_portmanteau()
+    s.mk_inner_portmanteau() # fills s.output_paths[0]
+    s.mk_packages_portmanteau() # fills s.output_paths[1]
+    s.logger('portmanteau ready in %s %s' % tuple(s.output_paths))