chiark / gitweb /
ownsource fixes
[hippotat.git] / hippotatlib / ownsource.py
index be7bdcd620f4349e64a7c1a2e40e8ee8cab128dc..218661a5ec006e34aca6358bb2df2988b59df2f7 100644 (file)
@@ -1,7 +1,11 @@
 # Automatic source code provision (AGPL compliance)
 
+import os
 import sys
 import fnmatch
+import stat
+import subprocess
+import tempfile
 
 class SourceShipmentPreparer():
   def __init__(s, destdir):
@@ -9,12 +13,17 @@ class SourceShipmentPreparer():
     s.output_name = 'srcbomb.tar.gz'
     # defaults, caller can modify after creation
     s.src_filter = s.src_filter_glob
-    s.src_filter_globs = ['/usr/local/*', '!/usr*', '!/etc/*']
+    s.src_package_globs = ['!/usr/local/*', '/usr*']
+    s.src_filter_globs = ['!/etc/*']
     s.src_likeparent = s.src_likeparent_git
+    s.report_from_packages = s.report_from_packages_debian
     s.cwd = os.getcwd()
-    s.excludes = ['*~', '*.bak', '*.tmp', '#*#']
+    s.find_rune_base = "find -type f -perm -004 \! -path '*/tmp/*'"
+    s.excludes = ['*~', '*.bak', '*.tmp', '#*#',
+                  '[0-9][0-9][0-9][0-9]-src.cpio']
     s.rune_shell = ['/bin/bash', '-ec']
-    s.rune_cpio = r''''
+    s.show_pathnames = True
+    s.rune_cpio = r'''
             set -o pipefail
            (
             %s
@@ -27,25 +36,30 @@ class SourceShipmentPreparer():
     s.rune_portmanteau = r'''
             outfile=$1; shift
             rm -f "$outfile"
-            GZIP=-9 tar zcf "$outfile" "$@"'
+            GZIP=-1 tar zcf "$outfile" "$@"
     '''
     s.manifest_name='0000-MANIFEST.txt'
     # private
     s._destdir = destdir
-    s._outcounter = 1
+    s._outcounter = 0
     s._manifest = []
+    s._dirmap = { }
+    s._package_files = { } # map filename => infol
 
-  def src_filter_glob(s, src): # default s.src_filter
-    for pat in s.src_filter_globs:
+  def thing_matches_globs(s, thing, globs):
+    for pat in globs:
       negate = pat.startswith('!')
       if negate: pat = pat[1:]
-      if fnmatch.fnmatch(src, pat):
+      if fnmatch.fnmatch(thing, pat):
         return not negate
     return negate
 
+  def src_filter_glob(s, src): # default s.src_filter
+    return s.thing_matches_globs(src, s.src_filter_globs)
+
   def src_likeparent_git(s, src):
     try:
-      stat(os.path.join(d, '.git/.'))
+      os.stat(os.path.join(src, '.git/.'))
     except FileNotFoundError:
       return False
     else:
@@ -60,11 +74,12 @@ class SourceShipmentPreparer():
         search = os.path.realpath(search)
 
       def ascend():
+        nonlocal search
         xinfo.append(os.path.basename(search))
         search = os.path.dirname(search)
 
       try:
-        stab = lstat(search)
+        stab = os.lstat(search)
       except FileNotFoundError:
         return
       if stat.S_ISREG(stab.st_mode):
@@ -73,72 +88,186 @@ class SourceShipmentPreparer():
       while not os.path.ismount(search):
         if s.src_likeparent(search):
           xinfo.reverse()
-          infol.append(os.path.join(*xinfo))
+          if len(xinfo): infol.append('want=' + os.path.join(*xinfo))
           return search
 
         ascend()
 
     # no .git found anywhere
-    return d
+    return src
 
-  def src_prenormaliser(s, d): # callers may monkey-patch away
+  def path_prenormaliser(s, d, infol): # callers may monkey-patch away
     return os.path.join(s.cwd, os.path.abspath(d))
 
-  def src_find_rune(s, d):
-    script = 'find -type f -perm +004'
-    for excl in s.excludes:
+  def srcdir_find_rune(s, d):
+    script = s.find_rune_base
+    for excl in s.excludes + [s.output_name, s.manifest_name]:
       assert("'" not in excl)
       script += r" \! -name '%s'" % excl
     script += ' -print0'
+    return script
+
+  def manifest_append(s, name, infol):
+    s._manifest.append({ 'file':name, 'info':' '.join(infol) })
+
+  def manifest_append_absentfile(s, name, infol):
+    s._manifest.append({ 'file_print':name, 'info':' '.join(infol) })
 
   def new_output_name(s, nametail, infol):
-    name = '%04d-%s' % (s._outcounter++, nametail)
-    s._manifest.append((name, infol.join(' '))
+    s._outcounter += 1
+    name = '%04d-%s' % (s._outcounter, nametail)
+    s.manifest_append(name, infol)
     return name
 
   def open_output_fh(s, name, mode):
     return open(os.path.join(s._destdir, name), mode)
 
-  def new_output_fh(s, nametail, infol):
-    name = new_output_name(s, nametail, infol)
-    return open_output_fh(name, 'wb')
+  def src_dir(s, d, infol):
+    try: name = s._dirmap[d]
+    except KeyError: pass
+    else:
+      s.manifest_append(name, infol)
+      return
 
-  def mk_from_dir(s, d):
-    find_rune = s.src_find_rune(s, d)
+    if s.show_pathnames: infol.append(d)
+    find_rune = s.srcdir_find_rune(d)
     total_rune = s.rune_cpio % find_rune
-    fh = new_output_fh('src.cpio')
+
+    name = s.new_output_name('src.cpio', infol)
+    s._dirmap[d] = name
+    fh = s.open_output_fh(name, 'wb')
+
     subprocess.run(s.rune_shell + [total_rune],
-                   cwd=s._destdir,
+                   cwd=d,
                    stdin=subprocess.DEVNULL,
                    stdout=fh,
-                   restore_signals=True)
+                   restore_signals=True,
+                   check=True)
     fh.close()
 
-  def mk_from_src(s, d, infol):
-    d = s.src_prenormaliser(d, infol)
+  def src_indir(s, d, infol):
+    d = s.path_prenormaliser(d, infol)
     if not s.src_filter(d): return
+
     d = s.src_parentfinder(d, infol)
-    s.mk_from_dir(d, infol)
+    if d is None: return
+    s.src_dir(d, infol)
+
+  def report_from_packages_debian(s, files):
+    dpkg_S_in = tempfile.TemporaryFile(mode='w+')
+    for (file, infols) in files.items():
+      assert('\n' not in file)
+      dpkg_S_in.write(file)
+      dpkg_S_in.write('\0')
+    dpkg_S_in.seek(0)
+    cmdl = ['xargs','-0r','dpkg','-S','--']
+    dpkg_S = subprocess.Popen(cmdl,
+                              cwd='/',
+                              stdin=dpkg_S_in,
+                              stdout=subprocess.PIPE,
+                              stderr=sys.stderr,
+                              close_fds=False)
+    dpkg_show_in = tempfile.TemporaryFile(mode='w+')
+    pkginfos = { }
+    for l in dpkg_S.stdout:
+      l = l.strip(b'\n').decode('utf-8')
+      (pkgs, fname) = l.split(': ',1)
+      pks = pkgs.split(', ')
+      for pk in pks:
+        pkginfos.setdefault(pk,{'files':[]})['files'].append(fname)
+        print(pk, file=dpkg_show_in)
+    assert(dpkg_S.wait() == 0)
+    dpkg_show_in.seek(0)
+    cmdl = ['xargs','-r','dpkg-query',
+            r'-f${binary:Package}\t${Package}\t${Architecture}\t${Version}\t${source:Package}\t${source:Version}\n',
+            '--show','--']
+    dpkg_show = subprocess.Popen(cmdl,
+                                 cwd='/',
+                                 stdin=dpkg_show_in,
+                                 stdout=subprocess.PIPE,
+                                 stderr=sys.stderr,
+                                 close_fds=False)
+    for l in dpkg_show.stdout:
+      l = l.strip(b'\n').decode('utf-8')
+      (pk,p,a,v,sp,sv) = l.split('\t')
+      pkginfos[pk]['binary'] = p
+      pkginfos[pk]['arch'] = a
+      pkginfos[pk]['version'] = v
+      pkginfos[pk]['source'] = sp
+      pkginfos[pk]['sourceversion'] = sv
+    assert(dpkg_show.wait() == 0)
+    for pk in sorted(pkginfos.keys()):
+      pi = pkginfos[pk]
+      debfname = '%s_%s_%s.deb' % (pi['binary'], pi['version'], pi['arch'])
+      dscfname = '%s_%s.dsc' % (pi['source'], pi['sourceversion'])
+      s.manifest_append_absentfile(dscfname, [debfname])
+      for fname in pi['files']:
+        infol = files[fname]
+        if s.show_pathnames: infol = infol + ['loaded='+fname]
+        s.manifest_append_absentfile(' \t' + debfname, infol)
+
+  def thing_ought_packaged(s, fname):
+    return s.thing_matches_globs(fname, s.src_package_globs)
 
-  def mk_from_srcs(s, dirs=sys.path):
-    s.mk_from_src(sys.argv[0], ['argv[0]'])
+  def src_file_packaged(s, fname, infol):
+    s._package_files.setdefault(fname,[]).extend(infol)
+
+  def src_file(s, fname, infol):
+    def fngens():
+      yield (infol, fname)
+      infol_copy = infol.copy()
+      yield (infol_copy, s.path_prenormaliser(fname, infol_copy))
+      yield (infol, os.path.realpath(fname))
+
+    for (tinfol, tfname) in fngens():
+      if s.thing_ought_packaged(tfname):
+        s.src_file_packaged(tfname, tinfol)
+        return
+
+    s.src_indir(fname, infol)
+
+  def src_argv0(s, program, infol):
+    s.src_file(program, infol)
+
+  def src_syspath(s, fname, infol):
+    if s.thing_ought_packaged(fname): return
+    s.src_indir(fname, infol)
+
+  def src_module(s, m, infol):
+    try: fname = m.__file__
+    except AttributeError: return
+    infol.append('module='+m.__name__)
+
+    if s.thing_ought_packaged(fname):
+      s.src_file_packaged(fname, infol)
+    else:
+      s.src_indir(fname, infol)
+
+  def srcs_allitems(s, dirs=sys.path):
+    s.src_argv0(sys.argv[0], ['argv[0]'])
     for d in sys.path:
-      s.mk_from_src(d, ['sys.path'])
+      s.src_syspath(d, ['sys.path'])
+    for m in sys.modules.values():
+      s.src_module(m, ['sys.modules'])
+    s.report_from_packages(s._package_files)
 
-  def mk_portmanteau(s):]
+  def mk_portmanteau(s):
     cmdl = s.rune_shell + [ s.rune_portmanteau, 'x',
                             s.output_name, s.manifest_name ]
-    mfh = open_output_fh(s.manifest_name,'w')
-    for (name, info) in s._manifest:
-      cmdl.append(name)
-      print('%s\t%s\n' % (name,info), file=mfh)
+    mfh = s.open_output_fh(s.manifest_name,'w')
+    for me in s._manifest:
+      try: fname = me['file']
+      except KeyError: fname = me.get('file_print','')
+      else: cmdl.append(fname)
+      print('%s\t%s' % (fname, me['info']), file=mfh)
     mfh.close()
-    subprocess.run(s.rune_shell + cmdl,
-                   cmd=s._destdir,
+    subprocess.run(cmdl,
+                   cwd=s._destdir,
                    stdin=subprocess.DEVNULL,
                    stdout=sys.stderr,
-                   restore_signals=True)
+                   restore_signals=True,
+                   check=True)
 
   def generate(s):
-    s.mk_from_srcdirs()
+    s.srcs_allitems()
     s.mk_portmanteau()