| 1 | # -*- python -*- |
| 2 | # |
| 3 | # Hippotat - Asinine IP Over HTTP program |
| 4 | # hippotatlib/ownsource.py - Automatic source code provision (AGPL compliance) |
| 5 | # |
| 6 | # Copyright 2017 Ian Jackson |
| 7 | # |
| 8 | # This program is free software: you can redistribute it and/or modify |
| 9 | # it under the terms of the GNU Affero General Public License as |
| 10 | # published by the Free Software Foundation, either version 3 of the |
| 11 | # License, or (at your option) any later version, with the "CAF Login |
| 12 | # Exception" as published by Ian Jackson (version 2, or at your option |
| 13 | # any later version) as an Additional Permission. |
| 14 | # |
| 15 | # This program is distributed in the hope that it will be useful, |
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | # GNU Affero General Public License for more details. |
| 19 | # |
| 20 | # You should have received a copy of the GNU Affero General Public |
| 21 | # License and the CAF Login Exception along with this program, in the |
| 22 | # file AGPLv3+CAFv2. If not, email Ian Jackson |
| 23 | # <ijackson@chiark.greenend.org.uk>. |
| 24 | |
| 25 | |
| 26 | import os |
| 27 | import sys |
| 28 | import fnmatch |
| 29 | import stat |
| 30 | import subprocess |
| 31 | import tempfile |
| 32 | import shutil |
| 33 | |
| 34 | try: import debian.deb822 |
| 35 | except ImportError: pass |
| 36 | |
class SourceShipmentPreparer():
    """Collect the running program's source code into shippable archives.

    Sources found via argv[0], sys.path and sys.modules are either packed
    into numbered cpio archives (one per source tree) or, when they match
    ``src_package_globs``, attributed to distribution packages.  The
    results are bundled into the "portmanteau" tarballs named by
    ``output_names``, written under the destination directory.

    Most public attributes set in ``__init__`` are defaults which the
    caller may replace after construction (hooks, glob lists, shell runes).
    """

    def __init__(s, destdir):
        """Set defaults; all outputs are written beneath *destdir*."""
        # caller may modify, and should read after calling generate()
        s.output_names = ['srcbomb.tar.gz', 'fullsrcbomb.tar']
        s.output_paths = [None, None]  # alternatively caller may read this
        # defaults, caller can modify after creation
        s.logger = lambda m: print('SourceShipmentPreparer', m)
        s.src_filter = s.src_filter_glob
        s.src_package_globs = ['!/usr/local/*', '/usr*']
        s.src_filter_globs = ['!/etc/*']
        s.src_likeparent = s.src_likeparent_git
        s.src_direxcludes = s.src_direxcludes_git
        s.report_from_packages = s.report_from_packages_debian
        s.cwd = os.getcwd()
        # Raw string: the backslash must reach the shell unchanged, and
        # "\!" in a normal string literal is an invalid escape sequence.
        s.find_rune_base = r"find -type f -perm -004 \! -path '*/tmp/*'"
        s.ignores = ['*~', '*.bak', '*.tmp', '#*#', '__pycache__',
                     '[0-9][0-9][0-9][0-9]-src.cpio']
        s.rune_shell = ['/bin/bash', '-ec']
        s.show_pathnames = True
        s.download_packages = True
        s.rune_cpio = r'''
            set -o pipefail
            (
             %s
             # ^ by default, is find ... -print0
            ) | (
             cpio -Hustar -o --quiet -0 -R 1000:1000 || \
             cpio -Hustar -o --quiet -0
            )
        '''
        s.rune_portmanteau = r'''
            GZIP=-1 tar zcf - "$@"
        '''
        s.rune_portmanteau_uncompressed = r'''
            tar cf - "$@"
        '''
        s.manifest_name = '0000-MANIFEST.txt'
        # private
        s._destdir = destdir
        s._outcounter = 0
        s._manifest = []
        s._dirmap = {}
        s._package_files = {}  # map filename => infol
        s._packages_path = os.path.join(s._destdir, 'packages')
        s._package_sources = []

    def thing_matches_globs(s, thing, globs):
        """Decide whether *thing* is selected by the ordered list *globs*.

        Patterns are tried in order with fnmatch; a leading '!' negates a
        pattern.  The first pattern that matches decides the result (True
        for a plain pattern, False for a negated one).  If none matches,
        the result is True when the last pattern is negated, else False.

        Raises ValueError if *globs* is empty, since no answer is defined.
        """
        negate = None
        for pat in globs:
            negate = pat.startswith('!')
            if negate:
                pat = pat[1:]
            if fnmatch.fnmatch(thing, pat):
                return not negate
        if negate is None:
            raise ValueError('thing_matches_globs: empty glob list')
        return negate

    def src_filter_glob(s, src):  # default s.src_filter
        """Return whether *src* passes ``s.src_filter_globs``."""
        return s.thing_matches_globs(src, s.src_filter_globs)

    def src_direxcludes_git(s, d):  # default s.src_direxcludes
        """Return the patterns listed in *d*/.gitignore, or [] if absent.

        Blank lines and '#' comment lines are dropped; surrounding
        whitespace is stripped.  Patterns are returned verbatim; no
        gitignore-specific syntax (anchoring, negation) is interpreted.
        """
        try:
            excl = open(os.path.join(d, '.gitignore'))
        except FileNotFoundError:
            return []
        patterns = []
        with excl:
            for line in excl:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                patterns.append(line)
        return patterns

    def src_likeparent_git(s, src):  # default s.src_likeparent
        """Return True if *src* contains a ``.git`` directory."""
        try:
            os.stat(os.path.join(src, '.git/.'))
        except (FileNotFoundError, NotADirectoryError):
            # NotADirectoryError: src (or what it links to) is not a
            # directory, so it cannot be the top of a tree.
            return False
        else:
            return True

    def src_parentfinder(s, src, infol):  # callers may monkey-patch away
        """Find the source-tree directory containing *src*.

        Ascends from *src* (first as given, then with symlinks resolved)
        until ``s.src_likeparent`` accepts a directory or a mount point is
        reached.  On success the path of *src* relative to the tree found
        is appended to *infol* as ``want=...`` (if non-empty) and the tree
        directory is returned.  Returns None if *src* does not exist, and
        *src* itself if no enclosing tree was found.
        """
        for deref in (False, True):
            xinfo = []

            search = src
            if deref:
                search = os.path.realpath(search)

            def ascend():
                nonlocal search
                xinfo.append(os.path.basename(search))
                search = os.path.dirname(search)

            try:
                stab = os.lstat(search)
            except FileNotFoundError:
                return
            if stat.S_ISREG(stab.st_mode):
                ascend()

            while not os.path.ismount(search):
                if s.src_likeparent(search):
                    xinfo.reverse()
                    if xinfo:
                        infol.append('want=' + os.path.join(*xinfo))
                    return search

                if os.path.dirname(search) == search:
                    # Cannot ascend further (e.g. '' reached from a
                    # relative path); without this the loop never ends.
                    break
                ascend()

        # no .git found anywhere
        return src

    def path_prenormaliser(s, d, infol):  # callers may monkey-patch away
        """Return *d* as an absolute, normalised path.

        Relative paths are resolved against ``s.cwd`` (the working
        directory recorded at construction), not the current directory.
        """
        return os.path.abspath(os.path.join(s.cwd, d))

    def srcdir_find_rune(s, d):
        """Build the shell ``find`` command used to list files under *d*.

        Starts from ``s.find_rune_base`` and excludes every name in
        ``s.ignores``, the output and manifest names, and whatever
        ``s.src_direxcludes(d)`` returns, both as file names and as
        directory components.  Ends with ``-print0``.
        """
        script = s.find_rune_base
        ignores = s.ignores + s.output_names + [s.manifest_name]
        ignores += s.src_direxcludes(d)
        for excl in ignores:
            # patterns are embedded in single quotes in the shell script
            assert("'" not in excl)
            script += r" \! -name '%s'" % excl
            script += r" \! -path '*/%s/*'" % excl
        script += ' -print0'
        return script

    def manifest_append(s, name, infol):
        """Record output file *name* (to be shipped) in the manifest."""
        s._manifest.append({'file': name, 'info': ' '.join(infol)})

    def manifest_append_absentfile(s, name, infol):
        """Record *name* in the manifest text only (not shipped inline)."""
        s._manifest.append({'file_print': name, 'info': ' '.join(infol)})

    def new_output_name(s, nametail, infol):
        """Allocate the next ``NNNN-nametail`` name and add it to the manifest."""
        s._outcounter += 1
        name = '%04d-%s' % (s._outcounter, nametail)
        s.manifest_append(name, infol)
        return name

    def open_output_fh(s, name, mode):
        """Open *name* inside the destination directory with *mode*."""
        return open(os.path.join(s._destdir, name), mode)

    def src_dir(s, d, infol):
        """Pack directory *d* into a new cpio output, once per directory.

        If *d* was already packed, only another manifest line referring to
        the existing archive is added.  Raises CalledProcessError if the
        find/cpio pipeline fails.
        """
        try:
            name = s._dirmap[d]
        except KeyError:
            pass
        else:
            s.manifest_append(name, infol)
            return

        if s.show_pathnames:
            infol.append(d)
        find_rune = s.srcdir_find_rune(d)
        total_rune = s.rune_cpio % find_rune

        name = s.new_output_name('src.cpio', infol)
        s._dirmap[d] = name
        # "with" ensures the output file is closed even if the rune fails
        with s.open_output_fh(name, 'wb') as fh:
            s.logger('packing up into %s: %s (because %s)' %
                     (name, d, ' '.join(infol)))

            subprocess.run(s.rune_shell + [total_rune],
                           cwd=d,
                           stdin=subprocess.DEVNULL,
                           stdout=fh,
                           restore_signals=True,
                           check=True)

    def src_indir(s, d, infol):
        """Ship the source tree containing *d*, unless filtered out."""
        d = s.path_prenormaliser(d, infol)
        if not s.src_filter(d):
            return

        d = s.src_parentfinder(d, infol)
        if d is None:
            return
        s.src_dir(d, infol)

    def report_from_packages_debian(s, files):  # default s.report_from_packages
        """Attribute *files* to Debian packages and mention their sources.

        *files* maps file name => infol.  Uses ``dpkg -S`` to find the
        owning binary packages and ``dpkg-query --show`` for their version
        and source details, then adds manifest lines for each package.
        If ``s.download_packages`` is set, also runs ``apt-get
        --download-only source`` in the packages directory and records the
        .dsc and the files it lists for the packages portmanteau.

        Raises AssertionError if a dpkg command exits nonzero, and
        CalledProcessError if apt-get fails.
        """
        pkginfos = {}
        with tempfile.TemporaryFile(mode='w+') as dpkg_S_in, \
             tempfile.TemporaryFile(mode='w+') as dpkg_show_in:
            for fname in files:
                assert('\n' not in fname)
                dpkg_S_in.write(fname)
                dpkg_S_in.write('\0')
            dpkg_S_in.seek(0)
            cmdl = ['xargs', '-0r', 'dpkg', '-S', '--']
            with subprocess.Popen(cmdl,
                                  cwd='/',
                                  stdin=dpkg_S_in,
                                  stdout=subprocess.PIPE,
                                  stderr=sys.stderr,
                                  close_fds=False) as dpkg_S:
                for l in dpkg_S.stdout:
                    l = l.strip(b'\n').decode('utf-8')
                    (pkgs, fname) = l.split(': ', 1)
                    pks = pkgs.split(', ')
                    for pk in pks:
                        pkginfos.setdefault(pk, {'files': []})['files'].append(fname)
                        print(pk, file=dpkg_show_in)
                # wait() outside the assert so it still runs under -O
                rc = dpkg_S.wait()
            assert rc == 0, 'dpkg -S failed (status %r)' % (rc,)

            dpkg_show_in.seek(0)
            cmdl = ['xargs', '-r', 'dpkg-query',
                    r'-f${binary:Package}\t${Package}\t${Architecture}\t${Version}\t${source:Package}\t${source:Version}\t${source:Upstream-Version}\n',
                    '--show', '--']
            with subprocess.Popen(cmdl,
                                  cwd='/',
                                  stdin=dpkg_show_in,
                                  stdout=subprocess.PIPE,
                                  stderr=sys.stderr,
                                  close_fds=False) as dpkg_show:
                for l in dpkg_show.stdout:
                    l = l.strip(b'\n').decode('utf-8')
                    (pk, p, a, v, sp, sv, suv) = l.split('\t')
                    pkginfos[pk]['binary'] = p
                    pkginfos[pk]['arch'] = a
                    pkginfos[pk]['version'] = v
                    pkginfos[pk]['source'] = sp
                    pkginfos[pk]['sourceversion'] = sv
                    pkginfos[pk]['sourceupstreamversion'] = suv
                rc = dpkg_show.wait()
            assert rc == 0, 'dpkg-query failed (status %r)' % (rc,)

        for pk in sorted(pkginfos):
            pi = pkginfos[pk]
            # NOTE(review): versions carrying an epoch ("1:2.3-1") probably
            # do not appear with the epoch in real .deb/.dsc file names;
            # confirm before relying on these names for such packages.
            debfname = '%s_%s_%s.deb' % (pi['binary'], pi['version'], pi['arch'])
            dscfname = '%s_%s.dsc' % (pi['source'], pi['sourceversion'])
            s.manifest_append_absentfile(dscfname, [debfname])
            s.logger('mentioning %s and %s because %s' %
                     (dscfname, debfname, pi['files'][0]))
            for fname in pi['files']:
                infol = files[fname]
                if s.show_pathnames:
                    infol = infol + ['loaded=' + fname]
                s.manifest_append_absentfile(' \t' + debfname, infol)

            if s.download_packages:
                try:
                    os.mkdir(s._packages_path)
                except FileExistsError:
                    pass

                cmdl = ['apt-get', '--download-only', 'source',
                        '%s=%s' % (pi['source'], pi['sourceversion'])]
                subprocess.run(cmdl,
                               cwd=s._packages_path,
                               stdin=subprocess.DEVNULL,
                               stdout=sys.stdout,
                               stderr=sys.stderr,
                               restore_signals=True,
                               check=True)

                s._package_sources.append(dscfname)
                # Needs the optional debian.deb822 module imported at the
                # top of the file; NameError here if that import failed.
                with open(s._packages_path + '/' + dscfname) as dscfh:
                    dsc = debian.deb822.Dsc(dscfh)
                    for indsc in dsc['Files']:
                        s._package_sources.append(indsc['name'])

    def thing_ought_packaged(s, fname):
        """Return whether *fname* matches ``s.src_package_globs``."""
        return s.thing_matches_globs(fname, s.src_package_globs)

    def src_file_packaged(s, fname, infol):
        """Note that *fname* should be attributed to a package."""
        s._package_files.setdefault(fname, []).extend(infol)

    def src_file(s, fname, infol):
        """Ship the source for a single file *fname*.

        The name is tried as given, then prenormalised, then with symlinks
        resolved; the first form that ought to be packaged is recorded as
        a package file.  Otherwise its containing source tree is shipped.
        """
        def fngens():
            yield (infol, fname)
            infol_copy = infol.copy()
            yield (infol_copy, s.path_prenormaliser(fname, infol_copy))
            yield (infol, os.path.realpath(fname))

        for (tinfol, tfname) in fngens():
            if s.thing_ought_packaged(tfname):
                s.src_file_packaged(tfname, tinfol)
                return

        s.src_indir(fname, infol)

    def src_argv0(s, program, infol):
        """Ship the source for the program named by argv[0]."""
        s.src_file(program, infol)

    def src_syspath(s, fname, infol):
        """Ship the tree for a sys.path entry, unless it is packaged."""
        if s.thing_ought_packaged(fname):
            return
        s.src_indir(fname, infol)

    def src_module(s, m, infol):
        """Ship the source for loaded module *m*.

        Modules without a usable ``__file__`` (attribute missing or None)
        are skipped.
        """
        fname = getattr(m, '__file__', None)
        if fname is None:
            return
        infol.append('module=' + m.__name__)

        if s.thing_ought_packaged(fname):
            s.src_file_packaged(fname, infol)
        else:
            s.src_indir(fname, infol)

    def srcs_allitems(s, dirs=sys.path):
        """Process argv[0], every directory in *dirs*, and all loaded modules.

        Finishes by passing the accumulated package files to
        ``s.report_from_packages``.
        """
        s.logger('allitems')
        s.src_argv0(sys.argv[0], ['argv[0]'])
        for d in dirs:
            s.src_syspath(d, ['sys.path'])
        # snapshot: sys.modules may change size if anything gets imported
        # while we are iterating
        for m in list(sys.modules.values()):
            s.src_module(m, ['sys.modules'])
        s.report_from_packages(s._package_files)
        s.logger('allitems done')

    def _mk_portmanteau(s, ix, rune, cwd, files):
        """Run *rune* over *files* in *cwd*, writing ``output_names[ix]``.

        The resulting path is stored in ``s.output_paths[ix]``.  Raises
        CalledProcessError if the rune fails.
        """
        output_name = s.output_names[ix]
        s.logger('making portmanteau %s' % output_name)
        output_path = os.path.join(s._destdir, output_name)
        with open(output_path, 'wb') as outfh:
            # 'x' becomes $0 of the shell script; files become "$@"
            subprocess.run(s.rune_shell + [rune, 'x'] + files,
                           cwd=cwd,
                           stdin=subprocess.DEVNULL,
                           stdout=outfh,
                           restore_signals=True,
                           check=True)
        s.output_paths[ix] = output_path

    def mk_inner_portmanteau(s):
        """Write the manifest file and bundle it with the cpio outputs.

        Every manifest entry yields one text line; each shipped file is
        passed to the archiver only once even if the manifest mentions it
        several times.
        """
        outputs = [s.manifest_name]
        outputs_done = set()
        with s.open_output_fh(s.manifest_name, 'w') as mfh:
            for me in s._manifest:
                try:
                    fname = me['file']
                except KeyError:
                    fname = me.get('file_print', '')
                else:
                    if fname not in outputs_done:
                        outputs_done.add(fname)
                        outputs.append(fname)
                print('%s\t%s' % (fname, me['info']), file=mfh)

        s._mk_portmanteau(0, s.rune_portmanteau,
                          s._destdir, outputs)

    def mk_packages_portmanteau(s):
        """Bundle the downloaded package source files (uncompressed tar)."""
        # NOTE(review): if nothing was downloaded, the packages directory
        # may not exist and the file list is empty; this presumably makes
        # the run below fail -- confirm intended behaviour for that case.
        s._mk_portmanteau(1, s.rune_portmanteau_uncompressed,
                          s._packages_path, s._package_sources)

    def generate(s):
        """Do everything: gather sources, then build both portmanteaux."""
        s.srcs_allitems()
        s.mk_inner_portmanteau()
        s.mk_packages_portmanteau()
        s.logger('portmanteau ready in %s %s' % tuple(s.output_paths))