From: Hans-Christoph Steiner Date: Wed, 29 Nov 2017 11:40:04 +0000 (+0100) Subject: mirror: new command to make a mirror of a repo X-Git-Tag: 1.0.0~62^2~4 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=2162703a1ae8c7d9348362b5471d254fd53c998a;p=fdroidserver.git mirror: new command to make a mirror of a repo This creates a mirror of a full repo by downloading all files listed in the index, and the ones that are generated based on that data, e.g. icons of different resolutions. This could be useful for setting up mirrors of small repositories, instead of having to learn and manage rsync or something else for mirroring. This just needs a working repo. It uses wget in a batch mode with the aim as being as efficient as possible. wget mirroring over HTTP is always going to be less efficient than rsync, but it shouldn't be so bad since it uses --continue to check whether it has already downloaded a file. I suppose it could be extended to use ETags for a little more efficiency. I developed this creating a test mirror of f-droid.org, which is now a bit ironic, since I added a specific check to prevent people from using this on f-droid.org. --- diff --git a/completion/bash-completion b/completion/bash-completion index 8bdd333f..d18edd35 100644 --- a/completion/bash-completion +++ b/completion/bash-completion @@ -264,6 +264,12 @@ __complete_btlog() { __complete_options } +__complete_mirror() { + opts="-v" + lopts="--archive --output-dir" + __complete_options +} + __complete_nightly() { opts="-v -q" lopts="--show-secret-var" @@ -316,6 +322,7 @@ import \ init \ install \ lint \ +mirror \ nightly \ publish \ readmeta \ diff --git a/fdroid b/fdroid index a07a4ecf..f5e6c92b 100755 --- a/fdroid +++ b/fdroid @@ -48,6 +48,7 @@ commands = OrderedDict([ ("btlog", _("Update the binary transparency log for a URL")), ("signatures", _("Extract signatures from APKs")), ("nightly", _("Set up an app build for a nightly build repo")), + ("mirror", _("Download complete mirrors of small repos")), ]) diff --git a/fdroidserver/mirror.py b/fdroidserver/mirror.py new file mode 100644 index 00000000..3578f310 --- /dev/null +++ b/fdroidserver/mirror.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +import io +import ipaddress +import json +import logging +import os +import socket +import subprocess +import sys +import zipfile +from argparse import ArgumentParser +from urllib.parse import urlparse + +from . import _ +from . import common +from . import net +from . import update + +options = None + + +def main(): + global options + + parser = ArgumentParser(usage="%(prog)s [options] url") + common.setup_global_opts(parser) + parser.add_argument("url", nargs='?', help=_("Base URL to mirror")) + parser.add_argument("--archive", action='store_true', default=False, + help=_("Also mirror the full archive section")) + parser.add_argument("--output-dir", default=os.getcwd(), + help=_("The directory to write the mirror to")) + options = parser.parse_args() + + baseurl = options.url + basedir = options.output_dir + + url = urlparse(baseurl) + hostname = url.netloc + ip = None + try: + ip = ipaddress.ip_address(hostname) + except ValueError: + pass + if hostname == 'f-droid.org' \ + or (ip is not None and hostname in socket.gethostbyname_ex('f-droid.org')[2]): + print(_('ERROR: this command should never be used to mirror f-droid.org!\n' + 'A full mirror of f-droid.org requires more than 200GB.')) + sys.exit(1) + + path = url.path.rstrip('/') + if path.endswith('repo') or path.endswith('archive'): + logging.error(_('Do not include "{path}" in URL!').format(path=path.split('/')[-1])) + sys.exit(1) + elif not path.endswith('fdroid'): + logging.warning(_('{url} does not end with "fdroid", check the URL path!') + .format(url=baseurl)) + + icondirs = ['icons', ] + for density in update.screen_densities: + icondirs.append('icons-' + density) + + if options.archive: + sections = ('repo', 'archive') + else: + sections = ('repo', ) + + for section in sections: + sectionurl = baseurl + '/' + section + sectiondir = os.path.join(basedir, section) + repourl = sectionurl + '/index-v1.jar' + + content, etag = net.http_get(repourl) + with zipfile.ZipFile(io.BytesIO(content)) as zip: + jsoncontents = zip.open('index-v1.json').read() + + os.makedirs(sectiondir, exist_ok=True) + os.chdir(sectiondir) + for icondir in icondirs: + os.makedirs(os.path.join(sectiondir, icondir), exist_ok=True) + + data = json.loads(jsoncontents.decode('utf-8')) + urls = '' + for packageName, packageList in data['packages'].items(): + for package in packageList: + to_fetch = [] + for k in ('apkName', 'srcname'): + if k in package: + to_fetch.append(package[k]) + elif k == 'apkName': + logging.error(_('{appid} is missing {name}') + .format(appid=package['packageName'], name=k)) + for f in to_fetch: + if not os.path.exists(f) \ + or (f.endswith('.apk') and os.path.getsize(f) != package['size']): + url = sectionurl + '/' + f + urls += url + '\n' + urls += url + '.asc\n' + + for app in data['apps']: + localized = app.get('localized') + if localized: + for locale, d in localized.items(): + for k in update.GRAPHIC_NAMES: + f = d.get(k) + if f: + urls += '/'.join((sectionurl, locale, f)) + '\n' + for k in update.SCREENSHOT_DIRS: + filelist = d.get(k) + if filelist: + for f in filelist: + urls += '/'.join((sectionurl, locale, k, f)) + '\n' + + with open('.rsync-input-file', 'w') as fp: + fp.write(urls) + subprocess.call(['wget', '--continue', '--user-agent="fdroid mirror"', + '--input-file=.rsync-input-file']) + os.remove('.rsync-input-file') + + urls = dict() + for app in data['apps']: + if 'icon' not in app: + logging.error(_('no "icon" in {appid}').format(appid=app['packageName'])) + continue + icon = app['icon'] + for icondir in icondirs: + url = sectionurl + '/' + icondir + '/' + icon + if icondir not in urls: + urls[icondir] = '' + urls[icondir] += url + '\n' + + for icondir in icondirs: + os.chdir(os.path.join(basedir, section, icondir)) + with open('.rsync-input-file', 'w') as fp: + fp.write(urls[icondir]) + subprocess.call(['wget', '--continue', '--input-file=.rsync-input-file']) + os.remove('.rsync-input-file') + + +if __name__ == "__main__": + main()