chiark / gitweb /
Merge branch 'error_on_jars' into 'master'
[fdroidserver.git] / fdroidserver / scanner.py
index 15a25219a91ef0b9326e356c0e304276b54b6ba4..0b622bcdbc208fd0b87585112fc11a6e58804469 100644 (file)
@@ -1,5 +1,4 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 #
 # scanner.py - part of the FDroid server tools
 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
@@ -23,205 +22,223 @@ import traceback
 from argparse import ArgumentParser
 import logging
 
-import common
-import metadata
-from common import BuildException, VCSException
+from . import _
+from . import common
+from . import metadata
+from .exception import BuildException, VCSException
 
 config = None
 options = None
 
 
-def init_mime_type():
-    '''
-    There are two incompatible versions of the 'magic' module, one
-    that comes as part of libmagic, which is what Debian includes as
-    python-magic, then another called python-magic that is a separate
-    project that wraps libmagic.  The second is 'magic' on pypi, so
-    both need to be supported.  Then on platforms where libmagic is
-    not easily included, e.g. OSX and Windows, fallback to the
-    built-in 'mimetypes' module so this will work without
-    libmagic. Hence this function with the following hacks:
-    '''
+def get_gradle_compile_commands(build):
+    compileCommands = ['compile', 'releaseCompile']
+    if build.gradle and build.gradle != ['yes']:
+        compileCommands += [flavor + 'Compile' for flavor in build.gradle]
+        compileCommands += [flavor + 'ReleaseCompile' for flavor in build.gradle]
 
-    init_path = ''
-    method = ''
-    ms = None
+    return [re.compile(r'\s*' + c, re.IGNORECASE) for c in compileCommands]
 
-    def mime_from_file(path):
-        try:
-            return magic.from_file(path, mime=True)
-        except UnicodeError:
-            return None
-
-    def mime_file(path):
-        try:
-            return ms.file(path)
-        except UnicodeError:
-            return None
 
-    def mime_guess_type(path):
-        return mimetypes.guess_type(path, strict=False)
-
-    try:
-        import magic
-        try:
-            ms = magic.open(magic.MIME_TYPE)
-            ms.load()
-            magic.from_file(init_path, mime=True)
-            method = 'from_file'
-        except AttributeError:
-            ms.file(init_path)
-            method = 'file'
-    except ImportError:
-        import mimetypes
-        mimetypes.init()
-        method = 'guess_type'
-
-    logging.info("Using magic method " + method)
-    if method == 'from_file':
-        return mime_from_file
-    if method == 'file':
-        return mime_file
-    if method == 'guess_type':
-        return mime_guess_type
-
-    logging.critical("unknown magic method!")
-
-
-# Scan the source code in the given directory (and all subdirectories)
-# and return the number of fatal problems encountered
-def scan_source(build_dir, root_dir, thisbuild):
+def scan_source(build_dir, build):
+    """Scan the source code in the given directory (and all subdirectories)
+    and return the number of fatal problems encountered
+    """
 
     count = 0
 
     # Common known non-free blobs (always lower case):
-    usual_suspects = [
-        re.compile(r'.*flurryagent', re.IGNORECASE),
-        re.compile(r'.*paypal.*mpl', re.IGNORECASE),
-        re.compile(r'.*google.*analytics', re.IGNORECASE),
-        re.compile(r'.*admob.*sdk.*android', re.IGNORECASE),
-        re.compile(r'.*google.*ad.*view', re.IGNORECASE),
-        re.compile(r'.*google.*admob', re.IGNORECASE),
-        re.compile(r'.*google.*play.*services', re.IGNORECASE),
-        re.compile(r'.*crittercism', re.IGNORECASE),
-        re.compile(r'.*heyzap', re.IGNORECASE),
-        re.compile(r'.*jpct.*ae', re.IGNORECASE),
-        re.compile(r'.*youtube.*android.*player.*api', re.IGNORECASE),
-        re.compile(r'.*bugsense', re.IGNORECASE),
-        re.compile(r'.*crashlytics', re.IGNORECASE),
-        re.compile(r'.*ouya.*sdk', re.IGNORECASE),
-        re.compile(r'.*libspen23', re.IGNORECASE),
+    usual_suspects = {
+        exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
+            r'flurryagent',
+            r'paypal.*mpl',
+            r'google.*analytics',
+            r'admob.*sdk.*android',
+            r'google.*ad.*view',
+            r'google.*admob',
+            r'google.*play.*services',
+            r'crittercism',
+            r'heyzap',
+            r'jpct.*ae',
+            r'youtube.*android.*player.*api',
+            r'bugsense',
+            r'crashlytics',
+            r'ouya.*sdk',
+            r'libspen23',
+            r'firebase',
+        ]
+    }
+
+    def suspects_found(s):
+        for n, r in usual_suspects.items():
+            if r.match(s):
+                yield n
+
+    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')
+
+    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'/*') for repo in [
+        'repo1.maven.org/maven2',  # mavenCentral()
+        'jcenter.bintray.com',     # jcenter()
+        'jitpack.io',
+        'repo.maven.apache.org/maven2',
+        'oss.jfrog.org/artifactory/oss-snapshot-local',
+        'oss.sonatype.org/content/repositories/snapshots',
+        'oss.sonatype.org/content/repositories/releases',
+        'oss.sonatype.org/content/groups/public',
+        'clojars.org/repo',  # Clojure free software libs
+        's3.amazonaws.com/repo.commonsware.com',  # CommonsWare
+        'plugins.gradle.org/m2',  # Gradle plugin repo
+        'maven.google.com',  # Google Maven Repo, https://developer.android.com/studio/build/dependencies.html#google-maven
+        ]
     ]
 
-    scanignore = common.getpaths(build_dir, thisbuild, 'scanignore')
-    scandelete = common.getpaths(build_dir, thisbuild, 'scandelete')
+    scanignore = common.getpaths_map(build_dir, build.scanignore)
+    scandelete = common.getpaths_map(build_dir, build.scandelete)
 
     scanignore_worked = set()
     scandelete_worked = set()
 
-    def toignore(fd):
-        for p in scanignore:
-            if fd.startswith(p):
-                scanignore_worked.add(p)
-                return True
+    def toignore(path_in_build_dir):
+        for k, paths in scanignore.items():
+            for p in paths:
+                if path_in_build_dir.startswith(p):
+                    scanignore_worked.add(k)
+                    return True
         return False
 
-    def todelete(fd):
-        for p in scandelete:
-            if fd.startswith(p):
-                scandelete_worked.add(p)
-                return True
+    def todelete(path_in_build_dir):
+        for k, paths in scandelete.items():
+            for p in paths:
+                if path_in_build_dir.startswith(p):
+                    scandelete_worked.add(k)
+                    return True
         return False
 
-    def ignoreproblem(what, fd, fp):
-        logging.info('Ignoring %s at %s' % (what, fd))
+    def ignoreproblem(what, path_in_build_dir):
+        logging.info('Ignoring %s at %s' % (what, path_in_build_dir))
         return 0
 
-    def removeproblem(what, fd, fp):
-        logging.info('Removing %s at %s' % (what, fd))
-        os.remove(fp)
+    def removeproblem(what, path_in_build_dir, filepath):
+        logging.info('Removing %s at %s' % (what, path_in_build_dir))
+        os.remove(filepath)
         return 0
 
-    def warnproblem(what, fd):
-        logging.warn('Found %s at %s' % (what, fd))
-
-    def handleproblem(what, fd, fp):
-        if toignore(fd):
-            return ignoreproblem(what, fd, fp)
-        if todelete(fd):
-            return removeproblem(what, fd, fp)
-        logging.error('Found %s at %s' % (what, fd))
+    def warnproblem(what, path_in_build_dir):
+        if toignore(path_in_build_dir):
+            return
+        logging.warn('Found %s at %s' % (what, path_in_build_dir))
+
+    def handleproblem(what, path_in_build_dir, filepath):
+        if toignore(path_in_build_dir):
+            return ignoreproblem(what, path_in_build_dir)
+        if todelete(path_in_build_dir):
+            return removeproblem(what, path_in_build_dir, filepath)
+        logging.error('Found %s at %s' % (what, path_in_build_dir))
         return 1
 
-    get_mime_type = init_mime_type()
+    def is_executable(path):
+        return os.path.exists(path) and os.access(path, os.X_OK)
 
-    # Iterate through all files in the source code
-    for r, d, f in os.walk(build_dir, topdown=True):
+    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})
 
-        # It's topdown, so checking the basename is enough
-        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
-            if ignoredir in d:
-                d.remove(ignoredir)
-
-        for curfile in f:
-
-            # Path (relative) to the file
-            fp = os.path.join(r, curfile)
-            fd = fp[len(build_dir) + 1:]
+    def is_binary(path):
+        d = None
+        with open(path, 'rb') as f:
+            d = f.read(1024)
+        return bool(d.translate(None, textchars))
 
-            mime = get_mime_type(fp)
-
-            if mime == 'application/x-sharedlib':
-                count += handleproblem('shared library', fd, fp)
-
-            elif mime == 'application/x-archive':
-                count += handleproblem('static library', fd, fp)
+    # Patterns for known false positives: files that are binary and executable but safe.
+    safe_paths = [re.compile(r) for r in [
+        r".*/drawable[^/]*/.*\.png$",  # png drawables
+        r".*/mipmap[^/]*/.*\.png$",    # png mipmaps
+        ]
+    ]
 
-            elif mime == 'application/x-executable' or mime == 'application/x-mach-binary':
-                count += handleproblem('binary executable', fd, fp)
+    def safe_path(path):
+        for sp in safe_paths:
+            if sp.match(path):
+                return True
+        return False
 
-            elif mime == 'application/x-java-applet':
-                count += handleproblem('Java compiled class', fd, fp)
+    gradle_compile_commands = get_gradle_compile_commands(build)
 
-            elif mime in (
-                    'application/jar',
-                    'application/zip',
-                    'application/java-archive',
-                    'application/octet-stream',
-                    'binary', ):
+    def is_used_by_gradle(line):
+        return any(command.match(line) for command in gradle_compile_commands)
 
-                if common.has_extension(fp, 'apk'):
-                    removeproblem('APK file', fd, fp)
+    # Iterate through all files in the source code
+    for root, dirs, files in os.walk(build_dir, topdown=True):
 
-                elif common.has_extension(fp, 'jar'):
+        # It's topdown, so checking the basename is enough
+        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
+            if ignoredir in dirs:
+                dirs.remove(ignoredir)
 
-                    if any(suspect.match(curfile) for suspect in usual_suspects):
-                        count += handleproblem('usual supect', fd, fp)
-                    else:
-                        warnproblem('JAR file', fd)
+        for curfile in files:
 
-                elif common.has_extension(fp, 'zip'):
-                    warnproblem('ZIP file', fd)
+            if curfile in ['.DS_Store']:
+                continue
 
+            # Path to the file as yielded by os.walk (still prefixed with build_dir)
+            filepath = os.path.join(root, curfile)
+
+            if os.path.islink(filepath):
+                continue
+
+            path_in_build_dir = os.path.relpath(filepath, build_dir)
+            _, ext = common.get_extension(path_in_build_dir)
+
+            if ext == 'so':
+                count += handleproblem('shared library', path_in_build_dir, filepath)
+            elif ext == 'a':
+                count += handleproblem('static library', path_in_build_dir, filepath)
+            elif ext == 'class':
+                count += handleproblem('Java compiled class', path_in_build_dir, filepath)
+            elif ext == 'apk':
+                removeproblem('APK file', path_in_build_dir, filepath)
+
+            elif ext == 'jar':
+                for name in suspects_found(curfile):
+                    count += handleproblem('usual supect \'%s\'' % name, path_in_build_dir, filepath)
+                if curfile == 'gradle-wrapper.jar':
+                    removeproblem('gradle-wrapper.jar', path_in_build_dir, filepath)
                 else:
-                    warnproblem('unknown compressed or binary file', fd)
+                    count += handleproblem('JAR file', path_in_build_dir, filepath)
 
-            elif common.has_extension(fp, 'java'):
-                if not os.path.isfile(fp):
-                    continue
-                for line in file(fp):
-                    if 'DexClassLoader' in line:
-                        count += handleproblem('DexClassLoader', fd, fp)
-                        break
+            elif ext == 'aar':
+                count += handleproblem('AAR file', path_in_build_dir, filepath)
 
-            elif common.has_extension(fp, 'gradle'):
-                if not os.path.isfile(fp):
+            elif ext == 'java':
+                if not os.path.isfile(filepath):
                     continue
-                for i, line in enumerate(file(fp)):
-                    i = i + 1
-                    if any(suspect.match(line) for suspect in usual_suspects):
-                        count += handleproblem('usual suspect at line %d' % i, fd, fp)
-                        break
+                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
+                    for line in f:
+                        if 'DexClassLoader' in line:
+                            count += handleproblem('DexClassLoader', path_in_build_dir, filepath)
+                            break
+
+            elif ext == 'gradle':
+                if not os.path.isfile(filepath):
+                    continue
+                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
+                    lines = f.readlines()
+                for i, line in enumerate(lines):
+                    if is_used_by_gradle(line):
+                        for name in suspects_found(line):
+                            count += handleproblem('usual supect \'%s\' at line %d' % (name, i + 1), path_in_build_dir, filepath)
+                noncomment_lines = [l for l in lines if not common.gradle_comment.match(l)]
+                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
+                for m in gradle_mavenrepo.finditer(joined):
+                    url = m.group(2)
+                    if not any(r.match(url) for r in allowed_repos):
+                        count += handleproblem('unknown maven repo \'%s\'' % url, path_in_build_dir, filepath)
+
+            elif ext in ['', 'bin', 'out', 'exe']:
+                if is_binary(filepath):
+                    count += handleproblem('binary', path_in_build_dir, filepath)
+
+            elif is_executable(filepath):
+                if is_binary(filepath) and not safe_path(path_in_build_dir):
+                    warnproblem('possible binary', path_in_build_dir)
 
     for p in scanignore:
         if p not in scanignore_worked:
@@ -233,14 +250,6 @@ def scan_source(build_dir, root_dir, thisbuild):
             logging.error('Unused scandelete path: %s' % p)
             count += 1
 
-    # Presence of a jni directory without buildjni=yes might
-    # indicate a problem (if it's not a problem, explicitly use
-    # buildjni=no to bypass this check)
-    if (os.path.exists(os.path.join(root_dir, 'jni')) and
-            not thisbuild['buildjni']):
-        logging.error('Found jni directory, but buildjni is not enabled. Set it to \'no\' to ignore.')
-        count += 1
-
     return count
 
 
@@ -251,8 +260,10 @@ def main():
     # Parse command line...
     parser = ArgumentParser(usage="%(prog)s [options] [APPID[:VERCODE] [APPID[:VERCODE] ...]]")
     common.setup_global_opts(parser)
-    parser.add_argument("appid", nargs='*', help="app-id with optional versioncode in the form APPID[:VERCODE]")
+    parser.add_argument("appid", nargs='*', help=_("applicationId with optional versionCode in the form APPID[:VERCODE]"))
+    metadata.add_metadata_arguments(parser)
     options = parser.parse_args()
+    metadata.warnings_action = options.W
 
     config = common.read_config(options)
 
@@ -269,42 +280,45 @@ def main():
     srclib_dir = os.path.join(build_dir, 'srclib')
     extlib_dir = os.path.join(build_dir, 'extlib')
 
-    for appid, app in apps.iteritems():
+    for appid, app in apps.items():
 
-        if app['Disabled']:
-            logging.info("Skipping %s: disabled" % appid)
+        if app.Disabled:
+            logging.info(_("Skipping {appid}: disabled").format(appid=appid))
             continue
-        if not app['builds']:
-            logging.info("Skipping %s: no builds specified" % appid)
+        if not app.builds:
+            logging.info(_("Skipping {appid}: no builds specified").format(appid=appid))
             continue
 
-        logging.info("Processing " + appid)
+        logging.info(_("Processing {appid}").format(appid=appid))
 
         try:
 
-            build_dir = 'build/' + appid
+            if app.RepoType == 'srclib':
+                build_dir = os.path.join('build', 'srclib', app.Repo)
+            else:
+                build_dir = os.path.join('build', appid)
 
             # Set up vcs interface and make sure we have the latest code...
-            vcs = common.getvcs(app['Repo Type'], app['Repo'], build_dir)
+            vcs = common.getvcs(app.RepoType, app.Repo, build_dir)
 
-            for thisbuild in app['builds']:
+            for build in app.builds:
 
-                if thisbuild['disable']:
+                if build.disable:
                     logging.info("...skipping version %s - %s" % (
-                        thisbuild['version'], thisbuild.get('disable', thisbuild['commit'][1:])))
+                        build.versionName, build.get('disable', build.commit[1:])))
                 else:
-                    logging.info("...scanning version " + thisbuild['version'])
+                    logging.info("...scanning version " + build.versionName)
 
                     # Prepare the source code...
-                    root_dir, _ = common.prepare_source(vcs, app, thisbuild,
-                                                        build_dir, srclib_dir,
-                                                        extlib_dir, False)
+                    common.prepare_source(vcs, app, build,
+                                          build_dir, srclib_dir,
+                                          extlib_dir, False)
 
                     # Do the scan...
-                    count = scan_source(build_dir, root_dir, thisbuild)
+                    count = scan_source(build_dir, build)
                     if count > 0:
                         logging.warn('Scanner found %d problems in %s (%s)' % (
-                            count, appid, thisbuild['vercode']))
+                            count, appid, build.versionCode))
                         probcount += count
 
         except BuildException as be:
@@ -319,8 +333,9 @@ def main():
                 appid, traceback.format_exc()))
             probcount += 1
 
-    logging.info("Finished:")
-    print "%d app(s) with problems" % probcount
+    logging.info(_("Finished"))
+    print(_("%d problems found") % probcount)
+
 
 if __name__ == "__main__":
     main()