chiark / gitweb /
PEP8 fix E225 missing whitespace around operator
[fdroidserver.git] / fdroidserver / scanner.py
index 745010f3dc9d4718b96c19661eb95fb90ca2e5ad..e41e557057a8da4d9d5a71f5095504fedeed6819 100644 (file)
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-import sys
 import os
+import re
 import traceback
-from optparse import OptionParser
+from argparse import ArgumentParser
+import logging
+
 import common
-from common import BuildException
-from common import VCSException
+import metadata
+from common import BuildException, VCSException
 
 config = None
 options = None
 
+
+def get_gradle_compile_commands(build):
+    compileCommands = ['compile', 'releaseCompile']
+    if build.gradle and build.gradle != ['yes']:
+        compileCommands += [flavor + 'Compile' for flavor in build.gradle]
+        compileCommands += [flavor + 'ReleaseCompile' for flavor in build.gradle]
+
+    return [re.compile(r'\s*' + c, re.IGNORECASE) for c in compileCommands]
+
+
+# Scan the source code in the given directory (and all subdirectories)
+# and return the number of fatal problems encountered
+def scan_source(build_dir, root_dir, build):
+
+    count = 0
+
+    # Common known non-free blobs (always lower case):
+    usual_suspects = {
+        exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
+            r'flurryagent',
+            r'paypal.*mpl',
+            r'google.*analytics',
+            r'admob.*sdk.*android',
+            r'google.*ad.*view',
+            r'google.*admob',
+            r'google.*play.*services',
+            r'crittercism',
+            r'heyzap',
+            r'jpct.*ae',
+            r'youtube.*android.*player.*api',
+            r'bugsense',
+            r'crashlytics',
+            r'ouya.*sdk',
+            r'libspen23',
+        ]
+    }
+
+    def suspects_found(s):
+        for n, r in usual_suspects.iteritems():
+            if r.match(s):
+                yield n
+
+    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')
+
+    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'/*') for repo in [
+        'repo1.maven.org/maven2',  # mavenCentral()
+        'jcenter.bintray.com',     # jcenter()
+        'jitpack.io',
+        'repo.maven.apache.org/maven2',
+        'oss.sonatype.org/content/repositories/snapshots',
+        'oss.sonatype.org/content/repositories/releases',
+        'oss.sonatype.org/content/groups/public',
+        'clojars.org/repo',  # Clojure free software libs
+        ]
+    ]
+
+    scanignore = common.getpaths_map(build_dir, build.scanignore)
+    scandelete = common.getpaths_map(build_dir, build.scandelete)
+
+    scanignore_worked = set()
+    scandelete_worked = set()
+
+    def toignore(fd):
+        for k, paths in scanignore.iteritems():
+            for p in paths:
+                if fd.startswith(p):
+                    scanignore_worked.add(k)
+                    return True
+        return False
+
+    def todelete(fd):
+        for k, paths in scandelete.iteritems():
+            for p in paths:
+                if fd.startswith(p):
+                    scandelete_worked.add(k)
+                    return True
+        return False
+
+    def ignoreproblem(what, fd, fp):
+        logging.info('Ignoring %s at %s' % (what, fd))
+        return 0
+
+    def removeproblem(what, fd, fp):
+        logging.info('Removing %s at %s' % (what, fd))
+        os.remove(fp)
+        return 0
+
+    def warnproblem(what, fd):
+        if toignore(fd):
+            return
+        logging.warn('Found %s at %s' % (what, fd))
+
+    def handleproblem(what, fd, fp):
+        if toignore(fd):
+            return ignoreproblem(what, fd, fp)
+        if todelete(fd):
+            return removeproblem(what, fd, fp)
+        logging.error('Found %s at %s' % (what, fd))
+        return 1
+
+    def is_executable(path):
+        return os.path.exists(path) and os.access(path, os.X_OK)
+
+    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})
+
+    def is_binary(path):
+        d = None
+        with open(path, 'rb') as f:
+            d = f.read(1024)
+        return bool(d.translate(None, textchars))
+
+    # False positives patterns for files that are binary and executable.
+    safe_paths = [re.compile(r) for r in [
+        r".*/drawable[^/]*/.*\.png$",  # png drawables
+        r".*/mipmap[^/]*/.*\.png$",    # png mipmaps
+        ]
+    ]
+
+    def safe_path(path):
+        for sp in safe_paths:
+            if sp.match(path):
+                return True
+        return False
+
+    gradle_compile_commands = get_gradle_compile_commands(build)
+
+    def is_used_by_gradle(line):
+        return any(command.match(line) for command in gradle_compile_commands)
+
+    # Iterate through all files in the source code
+    for r, d, f in os.walk(build_dir, topdown=True):
+
+        # It's topdown, so checking the basename is enough
+        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
+            if ignoredir in d:
+                d.remove(ignoredir)
+
+        for curfile in f:
+
+            if curfile in ['.DS_Store']:
+                continue
+
+            # Path (relative) to the file
+            fp = os.path.join(r, curfile)
+
+            if os.path.islink(fp):
+                continue
+
+            fd = fp[len(build_dir) + 1:]
+            _, ext = common.get_extension(fd)
+
+            if ext == 'so':
+                count += handleproblem('shared library', fd, fp)
+            elif ext == 'a':
+                count += handleproblem('static library', fd, fp)
+            elif ext == 'class':
+                count += handleproblem('Java compiled class', fd, fp)
+            elif ext == 'apk':
+                removeproblem('APK file', fd, fp)
+
+            elif ext == 'jar':
+                for name in suspects_found(curfile):
+                    count += handleproblem('usual supect \'%s\'' % name, fd, fp)
+                warnproblem('JAR file', fd)
+
+            elif ext == 'java':
+                if not os.path.isfile(fp):
+                    continue
+                for line in file(fp):
+                    if 'DexClassLoader' in line:
+                        count += handleproblem('DexClassLoader', fd, fp)
+                        break
+
+            elif ext == 'gradle':
+                if not os.path.isfile(fp):
+                    continue
+                with open(fp, 'r') as f:
+                    lines = f.readlines()
+                for i, line in enumerate(lines):
+                    if is_used_by_gradle(line):
+                        for name in suspects_found(line):
+                            count += handleproblem('usual supect \'%s\' at line %d' % (name, i + 1), fd, fp)
+                noncomment_lines = [l for l in lines if not common.gradle_comment.match(l)]
+                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
+                for m in gradle_mavenrepo.finditer(joined):
+                    url = m.group(2)
+                    if not any(r.match(url) for r in allowed_repos):
+                        count += handleproblem('unknown maven repo \'%s\'' % url, fd, fp)
+
+            elif ext in ['', 'bin', 'out', 'exe']:
+                if is_binary(fp):
+                    count += handleproblem('binary', fd, fp)
+
+            elif is_executable(fp):
+                if is_binary(fp) and not safe_path(fd):
+                    warnproblem('possible binary', fd)
+
+    for p in scanignore:
+        if p not in scanignore_worked:
+            logging.error('Unused scanignore path: %s' % p)
+            count += 1
+
+    for p in scandelete:
+        if p not in scandelete_worked:
+            logging.error('Unused scandelete path: %s' % p)
+            count += 1
+
+    return count
+
+
 def main():
 
     global config, options
 
     # Parse command line...
-    parser = OptionParser()
-    parser.add_option("-v", "--verbose", action="store_true", default=False,
-                      help="Spew out even more information than normal")
-    parser.add_option("-p", "--package", default=None,
-                      help="Scan only the specified package")
-    parser.add_option("--nosvn", action="store_true", default=False,
-                      help="Skip svn repositories - for test purposes, because they are too slow.")
-    (options, args) = parser.parse_args()
+    parser = ArgumentParser(usage="%(prog)s [options] [APPID[:VERCODE] [APPID[:VERCODE] ...]]")
+    common.setup_global_opts(parser)
+    parser.add_argument("appid", nargs='*', help="app-id with optional versioncode in the form APPID[:VERCODE]")
+    options = parser.parse_args()
 
     config = common.read_config(options)
 
-    # Get all apps...
-    apps = common.read_metadata()
-
-    # Filter apps according to command-line options
-    if options.package:
-        apps = [app for app in apps if app['id'] == options.package]
-        if len(apps) == 0:
-            print "No such package"
-            sys.exit(1)
+    # Read all app and srclib metadata
+    allapps = metadata.read_metadata()
+    apps = common.read_app_args(options.appid, allapps, True)
 
-    problems = []
+    probcount = 0
 
     build_dir = 'build'
     if not os.path.isdir(build_dir):
-        print "Creating build directory"
+        logging.info("Creating build directory")
         os.makedirs(build_dir)
     srclib_dir = os.path.join(build_dir, 'srclib')
     extlib_dir = os.path.join(build_dir, 'extlib')
 
-    for app in apps:
-
-        skip = False
-        if app['Disabled']:
-            print "Skipping %s: disabled" % app['id']
-            skip = True
-        elif not app['builds']:
-            print "Skipping %s: no builds specified" % app['id']
-            skip = True
-        elif options.nosvn and app['Repo Type'] == 'svn':
-            skip = True
-
-        if not skip:
-
-            print "Processing " + app['id']
-
-            try:
-
-                build_dir = 'build/' + app['id']
-
-                # Set up vcs interface and make sure we have the latest code...
-                vcs = common.getvcs(app['Repo Type'], app['Repo'], build_dir,
-                        config['sdk_path'])
-
-                for thisbuild in app['builds']:
-
-                    if 'disable' in thisbuild:
-                        print ("..skipping version " + thisbuild['version'] + " - " +
-                                thisbuild.get('disable', thisbuild['commit'][1:]))
-                    else:
-                        print "..scanning version " + thisbuild['version']
-
-                        # Prepare the source code...
-                        root_dir, _ = common.prepare_source(vcs, app, thisbuild,
-                                build_dir, srclib_dir, extlib_dir,
-                                config['sdk_path'], config['ndk_path'],
-                                config['javacc_path'], config['mvn3'],
-                                False)
-
-                        # Do the scan...
-                        buildprobs = common.scan_source(build_dir, root_dir, thisbuild)
-                        for problem in buildprobs:
-                            problems.append(problem + 
-                                ' in ' + app['id'] + ' ' + thisbuild['version'])
-
-            except BuildException as be:
-                msg = "Could not scan app %s due to BuildException: %s" % (app['id'], be)
-                problems.append(msg)
-            except VCSException as vcse:
-                msg = "VCS error while scanning app %s: %s" % (app['id'], vcse)
-                problems.append(msg)
-            except Exception:
-                msg = "Could not scan app %s due to unknown error: %s" % (app['id'], traceback.format_exc())
-                problems.append(msg)
-
-    print "Finished:"
-    for problem in problems:
-        print problem
-    print str(len(problems)) + ' problems.'
+    for appid, app in apps.iteritems():
+
+        if app.Disabled:
+            logging.info("Skipping %s: disabled" % appid)
+            continue
+        if not app.builds:
+            logging.info("Skipping %s: no builds specified" % appid)
+            continue
+
+        logging.info("Processing " + appid)
+
+        try:
+
+            if app.RepoType == 'srclib':
+                build_dir = os.path.join('build', 'srclib', app.Repo)
+            else:
+                build_dir = os.path.join('build', appid)
+
+            # Set up vcs interface and make sure we have the latest code...
+            vcs = common.getvcs(app.RepoType, app.Repo, build_dir)
+
+            for build in app.builds:
+
+                if build.disable:
+                    logging.info("...skipping version %s - %s" % (
+                        build.version, build.get('disable', build.commit[1:])))
+                else:
+                    logging.info("...scanning version " + build.version)
+
+                    # Prepare the source code...
+                    root_dir, _ = common.prepare_source(vcs, app, build,
+                                                        build_dir, srclib_dir,
+                                                        extlib_dir, False)
+
+                    # Do the scan...
+                    count = scan_source(build_dir, root_dir, build)
+                    if count > 0:
+                        logging.warn('Scanner found %d problems in %s (%s)' % (
+                            count, appid, build.vercode))
+                        probcount += count
+
+        except BuildException as be:
+            logging.warn("Could not scan app %s due to BuildException: %s" % (
+                appid, be))
+            probcount += 1
+        except VCSException as vcse:
+            logging.warn("VCS error while scanning app %s: %s" % (appid, vcse))
+            probcount += 1
+        except Exception:
+            logging.warn("Could not scan app %s due to unknown error: %s" % (
+                appid, traceback.format_exc()))
+            probcount += 1
+
+    logging.info("Finished:")
+    print("%d problems found" % probcount)
 
 if __name__ == "__main__":
     main()
-