chiark / gitweb /
Rewrite scanner logic
author Daniel Martí <mvdan@mvdan.cc>
Mon, 14 Sep 2015 05:11:53 +0000 (22:11 -0700)
committer Daniel Martí <mvdan@mvdan.cc>
Mon, 14 Sep 2015 05:17:37 +0000 (22:17 -0700)
Initially, the scanner used libmagic, which uses magic numbers in a file's
content to detect what kind of file it appears to be. Since that library isn't
available on all systems, we added support for two other libraries, mimetypes
amongst them.

The issue with mimetypes is that it only uses the file's extension, not its
actual content. This results in behaviour that varies depending on which system
you're running fdroidserver on. For example, an executable binary without an
extension would be ignored if mimetypes was being used.

We now drop all libraries - mimetypes too, as it depends on the system's
mime.types file - and instead check extensions ourselves. On top of that, we do
a simple binary content check to find binary executables that don't have an
extension.

The new in-house code without any dependencies doesn't add any new checks, so
no builds should break. The current checks still work:

 % fdroid scanner app.openconnect:1029
[...]
Found executable binary at assets/raw/armeabi/curl
Found executable binary at assets/raw/mips/curl
Found executable binary at assets/raw/x86/curl
Found JAR file at lib/XposedBridgeApi-54.jar
Found JAR file at libs/acra-4.5.0.jar
Found JAR file at libs/openconnect-wrapper.jar
Found JAR file at libs/stoken-wrapper.jar
Found shared library at libs/armeabi/libopenconnect.so
Found shared library at libs/armeabi/libstoken.so
Found shared library at libs/mips/libopenconnect.so
Found shared library at libs/mips/libstoken.so
Found shared library at libs/x86/libopenconnect.so
Found shared library at libs/x86/libstoken.so

.gitlab-ci.yml
docs/fdroid.texi
fdroidserver/common.py
fdroidserver/scanner.py
setup.py

index a66d2d5959070e91c1203161f912e31912a41c3e..df6e202968edf99fa02966c20dedd9b020b96b21 100644 (file)
@@ -4,7 +4,7 @@ before_script:
   - apt-get -q install -y wget tar
   - echo " == Installing packages required by fdroidserver"
   - apt-get -q install -y python
-      python-git python-imaging python-libcloud python-logilab-astng python-magic
+      python-git python-imaging python-libcloud python-logilab-astng
       python-paramiko python-pip python-pyasn1 python-pyasn1-modules
       python-requests python-virtualenv python-yaml
       rsync
index 76988d96f437d0487d9472f1496acdbb2bb33ad8..af58c0bb2ef73da6933ae25417c097005e8a599e 100644 (file)
@@ -122,8 +122,6 @@ vagrant-cachier plugin (unpackaged): `vagrant plugin install vagrant-cachier`
 Paramiko (debian package python-paramiko)
 @item
 Imaging (debian package python-imaging)
-@item
-Magic (debian package python-magic)
 @end itemize
 
 On the other hand, if you want to build the apps directly on your system
index becd35688c3122644bed69a20beb9773bfb4e134..d93bf73927d223e710a994bf5ef3043238de9600 100644 (file)
@@ -367,10 +367,16 @@ def read_app_args(args, allapps, allow_vercodes=False):
     return apps
 
 
-def has_extension(filename, extension):
-    name, ext = os.path.splitext(filename)
-    ext = ext.lower()[1:]
-    return ext == extension
+def get_extension(filename):
+    _, ext = os.path.splitext(filename)
+    if not ext:
+        return ''
+    return ext.lower()[1:]
+
+
+def has_extension(filename, ext):
+    return ext == get_extension(filename)
+
 
 apk_regex = None
 
index dce0180717eeca8ef8ca649f6dd9efb34384e68e..7f7c6aa9b226a496e80acb436fc5c1efdc76dc08 100644 (file)
@@ -31,63 +31,6 @@ config = None
 options = None
 
 
-def init_mime_type():
-    '''
-    There are two incompatible versions of the 'magic' module, one
-    that comes as part of libmagic, which is what Debian includes as
-    python-magic, then another called python-magic that is a separate
-    project that wraps libmagic.  The second is 'magic' on pypi, so
-    both need to be supported.  Then on platforms where libmagic is
-    not easily included, e.g. OSX and Windows, fallback to the
-    built-in 'mimetypes' module so this will work without
-    libmagic. Hence this function with the following hacks:
-    '''
-
-    init_path = ''
-    method = ''
-    ms = None
-
-    def mime_from_file(path):
-        try:
-            return magic.from_file(path, mime=True)
-        except UnicodeError:
-            return None
-
-    def mime_file(path):
-        try:
-            return ms.file(path)
-        except UnicodeError:
-            return None
-
-    def mime_guess_type(path):
-        return mimetypes.guess_type(path, strict=False)
-
-    try:
-        import magic
-        try:
-            ms = magic.open(magic.MIME_TYPE)
-            ms.load()
-            magic.from_file(init_path, mime=True)
-            method = 'from_file'
-        except AttributeError:
-            ms.file(init_path)
-            method = 'file'
-    except ImportError:
-        import mimetypes
-        mimetypes.init()
-        method = 'guess_type'
-
-    logging.info("Using magic method " + method)
-    if method == 'from_file':
-        return mime_from_file
-    if method == 'file':
-        return mime_file
-    if method == 'guess_type':
-        return mime_guess_type
-
-    logging.critical("unknown magic method!")
-
-
 # Scan the source code in the given directory (and all subdirectories)
 # and return the number of fatal problems encountered
 def scan_source(build_dir, root_dir, thisbuild):
@@ -153,7 +96,16 @@ def scan_source(build_dir, root_dir, thisbuild):
         logging.error('Found %s at %s' % (what, fd))
         return 1
 
-    get_mime_type = init_mime_type()
+    def is_executable(path):
+        return os.path.exists(path) and os.access(path, os.X_OK)
+
+    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})
+
+    def is_binary(path):
+        d = None
+        with open(path, 'rb') as f:
+            d = f.read(1024)
+        return bool(d.translate(None, textchars))
 
     # Iterate through all files in the source code
     for r, d, f in os.walk(build_dir, topdown=True):
@@ -169,44 +121,24 @@ def scan_source(build_dir, root_dir, thisbuild):
             fp = os.path.join(r, curfile)
             fd = fp[len(build_dir) + 1:]
 
-            mime = get_mime_type(fp)
+            ext = common.get_extension(fd)
 
-            if mime == 'application/x-sharedlib':
+            if ext == 'so':
                 count += handleproblem('shared library', fd, fp)
-
-            elif mime == 'application/x-archive':
+            elif ext == 'a':
                 count += handleproblem('static library', fd, fp)
-
-            elif mime == 'application/x-executable' or mime == 'application/x-mach-binary':
-                count += handleproblem('binary executable', fd, fp)
-
-            elif mime == 'application/x-java-applet':
+            elif ext == 'class':
                 count += handleproblem('Java compiled class', fd, fp)
+            elif ext == 'apk':
+                removeproblem('APK file', fd, fp)
 
-            elif mime in (
-                    'application/jar',
-                    'application/zip',
-                    'application/java-archive',
-                    'application/octet-stream',
-                    'binary', ):
-
-                if common.has_extension(fp, 'apk'):
-                    removeproblem('APK file', fd, fp)
-
-                elif common.has_extension(fp, 'jar'):
-
-                    if any(suspect.match(curfile) for suspect in usual_suspects):
-                        count += handleproblem('usual supect', fd, fp)
-                    else:
-                        warnproblem('JAR file', fd)
-
-                elif common.has_extension(fp, 'zip'):
-                    warnproblem('ZIP file', fd)
-
+            elif ext == 'jar':
+                if any(suspect.match(curfile) for suspect in usual_suspects):
+                    count += handleproblem('usual supect', fd, fp)
                 else:
-                    warnproblem('unknown compressed or binary file', fd)
+                    warnproblem('JAR file', fd)
 
-            elif common.has_extension(fp, 'java'):
+            elif ext == 'java':
                 if not os.path.isfile(fp):
                     continue
                 for line in file(fp):
@@ -214,7 +146,7 @@ def scan_source(build_dir, root_dir, thisbuild):
                         count += handleproblem('DexClassLoader', fd, fp)
                         break
 
-            elif common.has_extension(fp, 'gradle'):
+            elif ext == 'gradle':
                 if not os.path.isfile(fp):
                     continue
                 for i, line in enumerate(file(fp)):
@@ -223,6 +155,12 @@ def scan_source(build_dir, root_dir, thisbuild):
                         count += handleproblem('usual suspect at line %d' % i, fd, fp)
                         break
 
+            elif is_binary(fp):
+                if is_executable(fp):
+                    count += handleproblem('executable binary', fd, fp)
+                elif ext == '':
+                    count += handleproblem('unknown binary', fd, fp)
+
     for p in scanignore:
         if p not in scanignore_worked:
             logging.error('Unused scanignore path: %s' % p)
@@ -323,7 +261,7 @@ def main():
             probcount += 1
 
     logging.info("Finished:")
-    print "%d app(s) with problems" % probcount
+    print "%d problems found" % probcount
 
 if __name__ == "__main__":
     main()
index a98fe264972a799687569285d6c293024022b826..85e61522d1bbb13b845839094f89a9f2aaa132dd 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ setup(name='fdroidserver',
                   'examples/opensc-fdroid.cfg',
                   'examples/fdroid-icon.png']),
       ],
-      install_requires=[  # should include 'python-magic' but its not strictly required
+      install_requires=[
           'mwclient',
           'paramiko',
           'Pillow',