From 055759cf761c7d30595d8a3f43fc4ef4c0acdbf4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Mart=C3=AD?= Date: Sun, 13 Sep 2015 22:11:53 -0700 Subject: [PATCH] Rewrite scanner logic Initially, the scanner used libmagic, which uses magic numbers in a file's content to detect what kind of file it is. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. This results in behaviour that varies depending on which system you're running fdroidserver on. For example, an executable binary without an extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] 
Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so --- .gitlab-ci.yml | 2 +- docs/fdroid.texi | 2 - fdroidserver/common.py | 14 +++-- fdroidserver/scanner.py | 120 ++++++++++------------------------------ setup.py | 2 +- 5 files changed, 41 insertions(+), 99 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a66d2d59..df6e2029 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,7 +4,7 @@ before_script: - apt-get -q install -y wget tar - echo " == Installing packages required by fdroidserver" - apt-get -q install -y python - python-git python-imaging python-libcloud python-logilab-astng python-magic + python-git python-imaging python-libcloud python-logilab-astng python-paramiko python-pip python-pyasn1 python-pyasn1-modules python-requests python-virtualenv python-yaml rsync diff --git a/docs/fdroid.texi b/docs/fdroid.texi index 76988d96..af58c0bb 100644 --- a/docs/fdroid.texi +++ b/docs/fdroid.texi @@ -122,8 +122,6 @@ vagrant-cachier plugin (unpackaged): `vagrant plugin install vagrant-cachier` Paramiko (debian package python-paramiko) @item Imaging (debian package python-imaging) -@item -Magic (debian package python-magic) @end itemize On the other hand, if you want to build the apps directly on your system diff --git a/fdroidserver/common.py b/fdroidserver/common.py index becd3568..d93bf739 100644 --- a/fdroidserver/common.py +++ b/fdroidserver/common.py @@ -367,10 +367,16 @@ def 
read_app_args(args, allapps, allow_vercodes=False): return apps -def has_extension(filename, extension): - name, ext = os.path.splitext(filename) - ext = ext.lower()[1:] - return ext == extension +def get_extension(filename): + _, ext = os.path.splitext(filename) + if not ext: + return '' + return ext.lower()[1:] + + +def has_extension(filename, ext): + return ext == get_extension(filename) + apk_regex = None diff --git a/fdroidserver/scanner.py b/fdroidserver/scanner.py index dce01807..7f7c6aa9 100644 --- a/fdroidserver/scanner.py +++ b/fdroidserver/scanner.py @@ -31,63 +31,6 @@ config = None options = None -def init_mime_type(): - ''' - There are two incompatible versions of the 'magic' module, one - that comes as part of libmagic, which is what Debian includes as - python-magic, then another called python-magic that is a separate - project that wraps libmagic. The second is 'magic' on pypi, so - both need to be supported. Then on platforms where libmagic is - not easily included, e.g. OSX and Windows, fallback to the - built-in 'mimetypes' module so this will work without - libmagic. 
Hence this function with the following hacks: - ''' - - init_path = '' - method = '' - ms = None - - def mime_from_file(path): - try: - return magic.from_file(path, mime=True) - except UnicodeError: - return None - - def mime_file(path): - try: - return ms.file(path) - except UnicodeError: - return None - - def mime_guess_type(path): - return mimetypes.guess_type(path, strict=False) - - try: - import magic - try: - ms = magic.open(magic.MIME_TYPE) - ms.load() - magic.from_file(init_path, mime=True) - method = 'from_file' - except AttributeError: - ms.file(init_path) - method = 'file' - except ImportError: - import mimetypes - mimetypes.init() - method = 'guess_type' - - logging.info("Using magic method " + method) - if method == 'from_file': - return mime_from_file - if method == 'file': - return mime_file - if method == 'guess_type': - return mime_guess_type - - logging.critical("unknown magic method!") - - # Scan the source code in the given directory (and all subdirectories) # and return the number of fatal problems encountered def scan_source(build_dir, root_dir, thisbuild): @@ -153,7 +96,16 @@ def scan_source(build_dir, root_dir, thisbuild): logging.error('Found %s at %s' % (what, fd)) return 1 - get_mime_type = init_mime_type() + def is_executable(path): + return os.path.exists(path) and os.access(path, os.X_OK) + + textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f}) + + def is_binary(path): + d = None + with open(path, 'rb') as f: + d = f.read(1024) + return bool(d.translate(None, textchars)) # Iterate through all files in the source code for r, d, f in os.walk(build_dir, topdown=True): @@ -169,44 +121,24 @@ def scan_source(build_dir, root_dir, thisbuild): fp = os.path.join(r, curfile) fd = fp[len(build_dir) + 1:] - mime = get_mime_type(fp) + ext = common.get_extension(fd) - if mime == 'application/x-sharedlib': + if ext == 'so': count += handleproblem('shared library', fd, fp) - - elif mime == 'application/x-archive': + elif 
ext == 'a': count += handleproblem('static library', fd, fp) - - elif mime == 'application/x-executable' or mime == 'application/x-mach-binary': - count += handleproblem('binary executable', fd, fp) - - elif mime == 'application/x-java-applet': + elif ext == 'class': count += handleproblem('Java compiled class', fd, fp) + elif ext == 'apk': + removeproblem('APK file', fd, fp) - elif mime in ( - 'application/jar', - 'application/zip', - 'application/java-archive', - 'application/octet-stream', - 'binary', ): - - if common.has_extension(fp, 'apk'): - removeproblem('APK file', fd, fp) - - elif common.has_extension(fp, 'jar'): - - if any(suspect.match(curfile) for suspect in usual_suspects): - count += handleproblem('usual supect', fd, fp) - else: - warnproblem('JAR file', fd) - - elif common.has_extension(fp, 'zip'): - warnproblem('ZIP file', fd) - + elif ext == 'jar': + if any(suspect.match(curfile) for suspect in usual_suspects): + count += handleproblem('usual supect', fd, fp) else: - warnproblem('unknown compressed or binary file', fd) + warnproblem('JAR file', fd) - elif common.has_extension(fp, 'java'): + elif ext == 'java': if not os.path.isfile(fp): continue for line in file(fp): @@ -214,7 +146,7 @@ def scan_source(build_dir, root_dir, thisbuild): count += handleproblem('DexClassLoader', fd, fp) break - elif common.has_extension(fp, 'gradle'): + elif ext == 'gradle': if not os.path.isfile(fp): continue for i, line in enumerate(file(fp)): @@ -223,6 +155,12 @@ def scan_source(build_dir, root_dir, thisbuild): count += handleproblem('usual suspect at line %d' % i, fd, fp) break + elif is_binary(fp): + if is_executable(fp): + count += handleproblem('executable binary', fd, fp) + elif ext == '': + count += handleproblem('unknown binary', fd, fp) + for p in scanignore: if p not in scanignore_worked: logging.error('Unused scanignore path: %s' % p) @@ -323,7 +261,7 @@ def main(): probcount += 1 logging.info("Finished:") - print "%d app(s) with problems" % probcount + 
print "%d problems found" % probcount if __name__ == "__main__": main() diff --git a/setup.py b/setup.py index a98fe264..85e61522 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup(name='fdroidserver', 'examples/opensc-fdroid.cfg', 'examples/fdroid-icon.png']), ], - install_requires=[ # should include 'python-magic' but its not strictly required + install_requires=[ 'mwclient', 'paramiko', 'Pillow', -- 2.30.2