chiark / gitweb /
Merge branch 'master' into 'master'
[fdroidserver.git] / fdroidserver / scanner.py
1 #!/usr/bin/env python3
2 #
3 # scanner.py - part of the FDroid server tools
4 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU Affero General Public License for more details.
15 #
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19 import os
20 import re
21 import traceback
22 from argparse import ArgumentParser
23 import logging
24
25 from . import _
26 from . import common
27 from . import metadata
28 from .exception import BuildException, VCSException
29
# Module-level state, filled in by main(): `options` receives the parsed
# command-line arguments and `config` the result of common.read_config().
config = None
options = None
32
33
def get_gradle_compile_commands(build):
    """Return compiled regexes matching Gradle dependency declarations.

    Each pattern matches (case-insensitively, after optional leading
    whitespace) a line that starts with one of the known dependency
    configuration names such as ``compile``, ``implementation`` or
    ``releaseApi``.

    :param build: object whose ``gradle`` attribute is read; when it is a
        non-empty list other than ``['yes']`` its entries are treated as
        flavor names and ``<flavor>Compile`` / ``<flavor>ReleaseCompile``
        are recognised as well.
    :returns: list of compiled regular expressions, one per command.
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated by Python, which would merge several commands
    # into one meaningless pattern.
    compileCommands = [
        'compile',
        'provided',
        'apk',
        'implementation',
        'api',
        'compileOnly',
        'runtimeOnly',
        'releaseCompile',
        'releaseProvided',
        'releaseApk',
        'releaseImplementation',
        'releaseApi',
        'releaseCompileOnly',
        'releaseRuntimeOnly',
    ]
    if build.gradle and build.gradle != ['yes']:
        compileCommands += [flavor + 'Compile' for flavor in build.gradle]
        compileCommands += [flavor + 'ReleaseCompile' for flavor in build.gradle]

    return [re.compile(r'\s*' + c, re.IGNORECASE) for c in compileCommands]
54
55
def scan_source(build_dir, build=metadata.Build()):
    """Scan the source code in the given directory (and all subdirectories)
    and return the number of fatal problems encountered.

    Files are classified by extension: shared/static libraries, compiled
    Java classes, jars whose name matches a known non-free library, Java
    sources mentioning DexClassLoader, gradle files that depend on a known
    non-free library or declare a maven repo outside the allowed list, and
    binary files with no or a generic extension are counted as problems.
    APK files and gradle-wrapper.jar are deleted; other JAR/AAR files and
    executable binaries only produce a warning.

    :param build_dir: root directory of the source tree to scan
    :param build: build entry whose ``scanignore``, ``scandelete`` and
        ``gradle`` attributes are read. Paths listed in ``scanignore`` are
        logged but not counted; paths listed in ``scandelete`` are deleted
        instead of counted. Note that the default instance is created once,
        when the function is defined.
    :returns: number of fatal problems, including one for every
        scanignore/scandelete entry that did not match anything
    """

    count = 0

    # Common known non-free blobs (always lower case):
    usual_suspects = {
        exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
            r'flurryagent',
            r'paypal.*mpl',
            r'google.*analytics',
            r'admob.*sdk.*android',
            r'google.*ad.*view',
            r'google.*admob',
            r'google.*play.*services',
            r'crittercism',
            r'heyzap',
            r'jpct.*ae',
            r'youtube.*android.*player.*api',
            r'bugsense',
            r'crashlytics',
            r'ouya.*sdk',
            r'libspen23',
            r'firebase',
        ]
    }

    # Free software that would otherwise trip the 'firebase' pattern above.
    whitelisted = [
        'firebase-jobdispatcher',  # https://github.com/firebase/firebase-jobdispatcher-android/blob/master/LICENSE
        'com.firebaseui',          # https://github.com/firebase/FirebaseUI-Android/blob/master/LICENSE
        'geofire-android'          # https://github.com/firebase/geofire-java/blob/master/LICENSE
    ]

    def is_whitelisted(s):
        return any(wl in s for wl in whitelisted)

    def suspects_found(s):
        """Yield the name of every usual suspect whose pattern matches s."""
        for n, r in usual_suspects.items():
            if r.match(s) and not is_whitelisted(s):
                yield n

    # Group 2 captures the repository URL of a `maven { url '...' }` block.
    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')

    # NOTE(review): these are prefix matches, so a host that merely starts
    # with an allowed name would also be accepted - confirm this is intended.
    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'/*') for repo in [
        'repo1.maven.org/maven2',  # mavenCentral()
        'jcenter.bintray.com',     # jcenter()
        'jitpack.io',
        'repo.maven.apache.org/maven2',
        'oss.jfrog.org/artifactory/oss-snapshot-local',
        'oss.sonatype.org/content/repositories/snapshots',
        'oss.sonatype.org/content/repositories/releases',
        'oss.sonatype.org/content/groups/public',
        'clojars.org/repo',  # Clojure free software libs
        's3.amazonaws.com/repo.commonsware.com',  # CommonsWare
        'plugins.gradle.org/m2',  # Gradle plugin repo
        'maven.google.com',  # Google Maven Repo, https://developer.android.com/studio/build/dependencies.html#google-maven
        ]
    ]

    scanignore = common.getpaths_map(build_dir, build.scanignore)
    scandelete = common.getpaths_map(build_dir, build.scandelete)

    # Entries that matched at least one file; anything left over is reported
    # as an error at the end of the scan.
    scanignore_worked = set()
    scandelete_worked = set()

    def toignore(path_in_build_dir):
        for k, paths in scanignore.items():
            for p in paths:
                if path_in_build_dir.startswith(p):
                    scanignore_worked.add(k)
                    return True
        return False

    def todelete(path_in_build_dir):
        for k, paths in scandelete.items():
            for p in paths:
                if path_in_build_dir.startswith(p):
                    scandelete_worked.add(k)
                    return True
        return False

    def ignoreproblem(what, path_in_build_dir):
        logging.info('Ignoring %s at %s' % (what, path_in_build_dir))
        return 0

    def removeproblem(what, path_in_build_dir, filepath):
        logging.info('Removing %s at %s' % (what, path_in_build_dir))
        try:
            os.remove(filepath)
        except FileNotFoundError:
            # The same file can be reported more than once (e.g. a gradle
            # file with several suspect lines, or a jar matching several
            # suspects); it is already gone after the first removal.
            pass
        return 0

    def warnproblem(what, path_in_build_dir):
        if toignore(path_in_build_dir):
            return
        logging.warning('Found %s at %s' % (what, path_in_build_dir))

    def handleproblem(what, path_in_build_dir, filepath):
        """Return 1 for a fatal problem, 0 if it was ignored or deleted."""
        if toignore(path_in_build_dir):
            return ignoreproblem(what, path_in_build_dir)
        if todelete(path_in_build_dir):
            return removeproblem(what, path_in_build_dir, filepath)
        logging.error('Found %s at %s' % (what, path_in_build_dir))
        return 1

    def is_executable(path):
        return os.path.exists(path) and os.access(path, os.X_OK)

    # Byte values considered to be text; anything else marks a file as binary.
    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})

    def is_binary(path):
        # Only the first 1024 bytes are inspected.
        with open(path, 'rb') as f:
            d = f.read(1024)
        return bool(d.translate(None, textchars))

    # False positives patterns for files that are binary and executable.
    safe_paths = [re.compile(r) for r in [
        r".*/drawable[^/]*/.*\.png$",  # png drawables
        r".*/mipmap[^/]*/.*\.png$",    # png mipmaps
        ]
    ]

    def safe_path(path):
        return any(sp.match(path) for sp in safe_paths)

    gradle_compile_commands = get_gradle_compile_commands(build)

    def is_used_by_gradle(line):
        return any(command.match(line) for command in gradle_compile_commands)

    # Iterate through all files in the source code
    for root, dirs, files in os.walk(build_dir, topdown=True):

        # It's topdown, so checking the basename is enough
        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
            if ignoredir in dirs:
                dirs.remove(ignoredir)

        for curfile in files:

            if curfile in ['.DS_Store']:
                continue

            # Path (relative) to the file
            filepath = os.path.join(root, curfile)

            if os.path.islink(filepath):
                continue

            path_in_build_dir = os.path.relpath(filepath, build_dir)
            _ignored, ext = common.get_extension(path_in_build_dir)

            if ext == 'so':
                count += handleproblem('shared library', path_in_build_dir, filepath)
            elif ext == 'a':
                count += handleproblem('static library', path_in_build_dir, filepath)
            elif ext == 'class':
                count += handleproblem('Java compiled class', path_in_build_dir, filepath)
            elif ext == 'apk':
                # APKs are always deleted and never counted as a problem.
                removeproblem('APK file', path_in_build_dir, filepath)

            elif ext == 'jar':
                for name in suspects_found(curfile):
                    count += handleproblem('usual suspect \'%s\'' % name, path_in_build_dir, filepath)
                if curfile == 'gradle-wrapper.jar':
                    removeproblem('gradle-wrapper.jar', path_in_build_dir, filepath)
                else:
                    warnproblem('JAR file', path_in_build_dir)

            elif ext == 'aar':
                warnproblem('AAR file', path_in_build_dir)

            elif ext == 'java':
                if not os.path.isfile(filepath):
                    continue
                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
                    for line in f:
                        if 'DexClassLoader' in line:
                            count += handleproblem('DexClassLoader', path_in_build_dir, filepath)
                            # One report per file is enough.
                            break

            elif ext == 'gradle':
                if not os.path.isfile(filepath):
                    continue
                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
                    lines = f.readlines()
                for i, line in enumerate(lines):
                    if is_used_by_gradle(line):
                        for name in suspects_found(line):
                            count += handleproblem('usual suspect \'%s\' at line %d' % (name, i + 1), path_in_build_dir, filepath)
                # Repo declarations may span lines, so search the comment-free
                # content joined into a single whitespace-normalised string.
                noncomment_lines = [ln for ln in lines if not common.gradle_comment.match(ln)]
                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
                for m in gradle_mavenrepo.finditer(joined):
                    url = m.group(2)
                    if not any(r.match(url) for r in allowed_repos):
                        count += handleproblem('unknown maven repo \'%s\'' % url, path_in_build_dir, filepath)

            elif ext in ['', 'bin', 'out', 'exe']:
                if is_binary(filepath):
                    count += handleproblem('binary', path_in_build_dir, filepath)

            elif is_executable(filepath):
                if is_binary(filepath) and not safe_path(path_in_build_dir):
                    warnproblem('possible binary', path_in_build_dir)

    for p in scanignore:
        if p not in scanignore_worked:
            logging.error('Unused scanignore path: %s' % p)
            count += 1

    for p in scandelete:
        if p not in scandelete_worked:
            logging.error('Unused scandelete path: %s' % p)
            count += 1

    return count
276
277
def main():
    """Command-line entry point: scan the source of the selected apps.

    Parses the command line, reads the configuration and app metadata, then
    runs scan_source() on every enabled app: once per non-disabled build
    when the app defines builds, otherwise once on the current state of its
    build directory. Errors raised while scanning an app are logged and
    counted as one problem each; the total problem count is printed at the
    end.
    """

    global config, options

    # Parse command line...
    parser = ArgumentParser(usage="%(prog)s [options] [APPID[:VERCODE] [APPID[:VERCODE] ...]]")
    common.setup_global_opts(parser)
    parser.add_argument("appid", nargs='*', help=_("applicationId with optional versionCode in the form APPID[:VERCODE]"))
    metadata.add_metadata_arguments(parser)
    options = parser.parse_args()
    metadata.warnings_action = options.W

    config = common.read_config(options)

    # Read all app and srclib metadata
    allapps = metadata.read_metadata()
    apps = common.read_app_args(options.appid, allapps, True)

    probcount = 0

    build_dir = 'build'
    if not os.path.isdir(build_dir):
        logging.info("Creating build directory")
        os.makedirs(build_dir)
    srclib_dir = os.path.join(build_dir, 'srclib')
    extlib_dir = os.path.join(build_dir, 'extlib')

    for appid, app in apps.items():

        if app.Disabled:
            logging.info(_("Skipping {appid}: disabled").format(appid=appid))
            continue

        try:
            if app.RepoType == 'srclib':
                build_dir = os.path.join('build', 'srclib', app.Repo)
            else:
                build_dir = os.path.join('build', appid)

            if app.builds:
                logging.info(_("Processing {appid}").format(appid=appid))
            else:
                logging.info(_("{appid}: no builds specified, running on current source state")
                             .format(appid=appid))
                count = scan_source(build_dir)
                if count > 0:
                    logging.warning(_('Scanner found {count} problems in {appid}:')
                                    .format(count=count, appid=appid))
                    probcount += count
                continue

            # Set up vcs interface and make sure we have the latest code...
            vcs = common.getvcs(app.RepoType, app.Repo, build_dir)

            for build in app.builds:

                if build.disable:
                    logging.info("...skipping version %s - %s" % (
                        build.versionName, build.get('disable', build.commit[1:])))
                    continue

                logging.info("...scanning version " + build.versionName)
                # Prepare the source code...
                common.prepare_source(vcs, app, build,
                                      build_dir, srclib_dir,
                                      extlib_dir, False)

                count = scan_source(build_dir, build)
                if count > 0:
                    logging.warning(_('Scanner found {count} problems in {appid}:{versionCode}:')
                                    .format(count=count, appid=appid, versionCode=build.versionCode))
                    probcount += count

        # A failure in one app must not stop the scan of the remaining apps.
        except BuildException as be:
            logging.warning("Could not scan app %s due to BuildException: %s" % (
                appid, be))
            probcount += 1
        except VCSException as vcse:
            logging.warning("VCS error while scanning app %s: %s" % (appid, vcse))
            probcount += 1
        except Exception:
            logging.warning("Could not scan app %s due to unknown error: %s" % (
                appid, traceback.format_exc()))
            probcount += 1

    logging.info(_("Finished"))
    print(_("%d problems found") % probcount)
365
366
# Run the scanner when this file is executed as a script.
if __name__ == "__main__":
    main()