chiark / gitweb /
Merge branch 'error_on_jars' into 'master'
[fdroidserver.git] / fdroidserver / scanner.py
1 #!/usr/bin/env python3
2 #
3 # scanner.py - part of the FDroid server tools
4 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU Affero General Public License for more details.
15 #
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19 import os
20 import re
21 import traceback
22 from argparse import ArgumentParser
23 import logging
24
25 from . import _
26 from . import common
27 from . import metadata
28 from .exception import BuildException, VCSException
29
# Module-level state shared across this module. Both names are rebound in
# main(): ``options`` to the parsed command-line arguments and ``config`` to
# the result of common.read_config(options).
config = None
options = None
32
33
def get_gradle_compile_commands(build):
    """Return compiled regexes matching Gradle 'compile' style keywords.

    The generic keywords ``compile`` and ``releaseCompile`` are always
    included.  When ``build.gradle`` is non-empty and is not the plain
    ``['yes']`` marker, each of its entries is treated as a flavor name and
    ``<flavor>Compile`` / ``<flavor>ReleaseCompile`` are added as well.

    Every returned pattern is case-insensitive and tolerates leading
    whitespace, so it is meant to be used with ``.match()`` on a whole line.
    """
    flavors = build.gradle
    if not flavors or flavors == ['yes']:
        # No flavor-specific keywords to add.
        flavors = []

    keywords = ['compile', 'releaseCompile']
    # All '<flavor>Compile' entries come first, then all
    # '<flavor>ReleaseCompile' entries.
    for suffix in ('Compile', 'ReleaseCompile'):
        keywords.extend(flavor + suffix for flavor in flavors)

    patterns = []
    for keyword in keywords:
        patterns.append(re.compile(r'\s*' + keyword, re.IGNORECASE))
    return patterns
41
42
def scan_source(build_dir, build):
    """Scan the source code in the given directory (and all subdirectories)
    and return the number of fatal problems encountered.

    :param build_dir: root directory of the prepared source tree to walk.
    :param build: build entry; ``build.scanignore`` and ``build.scandelete``
        are read here, and ``build.gradle`` is read through
        get_gradle_compile_commands().
    :returns: the number of problems that were neither ignored nor removed,
        plus one for every scanignore/scandelete entry that never matched
        any problem.

    Side effects: files matched by a scandelete entry, every ``*.apk`` and
    every ``gradle-wrapper.jar`` are deleted from ``build_dir``.
    """

    count = 0

    # Common known non-free blobs (always lower case):
    usual_suspects = {
        exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
            r'flurryagent',
            r'paypal.*mpl',
            r'google.*analytics',
            r'admob.*sdk.*android',
            r'google.*ad.*view',
            r'google.*admob',
            r'google.*play.*services',
            r'crittercism',
            r'heyzap',
            r'jpct.*ae',
            r'youtube.*android.*player.*api',
            r'bugsense',
            r'crashlytics',
            r'ouya.*sdk',
            r'libspen23',
            r'firebase',
        ]
    }

    def suspects_found(s):
        """Yield the name of every usual suspect whose pattern matches s."""
        for n, r in usual_suspects.items():
            if r.match(s):
                yield n

    # Captures (group 2) the URL of a ``maven { url '...' }`` declaration.
    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')

    # NOTE(review): these are prefix matches, so a host that merely starts
    # with an allowed name (e.g. 'jitpack.io.example.org') is accepted too.
    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'/*') for repo in [
        'repo1.maven.org/maven2',  # mavenCentral()
        'jcenter.bintray.com',     # jcenter()
        'jitpack.io',
        'repo.maven.apache.org/maven2',
        'oss.jfrog.org/artifactory/oss-snapshot-local',
        'oss.sonatype.org/content/repositories/snapshots',
        'oss.sonatype.org/content/repositories/releases',
        'oss.sonatype.org/content/groups/public',
        'clojars.org/repo',  # Clojure free software libs
        's3.amazonaws.com/repo.commonsware.com',  # CommonsWare
        'plugins.gradle.org/m2',  # Gradle plugin repo
        'maven.google.com',  # Google Maven Repo, https://developer.android.com/studio/build/dependencies.html#google-maven
        ]
    ]

    # Both are used below as mappings from a configured entry to the
    # collection of path prefixes it stands for.
    scanignore = common.getpaths_map(build_dir, build.scanignore)
    scandelete = common.getpaths_map(build_dir, build.scandelete)

    # Entries that matched at least one problem; the rest are reported as
    # unused at the end of the scan.
    scanignore_worked = set()
    scandelete_worked = set()

    def toignore(path_in_build_dir):
        """Return True if the path is covered by a scanignore entry."""
        for k, paths in scanignore.items():
            for p in paths:
                if path_in_build_dir.startswith(p):
                    scanignore_worked.add(k)
                    return True
        return False

    def todelete(path_in_build_dir):
        """Return True if the path is covered by a scandelete entry."""
        for k, paths in scandelete.items():
            for p in paths:
                if path_in_build_dir.startswith(p):
                    scandelete_worked.add(k)
                    return True
        return False

    def ignoreproblem(what, path_in_build_dir):
        """Log an ignored problem; contributes 0 to the problem count."""
        logging.info('Ignoring %s at %s' % (what, path_in_build_dir))
        return 0

    def removeproblem(what, path_in_build_dir, filepath):
        """Delete the offending file; contributes 0 to the problem count."""
        logging.info('Removing %s at %s' % (what, path_in_build_dir))
        try:
            os.remove(filepath)
        except FileNotFoundError:
            # Several problems can be reported for one file (e.g. a JAR that
            # is also a usual suspect, or a gradle file with more than one
            # unknown maven repo).  An earlier report already removed it.
            pass
        return 0

    def warnproblem(what, path_in_build_dir):
        """Log a non-fatal finding unless the path is scanignored."""
        if toignore(path_in_build_dir):
            return
        logging.warning('Found %s at %s' % (what, path_in_build_dir))

    def handleproblem(what, path_in_build_dir, filepath):
        """Ignore, remove or report a problem.

        Returns 1 if the problem is fatal (neither ignored nor removed),
        otherwise 0.  scanignore takes precedence over scandelete.
        """
        if toignore(path_in_build_dir):
            return ignoreproblem(what, path_in_build_dir)
        if todelete(path_in_build_dir):
            return removeproblem(what, path_in_build_dir, filepath)
        logging.error('Found %s at %s' % (what, path_in_build_dir))
        return 1

    def is_executable(path):
        return os.path.exists(path) and os.access(path, os.X_OK)

    # Byte values treated as text by is_binary().
    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})

    def is_binary(path):
        """Return True if the first 1024 bytes contain a non-text byte."""
        with open(path, 'rb') as f:
            d = f.read(1024)
        # Deleting every text byte leaves only the "binary" ones.
        return bool(d.translate(None, textchars))

    # False positives patterns for files that are binary and executable.
    safe_paths = [re.compile(r) for r in [
        r".*/drawable[^/]*/.*\.png$",  # png drawables
        r".*/mipmap[^/]*/.*\.png$",    # png mipmaps
        ]
    ]

    def safe_path(path):
        return any(sp.match(path) for sp in safe_paths)

    gradle_compile_commands = get_gradle_compile_commands(build)

    def is_used_by_gradle(line):
        return any(command.match(line) for command in gradle_compile_commands)

    # Iterate through all files in the source code
    for root, dirs, files in os.walk(build_dir, topdown=True):

        # It's topdown, so checking the basename is enough
        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
            if ignoredir in dirs:
                dirs.remove(ignoredir)

        for curfile in files:

            if curfile in ['.DS_Store']:
                continue

            # Path (relative) to the file
            filepath = os.path.join(root, curfile)

            if os.path.islink(filepath):
                continue

            path_in_build_dir = os.path.relpath(filepath, build_dir)
            # Only the extension is needed.  The first element is bound to a
            # throwaway name other than '_' so the gettext alias imported at
            # module level is not shadowed.
            _base, ext = common.get_extension(path_in_build_dir)

            if ext == 'so':
                count += handleproblem('shared library', path_in_build_dir, filepath)
            elif ext == 'a':
                count += handleproblem('static library', path_in_build_dir, filepath)
            elif ext == 'class':
                count += handleproblem('Java compiled class', path_in_build_dir, filepath)
            elif ext == 'apk':
                # APKs are always removed and never counted.
                removeproblem('APK file', path_in_build_dir, filepath)

            elif ext == 'jar':
                for name in suspects_found(curfile):
                    count += handleproblem('usual suspect \'%s\'' % name, path_in_build_dir, filepath)
                if curfile == 'gradle-wrapper.jar':
                    removeproblem('gradle-wrapper.jar', path_in_build_dir, filepath)
                else:
                    count += handleproblem('JAR file', path_in_build_dir, filepath)

            elif ext == 'aar':
                count += handleproblem('AAR file', path_in_build_dir, filepath)

            elif ext == 'java':
                if not os.path.isfile(filepath):
                    continue
                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
                    for line in f:
                        if 'DexClassLoader' in line:
                            count += handleproblem('DexClassLoader', path_in_build_dir, filepath)
                            # One report per file is enough.
                            break

            elif ext == 'gradle':
                if not os.path.isfile(filepath):
                    continue
                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
                    lines = f.readlines()
                for i, line in enumerate(lines):
                    if is_used_by_gradle(line):
                        for name in suspects_found(line):
                            count += handleproblem('usual suspect \'%s\' at line %d' % (name, i + 1), path_in_build_dir, filepath)
                # Join the non-comment lines so that a maven block spread
                # over several lines can be matched by a single regex.
                noncomment_lines = [l for l in lines if not common.gradle_comment.match(l)]
                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
                for m in gradle_mavenrepo.finditer(joined):
                    url = m.group(2)
                    if not any(r.match(url) for r in allowed_repos):
                        count += handleproblem('unknown maven repo \'%s\'' % url, path_in_build_dir, filepath)

            elif ext in ['', 'bin', 'out', 'exe']:
                if is_binary(filepath):
                    count += handleproblem('binary', path_in_build_dir, filepath)

            elif is_executable(filepath):
                # Any other executable file that looks binary is only
                # warned about, not counted.
                if is_binary(filepath) and not safe_path(path_in_build_dir):
                    warnproblem('possible binary', path_in_build_dir)

    # A scanignore/scandelete entry that never matched a problem is itself
    # counted as a problem.
    for p in scanignore:
        if p not in scanignore_worked:
            logging.error('Unused scanignore path: %s' % p)
            count += 1

    for p in scandelete:
        if p not in scandelete_worked:
            logging.error('Unused scandelete path: %s' % p)
            count += 1

    return count
254
255
def main():
    """Command-line entry point: scan the source of the selected apps.

    Parses the command line, reads the config and the app metadata, then for
    every selected app that is not disabled and has builds, prepares the
    source of each enabled build and runs scan_source() on it.  Errors while
    processing an app are logged and counted as one problem so the remaining
    apps are still scanned.  The total number of problems is printed at the
    end.

    Rebinds the module-level ``config`` and ``options``.
    """

    global config, options

    # Parse command line...
    parser = ArgumentParser(usage="%(prog)s [options] [APPID[:VERCODE] [APPID[:VERCODE] ...]]")
    common.setup_global_opts(parser)
    parser.add_argument("appid", nargs='*', help=_("applicationId with optional versionCode in the form APPID[:VERCODE]"))
    metadata.add_metadata_arguments(parser)
    options = parser.parse_args()
    metadata.warnings_action = options.W

    config = common.read_config(options)

    # Read all app and srclib metadata
    allapps = metadata.read_metadata()
    apps = common.read_app_args(options.appid, allapps, True)

    probcount = 0

    build_dir = 'build'
    if not os.path.isdir(build_dir):
        logging.info("Creating build directory")
        os.makedirs(build_dir)
    srclib_dir = os.path.join(build_dir, 'srclib')
    extlib_dir = os.path.join(build_dir, 'extlib')

    for appid, app in apps.items():

        if app.Disabled:
            logging.info(_("Skipping {appid}: disabled").format(appid=appid))
            continue
        if not app.builds:
            logging.info(_("Skipping {appid}: no builds specified").format(appid=appid))
            continue

        logging.info(_("Processing {appid}").format(appid=appid))

        try:

            # From here on build_dir is the per-app checkout directory.
            if app.RepoType == 'srclib':
                build_dir = os.path.join('build', 'srclib', app.Repo)
            else:
                build_dir = os.path.join('build', appid)

            # Set up vcs interface and make sure we have the latest code...
            vcs = common.getvcs(app.RepoType, app.Repo, build_dir)

            for build in app.builds:

                if build.disable:
                    logging.info("...skipping version %s - %s" % (
                        build.versionName, build.get('disable', build.commit[1:])))
                else:
                    # %-formatting instead of '+' so a non-string versionName
                    # cannot raise TypeError here.
                    logging.info("...scanning version %s" % build.versionName)

                    # Prepare the source code...
                    common.prepare_source(vcs, app, build,
                                          build_dir, srclib_dir,
                                          extlib_dir, False)

                    # Do the scan...
                    count = scan_source(build_dir, build)
                    if count > 0:
                        logging.warning('Scanner found %d problems in %s (%s)' % (
                            count, appid, build.versionCode))
                        probcount += count

        except BuildException as be:
            logging.warning("Could not scan app %s due to BuildException: %s" % (
                appid, be))
            probcount += 1
        except VCSException as vcse:
            logging.warning("VCS error while scanning app %s: %s" % (appid, vcse))
            probcount += 1
        except Exception:
            # Deliberate catch-all: one broken app must not stop the scan of
            # the remaining ones; the traceback is kept in the log.
            logging.warning("Could not scan app %s due to unknown error: %s" % (
                appid, traceback.format_exc()))
            probcount += 1

    logging.info(_("Finished"))
    print(_("%d problems found") % probcount)
338
339
# Call main() when this module is run as the main program.
if __name__ == "__main__":
    main()