chiark / gitweb /
scanner: promote jar and aar files present in the repo to errors
[fdroidserver.git] / fdroidserver / scanner.py
1 #!/usr/bin/env python3
2 #
3 # scanner.py - part of the FDroid server tools
4 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU Affero General Public License for more details.
15 #
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19 import os
20 import re
21 import traceback
22 from argparse import ArgumentParser
23 import logging
24
25 from . import common
26 from . import metadata
27 from .exception import BuildException, VCSException
28
# Module-wide state, populated by main():
#   config  - the value returned by common.read_config()
#   options - the parsed command line arguments
config = options = None
31
32
def get_gradle_compile_commands(build):
    """Return compiled regexes matching gradle dependency keywords.

    The generic ``compile`` and ``releaseCompile`` keywords are always
    included.  When the build lists gradle flavors (anything other than
    the plain ``['yes']`` marker), ``<flavor>Compile`` is added for every
    flavor, followed by ``<flavor>ReleaseCompile`` for every flavor.

    Each pattern tolerates leading whitespace and is case-insensitive.
    """
    flavors = build.gradle
    keywords = ['compile', 'releaseCompile']

    if flavors and flavors != ['yes']:
        # All 'Compile' variants first, then all 'ReleaseCompile' variants.
        for suffix in ('Compile', 'ReleaseCompile'):
            keywords.extend(flavor + suffix for flavor in flavors)

    patterns = []
    for keyword in keywords:
        patterns.append(re.compile(r'\s*' + keyword, re.IGNORECASE))
    return patterns
40
41
def scan_source(build_dir, build):
    """Scan the source code in the given directory (and all subdirectories)
    and return the number of fatal problems encountered.

    build_dir is the root of the source tree to walk.  build is the build
    entry being scanned; its scanignore, scandelete and gradle fields are
    read.  Files matching a scanignore path are tolerated, files matching
    a scandelete path are deleted from disk, everything else that looks
    problematic is logged as an error and counted.  scanignore/scandelete
    entries that never matched anything are counted as problems as well.
    """

    count = 0

    # Common known non-free blobs (always lower case):
    usual_suspects = {
        exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
            r'flurryagent',
            r'paypal.*mpl',
            r'google.*analytics',
            r'admob.*sdk.*android',
            r'google.*ad.*view',
            r'google.*admob',
            r'google.*play.*services',
            r'crittercism',
            r'heyzap',
            r'jpct.*ae',
            r'youtube.*android.*player.*api',
            r'bugsense',
            r'crashlytics',
            r'ouya.*sdk',
            r'libspen23',
            r'firebase',
        ]
    }

    def suspects_found(s):
        """Yield the name of every usual suspect whose pattern matches s."""
        for n, r in usual_suspects.items():
            if r.match(s):
                yield n

    # Group 2 captures the (optionally quoted) URL of a "maven { url ... }"
    # repository declaration.
    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')

    # The allowed prefix must be followed by '/' or the end of the URL.  A
    # bare prefix match would also accept look-alike hosts such as
    # "https://jitpack.io.example.com/".
    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'(?:/|$)') for repo in [
        'repo1.maven.org/maven2',  # mavenCentral()
        'jcenter.bintray.com',     # jcenter()
        'jitpack.io',
        'repo.maven.apache.org/maven2',
        'oss.jfrog.org/artifactory/oss-snapshot-local',
        'oss.sonatype.org/content/repositories/snapshots',
        'oss.sonatype.org/content/repositories/releases',
        'oss.sonatype.org/content/groups/public',
        'clojars.org/repo',  # Clojure free software libs
        's3.amazonaws.com/repo.commonsware.com',  # CommonsWare
        'plugins.gradle.org/m2',  # Gradle plugin repo
        'maven.google.com',  # Google Maven Repo, https://developer.android.com/studio/build/dependencies.html#google-maven
        ]
    ]

    scanignore = common.getpaths_map(build_dir, build.scanignore)
    scandelete = common.getpaths_map(build_dir, build.scandelete)

    # Entries that matched at least one file; the rest are reported as
    # unused once the walk is finished.
    scanignore_worked = set()
    scandelete_worked = set()

    def toignore(path_in_build_dir):
        """Return True (and mark the entry as used) if a scanignore path matches."""
        for k, paths in scanignore.items():
            for p in paths:
                if path_in_build_dir.startswith(p):
                    scanignore_worked.add(k)
                    return True
        return False

    def todelete(path_in_build_dir):
        """Return True (and mark the entry as used) if a scandelete path matches."""
        for k, paths in scandelete.items():
            for p in paths:
                if path_in_build_dir.startswith(p):
                    scandelete_worked.add(k)
                    return True
        return False

    def ignoreproblem(what, path_in_build_dir):
        logging.info('Ignoring %s at %s' % (what, path_in_build_dir))
        return 0

    def removeproblem(what, path_in_build_dir, filepath):
        logging.info('Removing %s at %s' % (what, path_in_build_dir))
        try:
            os.remove(filepath)
        except FileNotFoundError:
            # Already gone.  This happens when several problems are found
            # in one file covered by scandelete, e.g. a JAR that is also a
            # usual suspect, or a gradle file with several unknown repos.
            pass
        return 0

    def warnproblem(what, path_in_build_dir):
        if toignore(path_in_build_dir):
            return
        logging.warning('Found %s at %s' % (what, path_in_build_dir))

    def handleproblem(what, path_in_build_dir, filepath):
        """Ignore, delete or report a problem; return 1 only when reported."""
        if toignore(path_in_build_dir):
            return ignoreproblem(what, path_in_build_dir)
        if todelete(path_in_build_dir):
            return removeproblem(what, path_in_build_dir, filepath)
        logging.error('Found %s at %s' % (what, path_in_build_dir))
        return 1

    def is_executable(path):
        return os.path.exists(path) and os.access(path, os.X_OK)

    # Byte values considered to be text; anything else marks a file as binary.
    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})

    def is_binary(path):
        """Return True if the first 1024 bytes contain any non-text byte."""
        with open(path, 'rb') as f:
            d = f.read(1024)
        return bool(d.translate(None, textchars))

    # False positives patterns for files that are binary and executable.
    safe_paths = [re.compile(r) for r in [
        r".*/drawable[^/]*/.*\.png$",  # png drawables
        r".*/mipmap[^/]*/.*\.png$",    # png mipmaps
        ]
    ]

    def safe_path(path):
        return any(sp.match(path) for sp in safe_paths)

    gradle_compile_commands = get_gradle_compile_commands(build)

    def is_used_by_gradle(line):
        return any(command.match(line) for command in gradle_compile_commands)

    # Extensions that are always a problem, whatever the file contains.
    always_problematic = {
        'so': 'shared library',
        'a': 'static library',
        'class': 'Java compiled class',
        'aar': 'AAR file',
    }

    # Iterate through all files in the source code
    for dirpath, dirnames, filenames in os.walk(build_dir, topdown=True):

        # It's topdown, so checking the basename is enough
        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
            if ignoredir in dirnames:
                dirnames.remove(ignoredir)

        for curfile in filenames:

            if curfile == '.DS_Store':
                continue

            # Path (relative) to the file
            filepath = os.path.join(dirpath, curfile)

            if os.path.islink(filepath):
                continue

            path_in_build_dir = os.path.relpath(filepath, build_dir)
            _, ext = common.get_extension(path_in_build_dir)

            if ext in always_problematic:
                count += handleproblem(always_problematic[ext], path_in_build_dir, filepath)

            elif ext == 'apk':
                # APKs are always deleted and never counted as a problem.
                removeproblem('APK file', path_in_build_dir, filepath)

            elif ext == 'jar':
                for name in suspects_found(curfile):
                    count += handleproblem('usual suspect \'%s\'' % name, path_in_build_dir, filepath)
                if curfile == 'gradle-wrapper.jar':
                    removeproblem('gradle-wrapper.jar', path_in_build_dir, filepath)
                else:
                    count += handleproblem('JAR file', path_in_build_dir, filepath)

            elif ext == 'java':
                if not os.path.isfile(filepath):
                    continue
                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
                    for line in f:
                        if 'DexClassLoader' in line:
                            count += handleproblem('DexClassLoader', path_in_build_dir, filepath)
                            break

            elif ext == 'gradle':
                if not os.path.isfile(filepath):
                    continue
                with open(filepath, 'r', encoding='utf8', errors='replace') as f:
                    lines = f.readlines()
                for i, line in enumerate(lines):
                    if is_used_by_gradle(line):
                        for name in suspects_found(line):
                            count += handleproblem('usual suspect \'%s\' at line %d' % (name, i + 1), path_in_build_dir, filepath)
                # Repository declarations may span several lines, so drop
                # comment lines and collapse the rest before searching.
                noncomment_lines = [text for text in lines if not common.gradle_comment.match(text)]
                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
                for m in gradle_mavenrepo.finditer(joined):
                    url = m.group(2)
                    if not any(r.match(url) for r in allowed_repos):
                        count += handleproblem('unknown maven repo \'%s\'' % url, path_in_build_dir, filepath)

            elif ext in ['', 'bin', 'out', 'exe']:
                if is_binary(filepath):
                    count += handleproblem('binary', path_in_build_dir, filepath)

            elif is_executable(filepath):
                # Only a warning: executable binaries with another extension
                # are suspicious but not counted as fatal.
                if is_binary(filepath) and not safe_path(path_in_build_dir):
                    warnproblem('possible binary', path_in_build_dir)

    for p in scanignore:
        if p not in scanignore_worked:
            logging.error('Unused scanignore path: %s' % p)
            count += 1

    for p in scandelete:
        if p not in scandelete_worked:
            logging.error('Unused scandelete path: %s' % p)
            count += 1

    return count
253
254
def main():
    """Command line entry point: scan the source of the selected apps.

    Parses the command line, reads the config and all app metadata, then
    for every selected app that is neither disabled nor without builds
    prepares the source of each enabled build and runs scan_source() on
    it.  An exception while processing an app is logged and counted as
    one problem.  The total number of problems is printed at the end.
    """

    global config, options

    # Parse command line...
    parser = ArgumentParser(usage="%(prog)s [options] [APPID[:VERCODE] [APPID[:VERCODE] ...]]")
    common.setup_global_opts(parser)
    parser.add_argument("appid", nargs='*', help="app-id with optional versionCode in the form APPID[:VERCODE]")
    metadata.add_metadata_arguments(parser)
    options = parser.parse_args()
    metadata.warnings_action = options.W

    config = common.read_config(options)

    # Read all app and srclib metadata
    allapps = metadata.read_metadata()
    apps = common.read_app_args(options.appid, allapps, True)

    probcount = 0

    build_dir = 'build'
    if not os.path.isdir(build_dir):
        logging.info("Creating build directory")
        os.makedirs(build_dir)
    srclib_dir = os.path.join(build_dir, 'srclib')
    extlib_dir = os.path.join(build_dir, 'extlib')

    for appid, app in apps.items():

        if app.Disabled:
            logging.info("Skipping %s: disabled" % appid)
            continue
        if not app.builds:
            logging.info("Skipping %s: no builds specified" % appid)
            continue

        logging.info("Processing " + appid)

        try:

            if app.RepoType == 'srclib':
                build_dir = os.path.join('build', 'srclib', app.Repo)
            else:
                build_dir = os.path.join('build', appid)

            # Set up vcs interface and make sure we have the latest code...
            vcs = common.getvcs(app.RepoType, app.Repo, build_dir)

            for build in app.builds:

                if build.disable:
                    logging.info("...skipping version %s - %s" % (
                        build.versionName, build.get('disable', build.commit[1:])))
                else:
                    logging.info("...scanning version " + build.versionName)

                    # Prepare the source code...
                    common.prepare_source(vcs, app, build,
                                          build_dir, srclib_dir,
                                          extlib_dir, False)

                    # Do the scan...
                    count = scan_source(build_dir, build)
                    if count > 0:
                        logging.warning('Scanner found %d problems in %s (%s)' % (
                            count, appid, build.versionCode))
                        probcount += count

        except BuildException as be:
            logging.warning("Could not scan app %s due to BuildException: %s" % (
                appid, be))
            probcount += 1
        except VCSException as vcse:
            logging.warning("VCS error while scanning app %s: %s" % (appid, vcse))
            probcount += 1
        except Exception:
            # Keep going with the remaining apps; the traceback is logged so
            # the failure is not silently lost.
            logging.warning("Could not scan app %s due to unknown error: %s" % (
                appid, traceback.format_exc()))
            probcount += 1

    logging.info("Finished:")
    print("%d problems found" % probcount)
338
# Run the scanner when this file is executed as a script; importing the
# module must not trigger a scan.
if __name__ == '__main__':
    main()