3 # stats.py - part of the FDroid server tools
4 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Affero General Public License for more details.
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 from argparse import ArgumentParser
31 from collections import Counter
35 from . import metadata
def carbon_send(key, value):
    """Send a single metric sample to the configured carbon server.

    The sample is written as one text line, ``<key> <value> <timestamp>``,
    where the timestamp is the current time in whole seconds since the
    epoch.  (Presumably the Graphite/Carbon plaintext protocol, judging
    by the config key names -- confirm against the server in use.)

    The destination is read from the module-level ``config`` mapping
    (``carbon_host`` and ``carbon_port``).

    :param key: metric name
    :param value: numeric sample; formatted with ``%d``, so any
        fractional part is dropped
    :raises OSError: if the connection or the send fails
    """
    # Imported locally so this function stands on its own regardless of
    # what the module-level import block provides.
    import socket

    msg = '%s %d %d\n' % (key, value, int(time.time()))
    # The context manager closes the socket even if connect() or
    # sendall() raises, so a failing server cannot leak descriptors.
    with socket.socket() as s:
        s.connect((config['carbon_host'], config['carbon_port']))
        # Sockets carry bytes, not str, under Python 3.
        s.sendall(msg.encode('utf-8'))
def most_common_stable(counts):
    """Return the (key, count) pairs of *counts* in a deterministic order.

    Pairs are sorted by descending count; keys with equal counts are
    ordered ascending by key, so the result does not depend on the
    iteration order of *counts*.

    :param counts: mapping of key to numeric count, e.g. a ``Counter``
    :returns: list of ``(key, count)`` tuples
    """
    # Negating the count puts the largest first while the key still
    # breaks ties in ascending order.
    return sorted(((key, counts[key]) for key in counts),
                  key=lambda pair: (-pair[1], pair[0]))
59 global options, config
61 # Parse command line...
62 parser = ArgumentParser()
63 common.setup_global_opts(parser)
64 parser.add_argument("-d", "--download", action="store_true", default=False,
65 help=_("Download logs we don't have"))
66 parser.add_argument("--recalc", action="store_true", default=False,
67 help=_("Recalculate aggregate stats - use when changes "
68 "have been made that would invalidate old cached data."))
69 parser.add_argument("--nologs", action="store_true", default=False,
70 help=_("Don't do anything logs-related"))
71 metadata.add_metadata_arguments(parser)
72 options = parser.parse_args()
73 metadata.warnings_action = options.W
75 config = common.read_config(options)
77 if not config['update_stats']:
78 logging.info("Stats are disabled - set \"update_stats = True\" in your config.py")
81 # Get all metadata-defined apps...
82 allmetaapps = [app for app in metadata.read_metadata().values()]
83 metaapps = [app for app in allmetaapps if not app.Disabled]
86 logsdir = os.path.join(statsdir, 'logs')
87 datadir = os.path.join(statsdir, 'data')
88 if not os.path.exists(statsdir):
90 if not os.path.exists(logsdir):
92 if not os.path.exists(datadir):
96 # Get any access logs we don't have...
100 logging.info('Retrieving logs')
101 ssh = paramiko.SSHClient()
102 ssh.load_system_host_keys()
103 ssh.connect(config['stats_server'], username=config['stats_user'],
104 timeout=10, key_filename=config['webserver_keyfile'])
105 ftp = ssh.open_sftp()
106 ftp.get_channel().settimeout(60)
107 logging.info("...connected")
110 files = ftp.listdir()
112 if f.startswith('access-') and f.endswith('.log.gz'):
114 destpath = os.path.join(logsdir, f)
115 destsize = ftp.stat(f).st_size
116 if (not os.path.exists(destpath) or
117 os.path.getsize(destpath) != destsize):
118 logging.debug("...retrieving " + f)
121 traceback.print_exc()
130 knownapks = common.KnownApks()
133 if not options.nologs:
135 logging.info('Processing logs...')
136 appscount = Counter()
137 appsvercount = Counter()
138 logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
139 '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
140 '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
141 logsearch = re.compile(logexpr).search
142 for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
143 logging.debug('...' + logfile)
145 # Get the date for this log - e.g. 2012-02-28
146 thisdate = os.path.basename(logfile)[7:-7]
148 agg_path = os.path.join(datadir, thisdate + '.json')
149 if not options.recalc and os.path.exists(agg_path):
150 # Use previously calculated aggregate data
151 with open(agg_path, 'r') as f:
155 # Calculate from logs...
159 'appsver': Counter(),
163 p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
164 matches = (logsearch(line) for line in p.stdout)
165 for match in matches:
168 if match.group('statuscode') != '200':
170 if match.group('ip') in config['stats_ignore']:
172 uri = match.group('uri')
173 if not uri.endswith('.apk'):
175 _ignored, apkname = os.path.split(uri)
176 app = knownapks.getapp(apkname)
178 appid, _ignored = app
179 today['apps'][appid] += 1
180 # Strip the '.apk' from apkname
181 appver = apkname[:-4]
182 today['appsver'][appver] += 1
184 if apkname not in today['unknown']:
185 today['unknown'].append(apkname)
187 # Save calculated aggregate data for today to cache
188 with open(agg_path, 'w') as f:
191 # Add today's stats (whether cached or recalculated) to the total
192 for appid in today['apps']:
193 appscount[appid] += today['apps'][appid]
194 for appid in today['appsver']:
195 appsvercount[appid] += today['appsver'][appid]
196 for uk in today['unknown']:
197 if uk not in unknownapks:
198 unknownapks.append(uk)
200 # Calculate and write stats for total downloads...
203 for appid in appscount:
204 count = appscount[appid]
205 lst.append(appid + " " + str(count))
206 if config['stats_to_carbon']:
207 carbon_send('fdroid.download.' + appid.replace('.', '_'),
209 alldownloads += count
210 lst.append("ALL " + str(alldownloads))
211 with open(os.path.join(statsdir, 'total_downloads_app.txt'), 'w') as f:
212 f.write('# Total downloads by application, since October 2011\n')
213 for line in sorted(lst):
217 for appver in appsvercount:
218 count = appsvercount[appver]
219 lst.append(appver + " " + str(count))
221 with open(os.path.join(statsdir, 'total_downloads_app_version.txt'), 'w') as f:
222 f.write('# Total downloads by application and version, '
223 'since October 2011\n')
224 for line in sorted(lst):
227 # Calculate and write stats for repo types...
228 logging.info("Processing repo types...")
229 repotypes = Counter()
231 rtype = app.RepoType or 'none'
232 if rtype == 'srclib':
233 rtype = common.getsrclibvcs(app.Repo)
234 repotypes[rtype] += 1
235 with open(os.path.join(statsdir, 'repotypes.txt'), 'w') as f:
236 for rtype, count in most_common_stable(repotypes):
237 f.write(rtype + ' ' + str(count) + '\n')
239 # Calculate and write stats for update check modes...
240 logging.info("Processing update check modes...")
243 checkmode = app.UpdateCheckMode
244 if checkmode.startswith('RepoManifest/'):
245 checkmode = checkmode[:12]
246 if checkmode.startswith('Tags '):
247 checkmode = checkmode[:4]
249 with open(os.path.join(statsdir, 'update_check_modes.txt'), 'w') as f:
250 for checkmode, count in most_common_stable(ucms):
251 f.write(checkmode + ' ' + str(count) + '\n')
253 logging.info("Processing categories...")
256 for category in app.Categories:
258 with open(os.path.join(statsdir, 'categories.txt'), 'w') as f:
259 for category, count in most_common_stable(ctgs):
260 f.write(category + ' ' + str(count) + '\n')
262 logging.info("Processing antifeatures...")
265 if app.AntiFeatures is None:
267 for antifeature in app.AntiFeatures:
268 afs[antifeature] += 1
269 with open(os.path.join(statsdir, 'antifeatures.txt'), 'w') as f:
270 for antifeature, count in most_common_stable(afs):
271 f.write(antifeature + ' ' + str(count) + '\n')
273 # Calculate and write stats for licenses...
274 logging.info("Processing licenses...")
277 license = app.License
278 licenses[license] += 1
279 with open(os.path.join(statsdir, 'licenses.txt'), 'w') as f:
280 for license, count in most_common_stable(licenses):
281 f.write(license + ' ' + str(count) + '\n')
283 # Write list of disabled apps...
284 logging.info("Processing disabled apps...")
285 disabled = [app.id for app in allmetaapps if app.Disabled]
286 with open(os.path.join(statsdir, 'disabled_apps.txt'), 'w') as f:
287 for appid in sorted(disabled):
288 f.write(appid + '\n')
290 # Write list of latest apps added to the repo...
291 logging.info("Processing latest apps...")
292 latest = knownapks.getlatest(10)
293 with open(os.path.join(statsdir, 'latestapps.txt'), 'w') as f:
295 f.write(appid + '\n')
298 logging.info('\nUnknown apks:')
299 for apk in unknownapks:
302 logging.info(_("Finished"))
305 if __name__ == "__main__":