2 # -*- coding: utf-8 -*-
4 # stats.py - part of the FDroid server tools
5 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 from argparse import ArgumentParser
34 from collections import Counter
# Build one "<key> <value> <timestamp>" text line for the Carbon host/port
# named in the global `config`.
# NOTE(review): listing lines 38 and 41 onwards are absent from this extract,
# so the creation of `s` and whatever is done with `msg` afterwards are not
# visible here - confirm against the full file.
37 def carbon_send(key, value):
# Connects `s` to the (host, port) pair read from config; `s` is presumably a
# socket created on the missing line 38 - TODO confirm.
39 s.connect((config['carbon_host'], config['carbon_port']))
# `value` is formatted with %d, i.e. as an integer; the timestamp is the
# current time truncated to whole seconds.
40 msg = '%s %d %d\n' % (key, value, int(time.time()))
# Command-line parsing and configuration loading. The results are stored in
# the module-level names `options` and `config` (hence the `global`).
# NOTE(review): the enclosing `def` line is not present in this extract.
50 global options, config
52 # Parse command line...
53 parser = ArgumentParser()
# Lets the project's `common` module add its own options to the parser; which
# options those are is not visible from here.
54 common.setup_global_opts(parser)
# Three boolean flags, all off by default: --download, --recalc, --nologs.
55 parser.add_argument("-d", "--download", action="store_true", default=False,
56 help="Download logs we don't have")
57 parser.add_argument("--recalc", action="store_true", default=False,
58 help="Recalculate aggregate stats - use when changes "
59 "have been made that would invalidate old cached data.")
60 parser.add_argument("--nologs", action="store_true", default=False,
61 help="Don't do anything logs-related")
62 options = parser.parse_args()
64 config = common.read_config(options)
# When the 'update_stats' config entry is falsy, the message below is logged.
# NOTE(review): indentation is lost and listing line 68 is missing, so what
# happens after the message (e.g. an early exit) cannot be seen here.
66 if not config['update_stats']:
67 logging.info("Stats are disabled - set \"update_stats = True\" in your config.py")
# Load every app known to the metadata and prepare the stats directories.
70 # Get all metadata-defined apps...
# `itervalues()` is the Python 2 dict iterator, so read_metadata() presumably
# returns a dict keyed by app id - TODO confirm.
71 allmetaapps = [app for app in metadata.read_metadata().itervalues()]
# Apps whose `Disabled` attribute is falsy.
72 metaapps = [app for app in allmetaapps if not app.Disabled]
# NOTE(review): `statsdir` is assigned on a listing line missing from this
# extract (73 or 74); logs and aggregate data live in sub-directories of it.
75 logsdir = os.path.join(statsdir, 'logs')
76 datadir = os.path.join(statsdir, 'data')
# Existence checks for the three directories. The bodies (listing lines 78,
# 80, 82) are missing; presumably they create the directory - TODO confirm.
77 if not os.path.exists(statsdir):
79 if not os.path.exists(logsdir):
81 if not os.path.exists(datadir):
# Fetch access logs from the stats server.
# NOTE(review): listing lines 86-88, 94, 97-100, 102, 108-109 and 111+ are
# missing, so the guarding condition, the creation of `ftp`, the loop over
# remote file names, the actual download call and the error handling structure
# are not visible in this extract.
85 # Get any access logs we don't have...
89 logging.info('Retrieving logs')
90 ssh = paramiko.SSHClient()
91 ssh.load_system_host_keys()
# Key-based login with a 10 second connect timeout; server, user and key file
# all come from `config`.
92 ssh.connect(config['stats_server'], username=config['stats_user'],
93 timeout=10, key_filename=config['webserver_keyfile'])
# 60 second timeout on the channel underlying `ftp` (presumably an SFTP
# client opened from `ssh` on the missing line 94 - TODO confirm).
95 ftp.get_channel().settimeout(60)
96 logging.info("...connected")
# Only names of the form access-*.log.gz are considered.
101 if f.startswith('access-') and f.endswith('.log.gz'):
103 destpath = os.path.join(logsdir, f)
# Size of the remote file, compared with the local copy below.
104 destsize = ftp.stat(f).st_size
# A file counts as needing retrieval when there is no local copy or the local
# size differs from the remote size.
105 if (not os.path.exists(destpath) or
106 os.path.getsize(destpath) != destsize):
107 logging.debug("...retrieving " + f)
# Prints the current exception's traceback; presumably inside an `except`
# clause on a missing line - TODO confirm.
110 traceback.print_exc()
# Set-up for log processing: the known-APK lookup object and the running
# totals that the per-log loop adds to.
119 knownapks = common.KnownApks()
# Everything log-related is skipped when --nologs was given.
# NOTE(review): indentation is lost, so the exact extent of this `if` body
# cannot be determined from this extract.
122 if not options.nologs:
124 logging.info('Processing logs...')
# Download totals, incremented with keys taken from each day's 'apps' and
# 'appsver' data respectively.
125 appscount = Counter()
126 appsvercount = Counter()
# Pattern for one access-log entry.  Named groups, in order of appearance:
#   ip         - client address (characters valid in IPv4/IPv6 notation)
#   time       - whatever sits between the square brackets
#   uri        - target of a GET request (other methods never match)
#   statuscode - numeric response status
#   referral   - first quoted field after the response size
#   useragent  - second quoted field after the response size
# Raw string literals are used so that the regex escapes (\[ \] \d) reach the
# `re` module untouched instead of relying on Python leaving unknown string
# escapes alone, which is deprecated.  The resulting pattern text is unchanged.
logexpr = (
    r'(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] '
    r'"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) '
    r'\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
)
# Compile once and keep only the bound `search` method, which is what gets
# applied to every log line.
logsearch = re.compile(logexpr).search
# Per-log aggregation: for each downloaded access log, obtain that day's
# counts (from a cached JSON file or by scanning the log) and fold them into
# the running totals.
# NOTE(review): many listing lines in this range are missing (e.g. 141-143,
# 145-147, 149-151, 155-156, 158, 160, 163, 166-167, 172, 175, 178-179), so
# the construction of `today`, the skip statements after each filter, the
# definition of `appid` and the JSON load/dump calls are not visible here.
131 for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
132 logging.debug('...' + logfile)
134 # Get the date for this log - e.g. 2012-02-28
# Drops the 7-character 'access-' prefix and the 7-character '.log.gz' suffix.
135 thisdate = os.path.basename(logfile)[7:-7]
# One cache file per log date under datadir.
137 agg_path = os.path.join(datadir, thisdate + '.json')
# The cache is used only when it exists and --recalc was not requested.
138 if not options.recalc and os.path.exists(agg_path):
139 # Use previously calculated aggregate data
140 with open(agg_path, 'r') as f:
144 # Calculate from logs...
148 'appsver': Counter(),
# Decompress with the external `zcat` program and read its output line by line.
152 p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
# Lazily applies the compiled log regex to each line; non-matching lines
# yield None.
153 matches = (logsearch(line) for line in p.stdout)
154 for match in matches:
# Filters on the matched fields: status code other than '200', client address
# listed in config['stats_ignore'], URI not ending in '.apk'.  What each
# filter does on a hit is on missing lines - presumably skips the entry.
157 if match.group('statuscode') != '200':
159 if match.group('ip') in config['stats_ignore']:
161 uri = match.group('uri')
162 if not uri.endswith('.apk'):
# Keep only the final path component of the URI.
164 _, apkname = os.path.split(uri)
165 app = knownapks.getapp(apkname)
168 today['apps'][appid] += 1
169 # Strip the '.apk' from apkname
170 appver = apkname[:-4]
171 today['appsver'][appver] += 1
# Each unrecognised apk name is recorded once per day.
173 if apkname not in today['unknown']:
174 today['unknown'].append(apkname)
176 # Save calculated aggregate data for today to cache
177 with open(agg_path, 'w') as f:
180 # Add today's stats (whether cached or recalculated) to the total
181 for appid in today['apps']:
182 appscount[appid] += today['apps'][appid]
183 for appid in today['appsver']:
184 appsvercount[appid] += today['appsver'][appid]
# `unknownapks` is a list defined outside the visible lines; duplicates are
# avoided with a membership test.
185 for uk in today['unknown']:
186 if uk not in unknownapks:
187 unknownapks.append(uk)
# Write the total-downloads files: one line per app (plus an "ALL" line) and
# one line per app version, each as "<name> <count>".
# NOTE(review): listing lines 190-191, 197, 203-205, 209 and 214 are missing,
# so the initialisation of `lst` and `alldownloads`, the second argument of
# the carbon_send call and the per-line write statements are not visible here.
189 # Calculate and write stats for total downloads...
192 for appid in appscount:
193 count = appscount[appid]
194 lst.append(appid + " " + str(count))
# Optional reporting through carbon_send; dots in the app id are replaced by
# underscores when building the metric key.
195 if config['stats_to_carbon']:
196 carbon_send('fdroid.download.' + appid.replace('.', '_'),
198 alldownloads += count
199 lst.append("ALL " + str(alldownloads))
200 with open(os.path.join(statsdir, 'total_downloads_app.txt'), 'w') as f:
201 f.write('# Total downloads by application, since October 2011\n')
202 for line in sorted(lst):
206 for appver in appsvercount:
207 count = appsvercount[appver]
208 lst.append(appver + " " + str(count))
210 with open(os.path.join(statsdir, 'total_downloads_app_version.txt'), 'w') as f:
211 f.write('# Total downloads by application and version, '
212 'since October 2011\n')
213 for line in sorted(lst):
# Count apps per repository type and write the counts to repotypes.txt,
# most common first, as "<type> <count>".
# NOTE(review): the loop header that binds `app` (listing line 219) is
# missing from this extract.
216 # Calculate and write stats for repo types...
217 logging.info("Processing repo types...")
218 repotypes = Counter()
# A falsy RepoType is counted under the literal key 'none'.
220 rtype = app.RepoType or 'none'
# For 'srclib' repos the type is replaced by whatever
# common.getsrclibvcs() returns for the app's Repo value.
221 if rtype == 'srclib':
222 rtype = common.getsrclibvcs(app.Repo)
223 repotypes[rtype] += 1
224 with open(os.path.join(statsdir, 'repotypes.txt'), 'w') as f:
225 for rtype, count in repotypes.most_common():
226 f.write(rtype + ' ' + str(count) + '\n')
# Count apps per update check mode and write the counts to
# update_check_modes.txt, most common first, as "<mode> <count>".
# NOTE(review): listing lines 230-231 and 237 are missing, so the creation of
# `ucms`, the loop header binding `app` and the increment are not visible.
228 # Calculate and write stats for update check modes...
229 logging.info("Processing update check modes...")
232 checkmode = app.UpdateCheckMode
# Modes with a parameter are collapsed to their base name: the first 12
# characters of 'RepoManifest/...' are 'RepoManifest', and the first 4
# characters of 'Tags ...' are 'Tags'.
233 if checkmode.startswith('RepoManifest/'):
234 checkmode = checkmode[:12]
235 if checkmode.startswith('Tags '):
236 checkmode = checkmode[:4]
238 with open(os.path.join(statsdir, 'update_check_modes.txt'), 'w') as f:
239 for checkmode, count in ucms.most_common():
240 f.write(checkmode + ' ' + str(count) + '\n')
# Category and antifeature statistics, written to categories.txt and
# antifeatures.txt as "<name> <count>" lines, most common first.
# NOTE(review): listing lines 243-244, 246, 252-253 and 255 are missing, so
# the creation of `ctgs` and `afs`, the loop headers binding `app`, the
# category increment and the action taken when AntiFeatures is None are not
# visible in this extract.
242 logging.info("Processing categories...")
# An app may list several categories; each one is visited.
245 for category in app.Categories:
247 with open(os.path.join(statsdir, 'categories.txt'), 'w') as f:
248 for category, count in ctgs.most_common():
249 f.write(category + ' ' + str(count) + '\n')
251 logging.info("Processing antifeatures...")
# Apps without antifeatures are special-cased before the iteration below.
254 if app.AntiFeatures is None:
256 for antifeature in app.AntiFeatures:
257 afs[antifeature] += 1
258 with open(os.path.join(statsdir, 'antifeatures.txt'), 'w') as f:
259 for antifeature, count in afs.most_common():
260 f.write(antifeature + ' ' + str(count) + '\n')
# Count apps per license and write the counts to licenses.txt, most common
# first, as "<license> <count>".
# NOTE(review): listing lines 264-265 (creation of `licenses` and the loop
# header binding `app`) are missing from this extract.
262 # Calculate and write stats for licenses...
263 logging.info("Processing licenses...")
# The local name `license` hides the interactive `license` helper that the
# `site` module adds to builtins; harmless here but worth knowing.
266 license = app.License
267 licenses[license] += 1
268 with open(os.path.join(statsdir, 'licenses.txt'), 'w') as f:
269 for license, count in licenses.most_common():
270 f.write(license + ' ' + str(count) + '\n')
# Two plain lists, one app id per line: disabled apps (sorted) and the apps
# most recently added to the repo.
272 # Write list of disabled apps...
273 logging.info("Processing disabled apps...")
# Built from the full app list, not from the already-filtered `metaapps`.
274 disabled = [app.id for app in allmetaapps if app.Disabled]
275 with open(os.path.join(statsdir, 'disabled_apps.txt'), 'w') as f:
276 for appid in sorted(disabled):
277 f.write(appid + '\n')
279 # Write list of latest apps added to the repo...
280 logging.info("Processing latest apps...")
# Asks the known-APK object for its latest entries, passing 10 as the limit.
281 latest = knownapks.getlatest(10)
282 with open(os.path.join(statsdir, 'latestapps.txt'), 'w') as f:
# NOTE(review): the loop header over `latest` (listing line 283) is missing
# from this extract; the write below is presumably its body.
284 f.write(appid + '\n')
# Final report of apk names that could not be matched to a known app, then
# the script entry guard.
# NOTE(review): listing lines 285-286, 289-290 and 294 are missing, so any
# condition guarding this report, the loop body and the statement executed
# under the entry guard are not visible in this extract.
287 logging.info('\nUnknown apks:')
288 for apk in unknownapks:
291 logging.info("Finished.")
# Runs only when the file is executed directly rather than imported.
293 if __name__ == "__main__":