2 # -*- coding: utf-8 -*-
4 # stats.py - part of the FDroid server tools
5 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
27 from optparse import OptionParser
34 from collections import Counter
def carbon_send(key, value):
    """Send one metric sample to the configured carbon server.

    The sample is written as a single text line made of the key, the value
    and the current Unix time, separated by spaces and terminated by a
    newline. Value and timestamp are both formatted as integers.

    key -- metric path, e.g. 'fdroid.download.<appid>'
    value -- numeric value; formatted with %d, so any fraction is dropped

    The endpoint is read from the module-level ``config`` mapping
    (``carbon_host`` and ``carbon_port`` entries). Socket errors are not
    caught here and propagate to the caller.
    """
    # Local import so the function does not depend on a top-level import.
    import socket

    msg = '%s %d %d\n' % (key, value, int(time.time()))
    if not isinstance(msg, bytes):
        # On Python 3 a str has to be encoded before it can be sent; on
        # Python 2 str already is bytes and is left untouched.
        msg = msg.encode('utf-8')

    s = socket.socket()
    try:
        s.connect((config['carbon_host'], config['carbon_port']))
        s.sendall(msg)
    finally:
        # Always release the socket, including when connect/sendall raises.
        s.close()
# NOTE(review): what follows reads as the body of the script's entry-point
# function (it opens with a `global` statement), but in this copy of the file
# the enclosing `def` line, the indentation and a number of short statements
# are absent. The comments below describe only what the visible lines do;
# anything else is marked as an assumption to confirm.
#
# `options` and `config` are module-level names; they are assigned further
# down from the parsed command line and from common.read_config().
50 global options, config
52 # Parse command line...
53 parser = OptionParser()
54 parser.add_option("-v", "--verbose", action="store_true", default=False,
55 help="Spew out even more information than normal")
56 parser.add_option("-q", "--quiet", action="store_true", default=False,
57 help="Restrict output to warnings and errors")
58 parser.add_option("-d", "--download", action="store_true", default=False,
59 help="Download logs we don't have")
60 parser.add_option("--recalc", action="store_true", default=False,
61 help="Recalculate aggregate stats - use when changes "
62 "have been made that would invalidate old cached data.")
63 parser.add_option("--nologs", action="store_true", default=False,
64 help="Don't do anything logs-related")
65 (options, args) = parser.parse_args()
67 config = common.read_config(options)
# Stats generation is gated on the 'update_stats' config entry.
# NOTE(review): only the log call is visible after this test; presumably the
# script exits here - confirm.
69 if not config['update_stats']:
70 logging.info("Stats are disabled - check your configuration")
73 # Get all metadata-defined apps...
# `allmetaapps` keeps every app record, `metaapps` only those whose
# 'Disabled' field is falsy. `.itervalues()` is the Python 2 dict method.
74 allmetaapps = [a for a in metadata.read_metadata().itervalues()]
75 metaapps = [a for a in allmetaapps if not a['Disabled']]
# Working directories: downloaded access logs live in <statsdir>/logs and
# cached per-day aggregates in <statsdir>/data. The assignment of `statsdir`
# and the statements guarded by the three existence checks are not visible
# here (presumably the directories get created - confirm).
78 logsdir = os.path.join(statsdir, 'logs')
79 datadir = os.path.join(statsdir, 'data')
80 if not os.path.exists(statsdir):
82 if not os.path.exists(logsdir):
84 if not os.path.exists(datadir):
88 # Get any access logs we don't have...
# Connects over SSH to config['stats_server'] as config['stats_user'], using
# the key file from config['webserver_keyfile'] and the system known-hosts.
# NOTE(review): the condition guarding this section, the creation of `ftp`
# (presumably an SFTP session opened from `ssh`) and the surrounding error
# handling are not visible in this excerpt.
92 logging.info('Retrieving logs')
93 ssh = paramiko.SSHClient()
94 ssh.load_system_host_keys()
95 ssh.connect(config['stats_server'], username=config['stats_user'],
96 timeout=10, key_filename=config['webserver_keyfile'])
98 ftp.get_channel().settimeout(60)
99 logging.info("...connected")
102 files = ftp.listdir()
# Only remote files named access-*.log.gz are considered. A file is reported
# as being retrieved when there is no local copy or when the local size
# differs from the remote size; the transfer call itself is not visible here.
104 if f.startswith('access-') and f.endswith('.log.gz'):
106 destpath = os.path.join(logsdir, f)
107 destsize = ftp.stat(f).st_size
108 if (not os.path.exists(destpath) or
109 os.path.getsize(destpath) != destsize):
110 logging.debug("...retrieving " + f)
113 traceback.print_exc()
122 knownapks = common.KnownApks()
# Everything that reads the access logs is skipped when --nologs is given.
125 if not options.nologs:
127 logging.info('Processing logs...')
# Running totals across all log files: downloads per app id and per
# "app version" key (the apk file name without its '.apk' suffix).
128 appscount = Counter()
129 appsvercount = Counter()
# Pattern for one access-log line. It captures the client address, the
# bracketed timestamp, the request URI, the status code, the referrer and the
# user agent. Only GET requests whose two fields after the address are '-'
# can match; the unnamed \d+ after the status code is presumably the
# response size.
130 logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
131 '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
132 '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
133 logsearch = re.compile(logexpr).search
134 for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
135 logging.debug('...' + logfile)
137 # Get the date for this log - e.g. 2012-02-28
# Slicing off the 7-character 'access-' prefix and the 7-character '.log.gz'
# suffix of the file name leaves the date part.
138 thisdate = os.path.basename(logfile)[7:-7]
# One cache file per log date; it is reused unless --recalc was given.
# NOTE(review): the statements that read and write the cache are not visible
# (presumably JSON, given the '.json' suffix).
140 agg_path = os.path.join(datadir, thisdate + '.json')
141 if not options.recalc and os.path.exists(agg_path):
142 # Use previously calculated aggregate data
143 with open(agg_path, 'r') as f:
147 # Calculate from logs...
# `today` is the per-day aggregate. Judging by its uses below it has the keys
# 'apps', 'appsver' and 'unknown'; most of the literal that builds it is not
# visible in this excerpt.
151 'appsver': Counter(),
# The log is decompressed by piping it through the external `zcat` command;
# each line of its stdout is matched against the pattern above.
155 p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
156 matches = (logsearch(line) for line in p.stdout)
157 for match in matches:
# Three filters: status code other than 200, client address listed in
# config['stats_ignore'], and URI not ending in '.apk'.
# NOTE(review): the statements guarded by these tests are not visible
# (presumably `continue`), nor is any handling of non-matching lines.
160 if match.group('statuscode') != '200':
162 if match.group('ip') in config['stats_ignore']:
164 uri = match.group('uri')
165 if not uri.endswith('.apk'):
# The apk file name is the last path component of the URI and is looked up
# through knownapks.getapp(). How `appid` is derived from the result is not
# visible here.
167 _, apkname = os.path.split(uri)
168 app = knownapks.getapp(apkname)
171 today['apps'][appid] += 1
172 # Strip the '.apk' from apkname
173 appver = apkname[:-4]
174 today['appsver'][appver] += 1
# today['unknown'] is kept free of duplicates. The branch that leads here is
# not visible (presumably apks that getapp() did not recognise).
176 if apkname not in today['unknown']:
177 today['unknown'].append(apkname)
179 # Save calculated aggregate data for today to cache
180 with open(agg_path, 'w') as f:
183 # Add today's stats (whether cached or recalculated) to the total
184 for appid in today['apps']:
185 appscount[appid] += today['apps'][appid]
186 for appid in today['appsver']:
187 appsvercount[appid] += today['appsver'][appid]
# `unknownapks` is the cross-day list of unknown apk names, again without
# duplicates; its initialisation is not visible in this excerpt.
188 for uk in today['unknown']:
189 if uk not in unknownapks:
190 unknownapks.append(uk)
192 # Calculate and write stats for total downloads...
# Builds one "<appid> <count>" line per app plus a final "ALL <total>" line.
# When config['stats_to_carbon'] is truthy each per-app count is also passed
# to carbon_send() under 'fdroid.download.<appid with dots replaced by _>'.
# NOTE(review): the initialisation of `lst` and `alldownloads` is not visible.
195 for appid in appscount:
196 count = appscount[appid]
197 lst.append(appid + " " + str(count))
198 if config['stats_to_carbon']:
199 carbon_send('fdroid.download.' + appid.replace('.', '_'),
201 alldownloads += count
202 lst.append("ALL " + str(alldownloads))
# Output paths from here on are hard-coded under 'stats/' rather than built
# from `statsdir`.
# NOTE(review): no close() call for any of the report files opened below is
# visible in this excerpt; if none exists, a `with` block would guarantee the
# data is flushed.
203 f = open('stats/total_downloads_app.txt', 'w')
204 f.write('# Total downloads by application, since October 2011\n')
205 for line in sorted(lst):
209 f = open('stats/total_downloads_app_version.txt', 'w')
210 f.write('# Total downloads by application and version, '
211 'since October 2011\n')
213 for appver in appsvercount:
214 count = appsvercount[appver]
215 lst.append(appver + " " + str(count))
216 for line in sorted(lst):
220 # Calculate and write stats for repo types...
221 logging.info("Processing repo types...")
222 repotypes = Counter()
# An empty or missing 'Repo Type' is counted as 'none'. For 'srclib' the type
# is replaced by the result of common.getsrclibvcs() for the app's 'Repo'.
# The loop header over the apps is not visible here.
224 rtype = app['Repo Type'] or 'none'
225 if rtype == 'srclib':
226 rtype = common.getsrclibvcs(app['Repo'])
227 repotypes[rtype] += 1
228 f = open('stats/repotypes.txt', 'w')
229 for rtype in repotypes:
230 count = repotypes[rtype]
231 f.write(rtype + ' ' + str(count) + '\n')
234 # Calculate and write stats for update check modes...
235 logging.info("Processing update check modes...")
# Parameterised modes are collapsed to their base name before counting:
# 'RepoManifest/<x>' becomes 'RepoManifest' (first 12 characters) and
# 'Tags <x>' becomes 'Tags' (first 4 characters). The statement that
# increments `ucms` is not visible in this excerpt.
238 checkmode = app['Update Check Mode']
239 if checkmode.startswith('RepoManifest/'):
240 checkmode = checkmode[:12]
241 if checkmode.startswith('Tags '):
242 checkmode = checkmode[:4]
244 f = open('stats/update_check_modes.txt', 'w')
245 for checkmode in ucms:
246 count = ucms[checkmode]
247 f.write(checkmode + ' ' + str(count) + '\n')
# Per-category counts; the statement that increments `ctgs` for each
# category is not visible in this excerpt.
250 logging.info("Processing categories...")
253 for category in app['Categories']:
255 f = open('stats/categories.txt', 'w')
256 for category in ctgs:
257 count = ctgs[category]
258 f.write(category + ' ' + str(count) + '\n')
# 'AntiFeatures' is either None or a comma-separated string; each entry is
# stripped of surrounding whitespace before being counted.
261 logging.info("Processing antifeatures...")
264 if app['AntiFeatures'] is None:
266 antifeatures = [a.strip() for a in app['AntiFeatures'].split(',')]
267 for antifeature in antifeatures:
268 afs[antifeature] += 1
269 f = open('stats/antifeatures.txt', 'w')
270 for antifeature in afs:
271 count = afs[antifeature]
272 f.write(antifeature + ' ' + str(count) + '\n')
275 # Calculate and write stats for licenses...
276 logging.info("Processing licenses...")
279 license = app['License']
280 licenses[license] += 1
281 f = open('stats/licenses.txt', 'w')
282 for license in licenses:
283 count = licenses[license]
284 f.write(license + ' ' + str(count) + '\n')
287 # Write list of disabled apps...
288 logging.info("Processing disabled apps...")
# Uses `allmetaapps` (not `metaapps`) because disabled apps were filtered out
# of the latter; ids are written one per line in sorted order.
289 disabled = [a['id'] for a in allmetaapps if a['Disabled']]
290 f = open('stats/disabled_apps.txt', 'w')
291 for appid in sorted(disabled):
292 f.write(appid + '\n')
295 # Write list of latest apps added to the repo...
296 logging.info("Processing latest apps...")
# NOTE(review): the argument 10 is presumably the number of entries wanted;
# the lines that write `latest` to the file are not visible here.
297 latest = knownapks.getlatest(10)
298 f = open('stats/latestapps.txt', 'w')
# Finally the unknown apk names collected during log processing are
# reported; the per-item statement is not visible in this excerpt.
304 logging.info('\nUnknown apks:')
305 for apk in unknownapks:
308 logging.info("Finished.")
# Script entry guard; the guarded statement is not visible in this excerpt.
310 if __name__ == "__main__":