import time
import traceback
import glob
+import json
from optparse import OptionParser
import paramiko
-import common, metadata
import socket
+import logging
+import common
+import metadata
import subprocess
+from collections import Counter
+
def carbon_send(key, value):
s = socket.socket()
options = None
config = None
+
def main():
global options, config
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", default=False,
help="Spew out even more information than normal")
+ parser.add_option("-q", "--quiet", action="store_true", default=False,
+ help="Restrict output to warnings and errors")
parser.add_option("-d", "--download", action="store_true", default=False,
help="Download logs we don't have")
+ parser.add_option("--recalc", action="store_true", default=False,
+ help="Recalculate aggregate stats - use when changes "
+ "have been made that would invalidate old cached data.")
+ parser.add_option("--nologs", action="store_true", default=False,
+ help="Don't do anything logs-related")
(options, args) = parser.parse_args()
config = common.read_config(options)
if not config['update_stats']:
- print "Stats are disabled - check your configuration"
+ logging.info("Stats are disabled - check your configuration")
sys.exit(1)
# Get all metadata-defined apps...
- metaapps = metadata.read_metadata(options.verbose)
+ metaapps = metadata.read_metadata()
statsdir = 'stats'
logsdir = os.path.join(statsdir, 'logs')
ssh = None
ftp = None
try:
- print 'Retrieving logs'
+ logging.info('Retrieving logs')
ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.connect('f-droid.org', username='fdroid', timeout=10,
- key_filename=config['webserver_keyfile'])
+ key_filename=config['webserver_keyfile'])
ftp = ssh.open_sftp()
ftp.get_channel().settimeout(60)
- print "...connected"
+ logging.info("...connected")
ftp.chdir('logs')
files = ftp.listdir()
destsize = ftp.stat(f).st_size
if (not os.path.exists(destpath) or
os.path.getsize(destpath) != destsize):
- print "...retrieving " + f
+ logging.debug("...retrieving " + f)
ftp.get(f, destpath)
except Exception:
traceback.print_exc()
sys.exit(1)
finally:
- #Disconnect
+ # Disconnect
if ftp is not None:
ftp.close()
if ssh is not None:
ssh.close()
- # Process logs
- if options.verbose:
- print 'Processing logs...'
- logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] "GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) \d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
- logsearch = re.compile(logexpr).search
- apps = {}
- unknownapks = []
knownapks = common.KnownApks()
- for logfile in glob.glob(os.path.join(logsdir,'access-*.log.gz')):
- if options.verbose:
- print '...' + logfile
- p = subprocess.Popen(["zcat", logfile], stdout = subprocess.PIPE)
- matches = (logsearch(line) for line in p.stdout)
- for match in matches:
- if match and match.group('statuscode') == '200':
- uri = match.group('uri')
- if uri.endswith('.apk'):
- _, apkname = os.path.split(uri)
- app = knownapks.getapp(apkname)
- if app:
- appid, _ = app
- if appid in apps:
- apps[appid] += 1
- else:
- apps[appid] = 1
- else:
- if not apkname in unknownapks:
- unknownapks.append(apkname)
-
- # Calculate and write stats for total downloads...
- lst = []
- alldownloads = 0
- for app, count in apps.iteritems():
- lst.append(app + " " + str(count))
- if config['stats_to_carbon']:
- carbon_send('fdroid.download.' + app.replace('.', '_'), count)
- alldownloads += count
- lst.append("ALL " + str(alldownloads))
- f = open('stats/total_downloads_app.txt', 'w')
- f.write('# Total downloads by application, since October 2011\n')
- for line in sorted(lst):
- f.write(line + '\n')
- f.close()
+ unknownapks = []
+
+ if not options.nologs:
+ # Process logs
+ logging.info('Processing logs...')
+ appscount = Counter()
+ appsvercount = Counter()
+ logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
+ '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
+ '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
+ logsearch = re.compile(logexpr).search
+ for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
+ logging.debug('...' + logfile)
+
+ # Get the date for this log - e.g. 2012-02-28
+ thisdate = os.path.basename(logfile)[7:-7]
+
+ agg_path = os.path.join(datadir, thisdate + '.json')
+ if not options.recalc and os.path.exists(agg_path):
+ # Use previously calculated aggregate data
+ with open(agg_path, 'r') as f:
+ today = json.load(f)
+
+ else:
+ # Calculate from logs...
+
+ today = {
+ 'apps': Counter(),
+ 'appsver': Counter(),
+ 'unknown': []
+ }
+
+ p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
+ matches = (logsearch(line) for line in p.stdout)
+ for match in matches:
+ if match and match.group('statuscode') == '200':
+ uri = match.group('uri')
+ if uri.endswith('.apk'):
+ _, apkname = os.path.split(uri)
+ app = knownapks.getapp(apkname)
+ if app:
+ appid, _ = app
+ today['apps'][appid] += 1
+ # Strip the '.apk' from apkname
+ appver = apkname[:-4]
+ today['appsver'][appver] += 1
+ else:
+ if apkname not in today['unknown']:
+ today['unknown'].append(apkname)
+
+ # Save calculated aggregate data for today to cache
+ with open(agg_path, 'w') as f:
+ json.dump(today, f)
+
+ # Add today's stats (whether cached or recalculated) to the total
+ for appid in today['apps']:
+ appscount[appid] += today['apps'][appid]
+ for appid in today['appsver']:
+ appsvercount[appid] += today['appsver'][appid]
+ for uk in today['unknown']:
+ if uk not in unknownapks:
+ unknownapks.append(uk)
+
+ # Calculate and write stats for total downloads...
+ lst = []
+ alldownloads = 0
+ for appid in appscount:
+ count = appscount[appid]
+ lst.append(appid + " " + str(count))
+ if config['stats_to_carbon']:
+ carbon_send('fdroid.download.' + appid.replace('.', '_'),
+ count)
+ alldownloads += count
+ lst.append("ALL " + str(alldownloads))
+ f = open('stats/total_downloads_app.txt', 'w')
+ f.write('# Total downloads by application, since October 2011\n')
+ for line in sorted(lst):
+ f.write(line + '\n')
+ f.close()
+
+ f = open('stats/total_downloads_app_version.txt', 'w')
+ f.write('# Total downloads by application and version, '
+ 'since October 2011\n')
+ lst = []
+ for appver in appsvercount:
+ count = appsvercount[appver]
+ lst.append(appver + " " + str(count))
+ for line in sorted(lst):
+ f.write(line + "\n")
+ f.close()
# Calculate and write stats for repo types...
- repotypes = {}
+ logging.info("Processing repo types...")
+ repotypes = Counter()
for app in metaapps:
if len(app['Repo Type']) == 0:
rtype = 'none'
rtype = common.getsrclibvcs(app['Repo'])
else:
rtype = app['Repo Type']
- if rtype in repotypes:
- repotypes[rtype] += 1;
- else:
- repotypes[rtype] = 1
+ repotypes[rtype] += 1
f = open('stats/repotypes.txt', 'w')
- for rtype, count in repotypes.iteritems():
+ for rtype in repotypes:
+ count = repotypes[rtype]
f.write(rtype + ' ' + str(count) + '\n')
f.close()
# Calculate and write stats for update check modes...
- ucms = {}
+ logging.info("Processing update check modes...")
+ ucms = Counter()
for app in metaapps:
- checkmode = app['Update Check Mode'].split('/')[0]
- if checkmode in ucms:
- ucms[checkmode] += 1;
- else:
- ucms[checkmode] = 1
+ checkmode = app['Update Check Mode']
+ if checkmode.startswith('RepoManifest/'):
+ checkmode = checkmode[:12]
+ if checkmode.startswith('Tags '):
+ checkmode = checkmode[:4]
+ ucms[checkmode] += 1
f = open('stats/update_check_modes.txt', 'w')
- for checkmode, count in ucms.iteritems():
+ for checkmode in ucms:
+ count = ucms[checkmode]
f.write(checkmode + ' ' + str(count) + '\n')
f.close()
- ctgs = {}
+ logging.info("Processing categories...")
+ ctgs = Counter()
for app in metaapps:
- if app['Categories'] is None:
- continue
- categories = [c.strip() for c in app['Categories'].split(',')]
- for category in categories:
- if category in ctgs:
- ctgs[category] += 1;
- else:
- ctgs[category] = 1
+ for category in app['Categories']:
+ ctgs[category] += 1
f = open('stats/categories.txt', 'w')
- for category, count in ctgs.iteritems():
+ for category in ctgs:
+ count = ctgs[category]
f.write(category + ' ' + str(count) + '\n')
f.close()
- afs = {}
+ logging.info("Processing antifeatures...")
+ afs = Counter()
for app in metaapps:
if app['AntiFeatures'] is None:
continue
antifeatures = [a.strip() for a in app['AntiFeatures'].split(',')]
for antifeature in antifeatures:
- if antifeature in afs:
- afs[antifeature] += 1;
- else:
- afs[antifeature] = 1
+ afs[antifeature] += 1
f = open('stats/antifeatures.txt', 'w')
- for antifeature, count in afs.iteritems():
+ for antifeature in afs:
+ count = afs[antifeature]
f.write(antifeature + ' ' + str(count) + '\n')
f.close()
- return
# Calculate and write stats for licenses...
- licenses = {}
+ logging.info("Processing licenses...")
+ licenses = Counter()
for app in metaapps:
license = app['License']
- if license in licenses:
- licenses[license] += 1;
- else:
- licenses[license] = 1
+ licenses[license] += 1
f = open('stats/licenses.txt', 'w')
- for license, count in licenses.iteritems():
+ for license in licenses:
+ count = licenses[license]
f.write(license + ' ' + str(count) + '\n')
f.close()
# Write list of latest apps added to the repo...
+ logging.info("Processing latest apps...")
latest = knownapks.getlatest(10)
f = open('stats/latestapps.txt', 'w')
for app in latest:
f.write(app + '\n')
f.close()
- if len(unknownapks) > 0:
- print '\nUnknown apks:'
+ if unknownapks:
+ logging.info('\nUnknown apks:')
for apk in unknownapks:
- print apk
+ logging.info(apk)
- print "Finished."
+ logging.info("Finished.")
if __name__ == "__main__":
main()
-