import time
import traceback
import glob
+import json
from optparse import OptionParser
import paramiko
import socket
import logging
+import common
+import metadata
+import subprocess
+from collections import Counter
-import common, metadata
-from common import FDroidPopen
# NOTE(review): only the socket creation is visible for this function in this
# fragment. At its call site in main() it receives a dotted metric name as
# `key` and a download count as `value` - confirm the rest of the body
# against the full file.
def carbon_send(key, value):
s = socket.socket()
# Module-level state. main() declares both names global and assigns `options`
# from the parsed command line.
# NOTE(review): the assignment of `config` is not visible in this fragment.
options = None
config = None
+
# Script entry point. This part builds the command-line parser and stores the
# parsed flags in the module-level `options` global.
def main():
global options, config
parser = OptionParser()
# All flags are booleans that default to False.
parser.add_option("-v", "--verbose", action="store_true", default=False,
help="Spew out even more information than normal")
+ parser.add_option("-q", "--quiet", action="store_true", default=False,
+ help="Restrict output to warnings and errors")
parser.add_option("-d", "--download", action="store_true", default=False,
help="Download logs we don't have")
+ parser.add_option("--recalc", action="store_true", default=False,
+ help="Recalculate aggregate stats - use when changes "
+ "have been made that would invalidate old cached data.")
parser.add_option("--nologs", action="store_true", default=False,
help="Don't do anything logs-related")
# Positional arguments are captured in `args`; no use of them is visible here.
(options, args) = parser.parse_args()
# NOTE(review): the condition guarding this exit is not visible in this
# fragment - confirm against the full file.
sys.exit(1)
# Get all metadata-defined apps...
- metaapps = metadata.read_metadata(options.verbose)
+ metaapps = metadata.read_metadata()
# Downloaded access logs are kept under stats/logs.
statsdir = 'stats'
logsdir = os.path.join(statsdir, 'logs')
# Open an SSH session to f-droid.org as user 'fdroid', trusting the system
# known-hosts file and authenticating with the key file named by
# config['webserver_keyfile']. The connection attempt times out after 10s.
ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.connect('f-droid.org', username='fdroid', timeout=10,
- key_filename=config['webserver_keyfile'])
+ key_filename=config['webserver_keyfile'])
ftp = ssh.open_sftp()
# Apply a 60-second timeout to the underlying SFTP channel.
ftp.get_channel().settimeout(60)
logging.info("...connected")
# A remote log is fetched only when there is no local copy or the local size
# differs from the remote size.
# NOTE(review): the loop that defines `f` and `destpath`, and the enclosing
# `try:`, are not visible in this fragment.
destsize = ftp.stat(f).st_size
if (not os.path.exists(destpath) or
os.path.getsize(destpath) != destsize):
- logging.info("...retrieving " + f)
+ logging.debug("...retrieving " + f)
ftp.get(f, destpath)
# Any failure prints a traceback and terminates the script with status 1.
except Exception:
traceback.print_exc()
sys.exit(1)
finally:
- #Disconnect
+ # Disconnect
# NOTE(review): the `is not None` guards imply ftp/ssh are initialised to None
# before the try block; that initialisation and the body of the ssh guard
# are not visible here - confirm.
if ftp is not None:
ftp.close()
if ssh is not None:
# Log-processing stage; skipped entirely when --nologs is given.
if not options.nologs:
# Process logs
logging.info('Processing logs...')
- apps = {}
- appsVer = {}
# Pattern for one access-log line. Only the `statuscode` and `uri` groups are
# read by the code below.
- logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] "GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) \d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
# Running totals across all log files: downloads per app id and per
# app version (APK name without the '.apk' suffix).
+ appscount = Counter()
+ appsvercount = Counter()
+ logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
+ '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
+ '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
logsearch = re.compile(logexpr).search
- for logfile in glob.glob(os.path.join(logsdir,'access-*.log.gz')):
- logging.info('...' + logfile)
- p = FDroidPopen(["zcat", logfile])
- matches = (logsearch(line) for line in p.stdout)
- for match in matches:
- if match and match.group('statuscode') == '200':
- uri = match.group('uri')
- if not uri.endswith('.apk'):
- continue
- _, apkname = os.path.split(uri)
- app = knownapks.getapp(apkname)
- if app:
- appid, _ = app
- if appid in apps:
- apps[appid] += 1
- else:
- apps[appid] = 1
- # Strip the '.apk' from apkname
- appVer = apkname[:-4]
- if appVer in appsVer:
- appsVer[appVer] += 1
- else:
- appsVer[appVer] = 1
- else:
- if not apkname in unknownapks:
- unknownapks.append(apkname)
+ for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
+ logging.debug('...' + logfile)
+
# The [7:-7] slice drops the 7-character 'access-' prefix and the 7-character
# '.log.gz' suffix matched by the glob above.
+ # Get the date for this log - e.g. 2012-02-28
+ thisdate = os.path.basename(logfile)[7:-7]
+
# One JSON cache file per log date.
# NOTE(review): `datadir` is not defined in the visible fragment.
+ agg_path = os.path.join(datadir, thisdate + '.json')
+ if not options.recalc and os.path.exists(agg_path):
# json.load returns plain dicts/lists rather than Counters; the cached data
# is only read (never incremented) further down.
+ # Use previously calculated aggregate data
+ with open(agg_path, 'r') as f:
+ today = json.load(f)
+
+ else:
+ # Calculate from logs...
+
+ today = {
+ 'apps': Counter(),
+ 'appsver': Counter(),
+ 'unknown': []
+ }
+
# Decompress through an external `zcat` process and scan its output
# line by line.
+ p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
+ matches = (logsearch(line) for line in p.stdout)
+ for match in matches:
# Only successful (status 200) requests for URIs ending in '.apk' count.
+ if match and match.group('statuscode') == '200':
+ uri = match.group('uri')
+ if uri.endswith('.apk'):
+ _, apkname = os.path.split(uri)
# NOTE(review): `knownapks` is defined outside this fragment. getapp() is
# used here as returning something falsy for an unrecognised APK name and
# otherwise a pair whose first element is the app id.
+ app = knownapks.getapp(apkname)
+ if app:
+ appid, _ = app
+ today['apps'][appid] += 1
+ # Strip the '.apk' from apkname
+ appver = apkname[:-4]
+ today['appsver'][appver] += 1
+ else:
+ if apkname not in today['unknown']:
+ today['unknown'].append(apkname)
+
+ # Save calculated aggregate data for today to cache
+ with open(agg_path, 'w') as f:
+ json.dump(today, f)
+
+ # Add today's stats (whether cached or recalculated) to the total
+ for appid in today['apps']:
+ appscount[appid] += today['apps'][appid]
+ for appid in today['appsver']:
+ appsvercount[appid] += today['appsver'][appid]
# `unknownapks` (defined outside this fragment) collects each unrecognised
# APK name at most once across all log files.
+ for uk in today['unknown']:
+ if uk not in unknownapks:
+ unknownapks.append(uk)
# Calculate and write stats for total downloads...
# Each collected line is '<appid> <count>'; a final 'ALL <total>' line
# carries the sum over all apps.
lst = []
alldownloads = 0
- for app, count in apps.iteritems():
- lst.append(app + " " + str(count))
+ for appid in appscount:
+ count = appscount[appid]
+ lst.append(appid + " " + str(count))
# When config['stats_to_carbon'] is set, each per-app count is also passed to
# carbon_send, with the dots in the app id replaced by underscores.
if config['stats_to_carbon']:
- carbon_send('fdroid.download.' + app.replace('.', '_'), count)
+ carbon_send('fdroid.download.' + appid.replace('.', '_'),
+ count)
alldownloads += count
lst.append("ALL " + str(alldownloads))
# NOTE(review): whatever is written between this open() and close() is not
# visible in this fragment.
f = open('stats/total_downloads_app.txt', 'w')
f.close()
f = open('stats/total_downloads_app_version.txt', 'w')
- f.write('# Total downloads by application and version, since October 2011\n')
+ f.write('# Total downloads by application and version, '
+ 'since October 2011\n')
# Per-version lines ('<appver> <count>') are written in sorted order.
lst = []
- for appver, count in appsVer.iteritems():
+ for appver in appsvercount:
+ count = appsvercount[appver]
lst.append(appver + " " + str(count))
# NOTE(review): no close() for this second file is visible in this fragment.
for line in sorted(lst):
f.write(line + "\n")
# Calculate and write stats for repo types...
logging.info("Processing repo types...")
- repotypes = {}
+ repotypes = Counter()
for app in metaapps:
# Apps with an empty 'Repo Type' are counted under 'none'.
if len(app['Repo Type']) == 0:
rtype = 'none'
# NOTE(review): the branch condition that selects common.getsrclibvcs() is not
# visible in this fragment - confirm against the full file.
rtype = common.getsrclibvcs(app['Repo'])
else:
rtype = app['Repo Type']
- if rtype in repotypes:
- repotypes[rtype] += 1;
- else:
- repotypes[rtype] = 1
+ repotypes[rtype] += 1
# One '<repo type> <count>' line per distinct type.
f = open('stats/repotypes.txt', 'w')
- for rtype, count in repotypes.iteritems():
+ for rtype in repotypes:
+ count = repotypes[rtype]
f.write(rtype + ' ' + str(count) + '\n')
f.close()
# Calculate and write stats for update check modes...
logging.info("Processing update check modes...")
- ucms = {}
+ ucms = Counter()
for app in metaapps:
- checkmode = app['Update Check Mode'].split('/')[0]
- if checkmode in ucms:
- ucms[checkmode] += 1;
- else:
- ucms[checkmode] = 1
# Collapse parameterised modes so they are counted together: a value starting
# with 'RepoManifest/' is cut to its first 12 characters ('RepoManifest') and
# a value starting with 'Tags ' is cut to its first 4 characters ('Tags').
+ checkmode = app['Update Check Mode']
+ if checkmode.startswith('RepoManifest/'):
+ checkmode = checkmode[:12]
+ if checkmode.startswith('Tags '):
+ checkmode = checkmode[:4]
+ ucms[checkmode] += 1
# One '<mode> <count>' line per distinct mode.
f = open('stats/update_check_modes.txt', 'w')
- for checkmode, count in ucms.iteritems():
+ for checkmode in ucms:
+ count = ucms[checkmode]
f.write(checkmode + ' ' + str(count) + '\n')
f.close()
# Count how many apps fall into each category and write the totals.
logging.info("Processing categories...")
- ctgs = {}
+ ctgs = Counter()
for app in metaapps:
- if app['Categories'] is None:
- continue
- categories = [c.strip() for c in app['Categories'].split(',')]
- for category in categories:
- if category in ctgs:
- ctgs[category] += 1;
- else:
- ctgs[category] = 1
# The added code iterates app['Categories'] directly, whereas the removed code
# split a comma-separated string and skipped None.
# NOTE(review): presumably the field is now a list of names - confirm against
# the metadata module.
+ for category in app['Categories']:
+ ctgs[category] += 1
# One '<category> <count>' line per distinct category.
f = open('stats/categories.txt', 'w')
- for category, count in ctgs.iteritems():
+ for category in ctgs:
+ count = ctgs[category]
f.write(category + ' ' + str(count) + '\n')
f.close()
# Count how many apps declare each antifeature and write the totals.
logging.info("Processing antifeatures...")
- afs = {}
+ afs = Counter()
for app in metaapps:
# Apps whose 'AntiFeatures' is None contribute nothing; otherwise the value is
# split on commas and each name is stripped of surrounding whitespace.
if app['AntiFeatures'] is None:
continue
antifeatures = [a.strip() for a in app['AntiFeatures'].split(',')]
for antifeature in antifeatures:
- if antifeature in afs:
- afs[antifeature] += 1;
- else:
- afs[antifeature] = 1
+ afs[antifeature] += 1
# One '<antifeature> <count>' line per distinct antifeature.
f = open('stats/antifeatures.txt', 'w')
- for antifeature, count in afs.iteritems():
+ for antifeature in afs:
+ count = afs[antifeature]
f.write(antifeature + ' ' + str(count) + '\n')
f.close()
# Calculate and write stats for licenses...
logging.info("Processing licenses...")
- licenses = {}
+ licenses = Counter()
# Each app contributes exactly one count, keyed by the raw value of its
# 'License' field.
for app in metaapps:
license = app['License']
- if license in licenses:
- licenses[license] += 1;
- else:
- licenses[license] = 1
+ licenses[license] += 1
# One '<license> <count>' line per distinct license value.
f = open('stats/licenses.txt', 'w')
- for license, count in licenses.iteritems():
+ for license in licenses:
+ count = licenses[license]
f.write(license + ' ' + str(count) + '\n')
f.close()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
main()
-