chiark / gitweb /
Apply some autopep8-python2 suggestions
[fdroidserver.git] / fdroidserver / stats.py
index be62f65097ddded4db4a17c0b9828200f67b4364..ef80fe4d0e0811204101af0fb24c11a6019f3907 100644 (file)
@@ -23,11 +23,16 @@ import re
 import time
 import traceback
 import glob
+import json
 from optparse import OptionParser
 import paramiko
-import common, metadata
 import socket
+import logging
+import common
+import metadata
 import subprocess
+from collections import Counter
+
 
 def carbon_send(key, value):
     s = socket.socket()
@@ -39,6 +44,7 @@ def carbon_send(key, value):
 options = None
 config = None
 
+
 def main():
 
     global options, config
@@ -47,18 +53,25 @@ def main():
     parser = OptionParser()
     parser.add_option("-v", "--verbose", action="store_true", default=False,
                       help="Spew out even more information than normal")
+    parser.add_option("-q", "--quiet", action="store_true", default=False,
+                      help="Restrict output to warnings and errors")
     parser.add_option("-d", "--download", action="store_true", default=False,
                       help="Download logs we don't have")
+    parser.add_option("--recalc", action="store_true", default=False,
+                      help="Recalculate aggregate stats - use when changes "
+                      "have been made that would invalidate old cached data.")
+    parser.add_option("--nologs", action="store_true", default=False,
+                      help="Don't do anything logs-related")
     (options, args) = parser.parse_args()
 
     config = common.read_config(options)
 
     if not config['update_stats']:
-        print "Stats are disabled - check your configuration"
+        logging.info("Stats are disabled - check your configuration")
         sys.exit(1)
 
     # Get all metadata-defined apps...
-    metaapps = metadata.read_metadata(options.verbose)
+    metaapps = [a for a in metadata.read_metadata().itervalues() if not a['Disabled']]
 
     statsdir = 'stats'
     logsdir = os.path.join(statsdir, 'logs')
@@ -75,14 +88,14 @@ def main():
         ssh = None
         ftp = None
         try:
-            print 'Retrieving logs'
+            logging.info('Retrieving logs')
             ssh = paramiko.SSHClient()
             ssh.load_system_host_keys()
-            ssh.connect('f-droid.org', username='fdroid', timeout=10,
-                    key_filename=config['webserver_keyfile'])
+            ssh.connect(config['stats_server'], username=config['stats_user'],
+                        timeout=10, key_filename=config['webserver_keyfile'])
             ftp = ssh.open_sftp()
             ftp.get_channel().settimeout(60)
-            print "...connected"
+            logging.info("...connected")
 
             ftp.chdir('logs')
             files = ftp.listdir()
@@ -93,152 +106,197 @@ def main():
                     destsize = ftp.stat(f).st_size
                     if (not os.path.exists(destpath) or
                             os.path.getsize(destpath) != destsize):
-                        print "...retrieving " + f
+                        logging.debug("...retrieving " + f)
                         ftp.get(f, destpath)
         except Exception:
             traceback.print_exc()
             sys.exit(1)
         finally:
-            #Disconnect
+            # Disconnect
             if ftp is not None:
                 ftp.close()
             if ssh is not None:
                 ssh.close()
 
-    # Process logs
-    if options.verbose:
-        print 'Processing logs...'
-    logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] "GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) \d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
-    logsearch = re.compile(logexpr).search
-    apps = {}
-    unknownapks = []
     knownapks = common.KnownApks()
-    for logfile in glob.glob(os.path.join(logsdir,'access-*.log.gz')):
-        if options.verbose:
-            print '...' + logfile
-        p = subprocess.Popen(["zcat", logfile], stdout = subprocess.PIPE)
-        matches = (logsearch(line) for line in p.stdout)
-        for match in matches:
-            if match and match.group('statuscode') == '200':
-                uri = match.group('uri')
-                if uri.endswith('.apk'):
+    unknownapks = []
+
+    if not options.nologs:
+        # Process logs
+        logging.info('Processing logs...')
+        appscount = Counter()
+        appsvercount = Counter()
+        logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
+            '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
+            '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
+        logsearch = re.compile(logexpr).search
+        for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
+            logging.debug('...' + logfile)
+
+            # Get the date for this log - e.g. 2012-02-28
+            thisdate = os.path.basename(logfile)[7:-7]
+
+            agg_path = os.path.join(datadir, thisdate + '.json')
+            if not options.recalc and os.path.exists(agg_path):
+                # Use previously calculated aggregate data
+                with open(agg_path, 'r') as f:
+                    today = json.load(f)
+
+            else:
+                # Calculate from logs...
+
+                today = {
+                    'apps': Counter(),
+                    'appsver': Counter(),
+                    'unknown': []
+                }
+
+                p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
+                matches = (logsearch(line) for line in p.stdout)
+                for match in matches:
+                    if not match:
+                        continue
+                    if match.group('statuscode') != '200':
+                        continue
+                    if match.group('ip') in config['stats_ignore']:
+                        continue
+                    uri = match.group('uri')
+                    if not uri.endswith('.apk'):
+                        continue
                     _, apkname = os.path.split(uri)
                     app = knownapks.getapp(apkname)
                     if app:
                         appid, _ = app
-                        if appid in apps:
-                            apps[appid] += 1
-                        else:
-                            apps[appid] = 1
+                        today['apps'][appid] += 1
+                        # Strip the '.apk' from apkname
+                        appver = apkname[:-4]
+                        today['appsver'][appver] += 1
                     else:
-                        if not apkname in unknownapks:
-                            unknownapks.append(apkname)
-
-    # Calculate and write stats for total downloads...
-    lst = []
-    alldownloads = 0
-    for app, count in apps.iteritems():
-        lst.append(app + " " + str(count))
-        if config['stats_to_carbon']:
-            carbon_send('fdroid.download.' + app.replace('.', '_'), count)
-        alldownloads += count
-    lst.append("ALL " + str(alldownloads))
-    f = open('stats/total_downloads_app.txt', 'w')
-    f.write('# Total downloads by application, since October 2011\n')
-    for line in sorted(lst):
-        f.write(line + '\n')
-    f.close()
+                        if apkname not in today['unknown']:
+                            today['unknown'].append(apkname)
+
+                # Save calculated aggregate data for today to cache
+                with open(agg_path, 'w') as f:
+                    json.dump(today, f)
+
+            # Add today's stats (whether cached or recalculated) to the total
+            for appid in today['apps']:
+                appscount[appid] += today['apps'][appid]
+            for appid in today['appsver']:
+                appsvercount[appid] += today['appsver'][appid]
+            for uk in today['unknown']:
+                if uk not in unknownapks:
+                    unknownapks.append(uk)
+
+        # Calculate and write stats for total downloads...
+        lst = []
+        alldownloads = 0
+        for appid in appscount:
+            count = appscount[appid]
+            lst.append(appid + " " + str(count))
+            if config['stats_to_carbon']:
+                carbon_send('fdroid.download.' + appid.replace('.', '_'),
+                            count)
+            alldownloads += count
+        lst.append("ALL " + str(alldownloads))
+        f = open('stats/total_downloads_app.txt', 'w')
+        f.write('# Total downloads by application, since October 2011\n')
+        for line in sorted(lst):
+            f.write(line + '\n')
+        f.close()
+
+        f = open('stats/total_downloads_app_version.txt', 'w')
+        f.write('# Total downloads by application and version, '
+                'since October 2011\n')
+        lst = []
+        for appver in appsvercount:
+            count = appsvercount[appver]
+            lst.append(appver + " " + str(count))
+        for line in sorted(lst):
+            f.write(line + "\n")
+        f.close()
 
     # Calculate and write stats for repo types...
-    repotypes = {}
+    logging.info("Processing repo types...")
+    repotypes = Counter()
     for app in metaapps:
-        if len(app['Repo Type']) == 0:
-            rtype = 'none'
-        else:
-            if app['Repo Type'] == 'srclib':
-                rtype = common.getsrclibvcs(app['Repo'])
-            else:
-                rtype = app['Repo Type']
-        if rtype in repotypes:
-            repotypes[rtype] += 1;
-        else:
-            repotypes[rtype] = 1
+        rtype = app['Repo Type'] or 'none'
+        if rtype == 'srclib':
+            rtype = common.getsrclibvcs(app['Repo'])
+        repotypes[rtype] += 1
     f = open('stats/repotypes.txt', 'w')
-    for rtype, count in repotypes.iteritems():
+    for rtype in repotypes:
+        count = repotypes[rtype]
         f.write(rtype + ' ' + str(count) + '\n')
     f.close()
 
     # Calculate and write stats for update check modes...
-    ucms = {}
+    logging.info("Processing update check modes...")
+    ucms = Counter()
     for app in metaapps:
-        checkmode = app['Update Check Mode'].split('/')[0]
-        if checkmode in ucms:
-            ucms[checkmode] += 1;
-        else:
-            ucms[checkmode] = 1
+        checkmode = app['Update Check Mode']
+        if checkmode.startswith('RepoManifest/'):
+            checkmode = checkmode[:12]
+        if checkmode.startswith('Tags '):
+            checkmode = checkmode[:4]
+        ucms[checkmode] += 1
     f = open('stats/update_check_modes.txt', 'w')
-    for checkmode, count in ucms.iteritems():
+    for checkmode in ucms:
+        count = ucms[checkmode]
         f.write(checkmode + ' ' + str(count) + '\n')
     f.close()
 
-    ctgs = {}
+    logging.info("Processing categories...")
+    ctgs = Counter()
     for app in metaapps:
-        if app['Categories'] is None:
-            continue
-        categories = [c.strip() for c in app['Categories'].split(',')]
-        for category in categories:
-            if category in ctgs:
-                ctgs[category] += 1;
-            else:
-                ctgs[category] = 1
+        for category in app['Categories']:
+            ctgs[category] += 1
     f = open('stats/categories.txt', 'w')
-    for category, count in ctgs.iteritems():
+    for category in ctgs:
+        count = ctgs[category]
         f.write(category + ' ' + str(count) + '\n')
     f.close()
 
-    afs = {}
+    logging.info("Processing antifeatures...")
+    afs = Counter()
     for app in metaapps:
         if app['AntiFeatures'] is None:
             continue
         antifeatures = [a.strip() for a in app['AntiFeatures'].split(',')]
         for antifeature in antifeatures:
-            if antifeature in afs:
-                afs[antifeature] += 1;
-            else:
-                afs[antifeature] = 1
+            afs[antifeature] += 1
     f = open('stats/antifeatures.txt', 'w')
-    for antifeature, count in afs.iteritems():
+    for antifeature in afs:
+        count = afs[antifeature]
         f.write(antifeature + ' ' + str(count) + '\n')
     f.close()
-    return
 
     # Calculate and write stats for licenses...
-    licenses = {}
+    logging.info("Processing licenses...")
+    licenses = Counter()
     for app in metaapps:
         license = app['License']
-        if license in licenses:
-            licenses[license] += 1;
-        else:
-            licenses[license] = 1
+        licenses[license] += 1
     f = open('stats/licenses.txt', 'w')
-    for license, count in licenses.iteritems():
+    for license in licenses:
+        count = licenses[license]
         f.write(license + ' ' + str(count) + '\n')
     f.close()
 
     # Write list of latest apps added to the repo...
+    logging.info("Processing latest apps...")
     latest = knownapks.getlatest(10)
     f = open('stats/latestapps.txt', 'w')
     for app in latest:
         f.write(app + '\n')
     f.close()
 
-    if len(unknownapks) > 0:
-        print '\nUnknown apks:'
+    if unknownapks:
+        logging.info('\nUnknown apks:')
         for apk in unknownapks:
-            print apk
+            logging.info(apk)
 
-    print "Finished."
+    logging.info("Finished.")
 
 if __name__ == "__main__":
     main()
-