Switch all headers to python3
[fdroidserver.git] / fdroidserver / stats.py
index fab2a4a987a1e71ac5811587a9ec4f61e1997022..351390fbf77bbc4dca6fc8854d655efd825829f1 100644 (file)
@@ -1,5 +1,4 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 #
 # stats.py - part of the FDroid server tools
 # Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
@@ -24,14 +23,16 @@ import time
 import traceback
 import glob
 import json
-from optparse import OptionParser
+from argparse import ArgumentParser
 import paramiko
 import socket
 import logging
-import common, metadata
+import common
+import metadata
 import subprocess
 from collections import Counter
 
+
 def carbon_send(key, value):
     s = socket.socket()
     s.connect((config['carbon_host'], config['carbon_port']))
@@ -42,32 +43,39 @@ def carbon_send(key, value):
 options = None
 config = None
 
+
+def most_common_stable(counts):
+    pairs = []
+    for s in counts:
+        pairs.append((s, counts[s]))
+    return sorted(pairs, key=lambda t: (-t[1], t[0]))
+
+
 def main():
 
     global options, config
 
     # Parse command line...
-    parser = OptionParser()
-    parser.add_option("-v", "--verbose", action="store_true", default=False,
-                      help="Spew out even more information than normal")
-    parser.add_option("-q", "--quiet", action="store_true", default=False,
-                      help="Restrict output to warnings and errors")
-    parser.add_option("-d", "--download", action="store_true", default=False,
-                      help="Download logs we don't have")
-    parser.add_option("--recalc", action="store_true", default=False,
-                      help="Recalculate aggregate stats - use when changes have been made that would invalidate old cached data.")
-    parser.add_option("--nologs", action="store_true", default=False,
-                      help="Don't do anything logs-related")
-    (options, args) = parser.parse_args()
+    parser = ArgumentParser()
+    common.setup_global_opts(parser)
+    parser.add_argument("-d", "--download", action="store_true", default=False,
+                        help="Download logs we don't have")
+    parser.add_argument("--recalc", action="store_true", default=False,
+                        help="Recalculate aggregate stats - use when changes "
+                        "have been made that would invalidate old cached data.")
+    parser.add_argument("--nologs", action="store_true", default=False,
+                        help="Don't do anything logs-related")
+    options = parser.parse_args()
 
     config = common.read_config(options)
 
     if not config['update_stats']:
-        logging.info("Stats are disabled - check your configuration")
+        logging.info("Stats are disabled - set \"update_stats = True\" in your config.py")
         sys.exit(1)
 
     # Get all metadata-defined apps...
-    metaapps = metadata.read_metadata(options.verbose)
+    allmetaapps = [app for app in metadata.read_metadata().values()]
+    metaapps = [app for app in allmetaapps if not app.Disabled]
 
     statsdir = 'stats'
     logsdir = os.path.join(statsdir, 'logs')
@@ -87,8 +95,8 @@ def main():
             logging.info('Retrieving logs')
             ssh = paramiko.SSHClient()
             ssh.load_system_host_keys()
-            ssh.connect('f-droid.org', username='fdroid', timeout=10,
-                    key_filename=config['webserver_keyfile'])
+            ssh.connect(config['stats_server'], username=config['stats_user'],
+                        timeout=10, key_filename=config['webserver_keyfile'])
             ftp = ssh.open_sftp()
             ftp.get_channel().settimeout(60)
             logging.info("...connected")
@@ -108,7 +116,7 @@ def main():
             traceback.print_exc()
             sys.exit(1)
         finally:
-            #Disconnect
+            # Disconnect
             if ftp is not None:
                 ftp.close()
             if ssh is not None:
@@ -122,9 +130,11 @@ def main():
         logging.info('Processing logs...')
         appscount = Counter()
         appsvercount = Counter()
-        logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] "GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) \d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
+        logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
+            '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
+            '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
         logsearch = re.compile(logexpr).search
-        for logfile in glob.glob(os.path.join(logsdir,'access-*.log.gz')):
+        for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
             logging.debug('...' + logfile)
 
             # Get the date for this log - e.g. 2012-02-28
@@ -140,28 +150,34 @@ def main():
                 # Calculate from logs...
 
                 today = {
-                        'apps': Counter(),
-                        'appsver': Counter(),
-                        'unknown': []
-                        }
+                    'apps': Counter(),
+                    'appsver': Counter(),
+                    'unknown': []
+                }
 
-                p = subprocess.Popen(["zcat", logfile], stdout = subprocess.PIPE)
+                p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
                 matches = (logsearch(line) for line in p.stdout)
                 for match in matches:
-                    if match and match.group('statuscode') == '200':
-                        uri = match.group('uri')
-                        if uri.endswith('.apk'):
-                            _, apkname = os.path.split(uri)
-                            app = knownapks.getapp(apkname)
-                            if app:
-                                appid, _ = app
-                                today['apps'][appid] += 1
-                                # Strip the '.apk' from apkname
-                                appver = apkname[:-4]
-                                today['appsver'][appver] += 1
-                            else:
-                                if not apkname in today['unknown']:
-                                    today['unknown'].append(apkname)
+                    if not match:
+                        continue
+                    if match.group('statuscode') != '200':
+                        continue
+                    if match.group('ip') in config['stats_ignore']:
+                        continue
+                    uri = match.group('uri')
+                    if not uri.endswith('.apk'):
+                        continue
+                    _, apkname = os.path.split(uri)
+                    app = knownapks.getapp(apkname)
+                    if app:
+                        appid, _ = app
+                        today['apps'][appid] += 1
+                        # Strip the '.apk' from apkname
+                        appver = apkname[:-4]
+                        today['appsver'][appver] += 1
+                    else:
+                        if apkname not in today['unknown']:
+                            today['unknown'].append(apkname)
 
                 # Save calculated aggregate data for today to cache
                 with open(agg_path, 'w') as f:
@@ -173,7 +189,7 @@ def main():
             for appid in today['appsver']:
                 appsvercount[appid] += today['appsver'][appid]
             for uk in today['unknown']:
-                if not uk in unknownapks:
+                if uk not in unknownapks:
                     unknownapks.append(uk)
 
         # Calculate and write stats for total downloads...
@@ -183,103 +199,95 @@ def main():
             count = appscount[appid]
             lst.append(appid + " " + str(count))
             if config['stats_to_carbon']:
-                carbon_send('fdroid.download.' + appid.replace('.', '_'), count)
+                carbon_send('fdroid.download.' + appid.replace('.', '_'),
+                            count)
             alldownloads += count
         lst.append("ALL " + str(alldownloads))
-        f = open('stats/total_downloads_app.txt', 'w')
-        f.write('# Total downloads by application, since October 2011\n')
-        for line in sorted(lst):
-            f.write(line + '\n')
-        f.close()
-
-        f = open('stats/total_downloads_app_version.txt', 'w')
-        f.write('# Total downloads by application and version, since October 2011\n')
+        with open(os.path.join(statsdir, 'total_downloads_app.txt'), 'w') as f:
+            f.write('# Total downloads by application, since October 2011\n')
+            for line in sorted(lst):
+                f.write(line + '\n')
+
         lst = []
         for appver in appsvercount:
             count = appsvercount[appver]
             lst.append(appver + " " + str(count))
-        for line in sorted(lst):
-            f.write(line + "\n")
-        f.close()
+
+        with open(os.path.join(statsdir, 'total_downloads_app_version.txt'), 'w') as f:
+            f.write('# Total downloads by application and version, '
+                    'since October 2011\n')
+            for line in sorted(lst):
+                f.write(line + "\n")
 
     # Calculate and write stats for repo types...
     logging.info("Processing repo types...")
     repotypes = Counter()
     for app in metaapps:
-        if len(app['Repo Type']) == 0:
-            rtype = 'none'
-        else:
-            if app['Repo Type'] == 'srclib':
-                rtype = common.getsrclibvcs(app['Repo'])
-            else:
-                rtype = app['Repo Type']
+        rtype = app.RepoType or 'none'
+        if rtype == 'srclib':
+            rtype = common.getsrclibvcs(app.Repo)
         repotypes[rtype] += 1
-    f = open('stats/repotypes.txt', 'w')
-    for rtype in repotypes:
-        count = repotypes[rtype]
-        f.write(rtype + ' ' + str(count) + '\n')
-    f.close()
+    with open(os.path.join(statsdir, 'repotypes.txt'), 'w') as f:
+        for rtype, count in most_common_stable(repotypes):
+            f.write(rtype + ' ' + str(count) + '\n')
 
     # Calculate and write stats for update check modes...
     logging.info("Processing update check modes...")
     ucms = Counter()
     for app in metaapps:
-        checkmode = app['Update Check Mode']
+        checkmode = app.UpdateCheckMode
         if checkmode.startswith('RepoManifest/'):
             checkmode = checkmode[:12]
         if checkmode.startswith('Tags '):
             checkmode = checkmode[:4]
         ucms[checkmode] += 1
-    f = open('stats/update_check_modes.txt', 'w')
-    for checkmode in ucms:
-        count = ucms[checkmode]
-        f.write(checkmode + ' ' + str(count) + '\n')
-    f.close()
+    with open(os.path.join(statsdir, 'update_check_modes.txt'), 'w') as f:
+        for checkmode, count in most_common_stable(ucms):
+            f.write(checkmode + ' ' + str(count) + '\n')
 
     logging.info("Processing categories...")
     ctgs = Counter()
     for app in metaapps:
-        for category in app['Categories']:
+        for category in app.Categories:
             ctgs[category] += 1
-    f = open('stats/categories.txt', 'w')
-    for category in ctgs:
-        count = ctgs[category]
-        f.write(category + ' ' + str(count) + '\n')
-    f.close()
+    with open(os.path.join(statsdir, 'categories.txt'), 'w') as f:
+        for category, count in most_common_stable(ctgs):
+            f.write(category + ' ' + str(count) + '\n')
 
     logging.info("Processing antifeatures...")
     afs = Counter()
     for app in metaapps:
-        if app['AntiFeatures'] is None:
+        if app.AntiFeatures is None:
             continue
-        antifeatures = [a.strip() for a in app['AntiFeatures'].split(',')]
-        for antifeature in antifeatures:
+        for antifeature in app.AntiFeatures:
             afs[antifeature] += 1
-    f = open('stats/antifeatures.txt', 'w')
-    for antifeature in afs:
-        count = afs[antifeature]
-        f.write(antifeature + ' ' + str(count) + '\n')
-    f.close()
+    with open(os.path.join(statsdir, 'antifeatures.txt'), 'w') as f:
+        for antifeature, count in most_common_stable(afs):
+            f.write(antifeature + ' ' + str(count) + '\n')
 
     # Calculate and write stats for licenses...
     logging.info("Processing licenses...")
     licenses = Counter()
     for app in metaapps:
-        license = app['License']
+        license = app.License
         licenses[license] += 1
-    f = open('stats/licenses.txt', 'w')
-    for license in licenses:
-        count = licenses[license]
-        f.write(license + ' ' + str(count) + '\n')
-    f.close()
+    with open(os.path.join(statsdir, 'licenses.txt'), 'w') as f:
+        for license, count in most_common_stable(licenses):
+            f.write(license + ' ' + str(count) + '\n')
+
+    # Write list of disabled apps...
+    logging.info("Processing disabled apps...")
+    disabled = [app.id for app in allmetaapps if app.Disabled]
+    with open(os.path.join(statsdir, 'disabled_apps.txt'), 'w') as f:
+        for appid in sorted(disabled):
+            f.write(appid + '\n')
 
     # Write list of latest apps added to the repo...
     logging.info("Processing latest apps...")
     latest = knownapks.getlatest(10)
-    f = open('stats/latestapps.txt', 'w')
-    for app in latest:
-        f.write(app + '\n')
-    f.close()
+    with open(os.path.join(statsdir, 'latestapps.txt'), 'w') as f:
+        for appid in latest:
+            f.write(appid + '\n')
 
     if unknownapks:
         logging.info('\nUnknown apks:')
@@ -290,4 +298,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
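
A quick usage sketch (illustrative only, not part of the diff above) of the most_common_stable() helper this change introduces: given a plain collections.Counter, it returns (key, count) pairs ordered by descending count and then alphabetically by key, which keeps the ordering of the generated stats files deterministic. The sample Counter values below are made up for the example.

    from collections import Counter

    def most_common_stable(counts):
        # Same helper as in the diff: sort by count (descending), then key.
        pairs = []
        for s in counts:
            pairs.append((s, counts[s]))
        return sorted(pairs, key=lambda t: (-t[1], t[0]))

    # Hypothetical repo-type counts, similar to what the repotypes Counter holds.
    repotypes = Counter(['git', 'git', 'hg', 'svn', 'hg'])
    for rtype, count in most_common_stable(repotypes):
        print(rtype, count)
    # git 2
    # hg 2
    # svn 1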