From 0dba93a9fc87d06cfb1e63e9f6a28f589819c38f Mon Sep 17 00:00:00 2001
From: Dan McGee <dan@archlinux.org>
Date: Wed, 22 Sep 2010 13:28:12 -0500
Subject: Switch mirror status delay display to average delay

This takes a bit more work to compute, but since we cache all of this anyway
it isn't too big of a deal. Using average delay instead of last delay will be
a bit more fair on mirrors that have odd syncing schedules, as well as
exposing those that only sync once a day. Also fix an issue that will arise
with cutoff_time being calculated once, and adjust mirror score to treat
hours delay as a float rather than an integer.

Signed-off-by: Dan McGee <dan@archlinux.org>
---
 mirrors/utils.py                    | 25 +++++++++++++++++++------
 templates/mirrors/status.html       |  9 ++++++---
 templates/mirrors/status_table.html |  4 ++--
 3 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/mirrors/utils.py b/mirrors/utils.py
index bbbaca26..5c8bff3b 100644
--- a/mirrors/utils.py
+++ b/mirrors/utils.py
@@ -5,10 +5,10 @@ from .models import MirrorLog, MirrorProtocol, MirrorUrl
 
 import datetime
 
-cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24)
 
 @cache_function(300)
 def get_mirror_statuses():
+    cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24)
     protocols = MirrorProtocol.objects.exclude(protocol__iexact='rsync')
     # I swear, this actually has decent performance...
     urls = MirrorUrl.objects.select_related(
@@ -18,15 +18,27 @@ def get_mirror_statuses():
             logs__check_time__gte=cutoff_time).annotate(
             check_count=Count('logs'), last_sync=Max('logs__last_sync'),
             last_check=Max('logs__check_time'),
-            duration_avg=Avg('logs__duration'), duration_min=Min('logs__duration'),
-            duration_max=Max('logs__duration'), duration_stddev=StdDev('logs__duration')
+            duration_avg=Avg('logs__duration'),
+            duration_stddev=StdDev('logs__duration')
             ).order_by('-last_sync', '-duration_avg')
 
+    # The Django ORM makes it really hard to get actual average delay in the
+    # above query, so run a separate query for it and we will process the
+    # results here.
+    times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,
+            check_time__gte=cutoff_time)
+    delays = {}
+    for log in times:
+        d = log.check_time - log.last_sync
+        delays.setdefault(log.url_id, []).append(d)
+
     for url in urls:
-        if url.last_check and url.last_sync:
-            d = url.last_check - url.last_sync
+        if url.id in delays:
+            url_delays = delays[url.id]
+            d = sum(url_delays, datetime.timedelta()) / len(url_delays)
             url.delay = d
-            url.score = d.days * 24 + d.seconds / 3600 + url.duration_avg + url.duration_stddev
+            hours = d.days * 24.0 + d.seconds / 3600.0
+            url.score = hours + url.duration_avg + url.duration_stddev
         else:
             url.delay = None
             url.score = None
@@ -34,6 +46,7 @@ def get_mirror_statuses():
 
 @cache_function(300)
 def get_mirror_errors():
+    cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24)
     errors = MirrorLog.objects.filter(
             is_success=False, check_time__gte=cutoff_time).values(
             'url__url', 'url__protocol__protocol', 'url__mirror__country',
diff --git a/templates/mirrors/status.html b/templates/mirrors/status.html
index 1e7e38a5..d89f2b41 100644
--- a/templates/mirrors/status.html
+++ b/templates/mirrors/status.html
@@ -20,8 +20,9 @@
         <tt>lastsync</tt> file on the mirror. If this file could not be
         retrieved or contained data we didn't recognize, this column will show
         'unknown'.</li>
-        <li><em>Delay:</em> The calculated mirroring delay; e.g. <code>last
-            check − last sync</code>.</li>
+        <li><em>μ Delay:</em> The calculated average mirroring delay; i.e. the
+        mean value of <code>last check − last sync</code> for each check of
+        this mirror URL.</li>
         <li><em>μ Duration:</em> The average (mean) time it took to connect and
         retrieve the <tt>lastsync</tt> file from the given URL. Note that this
         connection time is from the location of the Arch server; your geography
@@ -92,8 +93,10 @@
 <script type="text/javascript" src="/media/jquery.tablesorter.min.js"></script>
 <script type="text/javascript">
 $(document).ready(function() {
-    $("#outofsync_mirrors").add("#successful_mirrors").tablesorter(
+    $("#outofsync_mirrors").tablesorter(
         {widgets: ['zebra'], sortList: [[3,1], [5,1]]});
+    $("#successful_mirrors").tablesorter(
+        {widgets: ['zebra'], sortList: [[7,0]]});
     $("#errorlog_mirrors").tablesorter(
         {widgets: ['zebra'], sortList: [[4,1], [5,1]]});
 });
diff --git a/templates/mirrors/status_table.html b/templates/mirrors/status_table.html
index 90bbf6b6..75157765 100644
--- a/templates/mirrors/status_table.html
+++ b/templates/mirrors/status_table.html
@@ -6,7 +6,7 @@
             <th>Protocol</th>
             <th>Country</th>
             <th>Last Sync</th>
-            <th>Delay (hh:mm)</th>
+            <th>μ Delay (hh:mm)</th>
             <th>μ Duration (secs)</th>
             <th>σ Duration (secs)</th>
             <th>Mirror Score</th>
@@ -22,7 +22,7 @@
             <td>{{ m_url.delay|duration|default:'unknown' }}</td>
             <td>{{ m_url.duration_avg|floatformat:2|default:'unknown' }}</td>
             <td>{{ m_url.duration_stddev|floatformat:2|default:'unknown' }}</td>
-            <td>{{ m_url.score|floatformat:1|default:'unknown' }}</td>
+            <td>{{ m_url.score|floatformat:1|default:'∞' }}</td>
         </tr>
         {% endfor %}
     </tbody>
-- 
cgit v1.2.3-55-g3dc8