From 0dba93a9fc87d06cfb1e63e9f6a28f589819c38f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 22 Sep 2010 13:28:12 -0500 Subject: Switch mirror status delay display to average delay This takes a bit more work to compute, but since we cache all of this anyway it isn't too big of a deal. Using average delay instead of last delay will be a bit more fair on mirrors that have odd syncing schedules, as well as exposing those that only sync once a day. Also fix an issue that will arise with cutoff_time being calculated once, and adjust mirror score to treat hours delay as a float rather than an integer. Signed-off-by: Dan McGee --- mirrors/utils.py | 25 +++++++++++++++++++------ templates/mirrors/status.html | 9 ++++++--- templates/mirrors/status_table.html | 4 ++-- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/mirrors/utils.py b/mirrors/utils.py index bbbaca26..5c8bff3b 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -5,10 +5,10 @@ from .models import MirrorLog, MirrorProtocol, MirrorUrl import datetime -cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24) @cache_function(300) def get_mirror_statuses(): + cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24) protocols = MirrorProtocol.objects.exclude(protocol__iexact='rsync') # I swear, this actually has decent performance... 
urls = MirrorUrl.objects.select_related( @@ -18,15 +18,27 @@ def get_mirror_statuses(): logs__check_time__gte=cutoff_time).annotate( check_count=Count('logs'), last_sync=Max('logs__last_sync'), last_check=Max('logs__check_time'), - duration_avg=Avg('logs__duration'), duration_min=Min('logs__duration'), - duration_max=Max('logs__duration'), duration_stddev=StdDev('logs__duration') + duration_avg=Avg('logs__duration'), + duration_stddev=StdDev('logs__duration') ).order_by('-last_sync', '-duration_avg') + # The Django ORM makes it really hard to get actual average delay in the + # above query, so run a separate query for it and we will process the + # results here. + times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False, + check_time__gte=cutoff_time) + delays = {} + for log in times: + d = log.check_time - log.last_sync + delays.setdefault(log.url_id, []).append(d) + for url in urls: - if url.last_check and url.last_sync: - d = url.last_check - url.last_sync + if url.id in delays: + url_delays = delays[url.id] + d = sum(url_delays, datetime.timedelta()) / len(url_delays) url.delay = d - url.score = d.days * 24 + d.seconds / 3600 + url.duration_avg + url.duration_stddev + hours = d.days * 24.0 + d.seconds / 3600.0 + url.score = hours + url.duration_avg + url.duration_stddev else: url.delay = None url.score = None @@ -34,6 +46,7 @@ def get_mirror_statuses(): @cache_function(300) def get_mirror_errors(): + cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24) errors = MirrorLog.objects.filter( is_success=False, check_time__gte=cutoff_time).values( 'url__url', 'url__protocol__protocol', 'url__mirror__country', diff --git a/templates/mirrors/status.html b/templates/mirrors/status.html index 1e7e38a5..d89f2b41 100644 --- a/templates/mirrors/status.html +++ b/templates/mirrors/status.html @@ -20,8 +20,9 @@ lastsync file on the mirror. 
If this file could not be retrieved or contained data we didn't recognize, this column will show 'unknown'. -
  • Delay: The calculated mirroring delay; e.g. last - check − last sync.
  • +
  • μ Delay: The calculated average mirroring delay; i.e. the + mean value of last check − last sync for each check of + this mirror URL.
  • μ Duration: The average (mean) time it took to connect and retrieve the lastsync file from the given URL. Note that this connection time is from the location of the Arch server; your geography @@ -92,8 +93,10 @@