summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan McGee <dan@archlinux.org>2012-05-13 20:15:00 -0500
committerDan McGee <dan@archlinux.org>2012-05-13 20:19:03 -0500
commit2f7d770b261b3428bcff366ba6ff4fa631dd980a (patch)
tree8d4b0ec70be3c86792a5a57f1037ee69d967e2fc
parent3b0b9012353d5ffda564998cab58f986770361be (diff)
downloadarchweb-2f7d770b261b3428bcff366ba6ff4fa631dd980a.tar.gz
archweb-2f7d770b261b3428bcff366ba6ff4fa631dd980a.zip
Add rsync support to mirrorcheck and other small improvements
The main changes in this patch implement rsync:// protocol checking support by calling the rsync binary, requested in FS#29878. We track and log much of the same things as we already do for FTP and HTTP URLs- check time, last sync, total check duration, etc. Also added in this patch is a configurable timeout value which defaults to the previous hardcoded value of 10 seconds; this can be passed as an option to the mirrorcheck command. Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r--mirrors/management/commands/mirrorcheck.py130
-rw-r--r--templates/mirrors/status.html4
2 files changed, 91 insertions, 43 deletions
diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py
index c1269226..ae89d5e0 100644
--- a/mirrors/management/commands/mirrorcheck.py
+++ b/mirrors/management/commands/mirrorcheck.py
@@ -9,22 +9,26 @@ we encounter errors, record those as well.
Usage: ./manage.py mirrorcheck
"""
-from django.core.management.base import NoArgsCommand
-from django.db import transaction
-
from collections import deque
from datetime import datetime
import logging
+import os
+from optparse import make_option
+from pytz import utc
import re
import socket
+import subprocess
import sys
import time
+import tempfile
from threading import Thread
import types
-from pytz import utc
from Queue import Queue, Empty
import urllib2
+from django.core.management.base import NoArgsCommand
+from django.db import transaction
+
from main.utils import utc_now
from mirrors.models import MirrorUrl, MirrorLog
@@ -37,10 +41,15 @@ logger = logging.getLogger()
class Command(NoArgsCommand):
+ option_list = NoArgsCommand.option_list + (
+ make_option('-t', '--timeout', dest='timeout', default='10',
+ help='Timeout value for connecting to URL'),
+ )
help = "Runs a check on all known mirror URLs to determine their up-to-date status."
def handle_noargs(self, **options):
v = int(options.get('verbosity', 0))
+ timeout = int(options.get('timeout', 10))
if v == 0:
logger.level = logging.ERROR
elif v == 1:
@@ -48,10 +57,29 @@ class Command(NoArgsCommand):
elif v == 2:
logger.level = logging.DEBUG
- return check_current_mirrors()
+ urls = MirrorUrl.objects.select_related('protocol').filter(
+ mirror__active=True, mirror__public=True)
+
+ pool = MirrorCheckPool(urls, timeout)
+ pool.run()
+ return 0
-def check_mirror_url(mirror_url):
+def parse_lastsync(log, data):
+ '''lastsync file should be an epoch value created by us.'''
+ try:
+ parsed_time = datetime.utcfromtimestamp(int(data))
+ log.last_sync = parsed_time.replace(tzinfo=utc)
+ except ValueError:
+ # it is bad news to try logging the lastsync value;
+ # sometimes we get a crazy-encoded web page.
+ # if we couldn't parse a time, this is a failure.
+ log.last_sync = None
+ log.error = "Could not parse time from lastsync"
+ log.is_success = False
+
+
+def check_mirror_url(mirror_url, timeout):
url = mirror_url.url + 'lastsync'
logger.info("checking URL %s", url)
log = MirrorLog(url=mirror_url, check_time=utc_now())
@@ -59,28 +87,14 @@ def check_mirror_url(mirror_url):
req = urllib2.Request(url, None, headers)
try:
start = time.time()
- result = urllib2.urlopen(req, timeout=10)
+ result = urllib2.urlopen(req, timeout=timeout)
data = result.read()
result.close()
end = time.time()
- # lastsync should be an epoch value created by us
- parsed_time = None
- try:
- parsed_time = datetime.utcfromtimestamp(int(data))
- parsed_time = parsed_time.replace(tzinfo=utc)
- except ValueError:
- # it is bad news to try logging the lastsync value;
- # sometimes we get a crazy-encoded web page.
- pass
-
- log.last_sync = parsed_time
- # if we couldn't parse a time, this is a failure
- if parsed_time is None:
- log.error = "Could not parse time from lastsync"
- log.is_success = False
+ parse_lastsync(log, data)
log.duration = end - start
logger.debug("success: %s, %.2f", url, log.duration)
- except urllib2.HTTPError, e:
+ except urllib2.HTTPError as e:
if e.code == 404:
# we have a duration, just not a success
end = time.time()
@@ -88,7 +102,7 @@ def check_mirror_url(mirror_url):
log.is_success = False
log.error = str(e)
logger.debug("failed: %s, %s", url, log.error)
- except urllib2.URLError, e:
+ except urllib2.URLError as e:
log.is_success = False
log.error = e.reason
if isinstance(e.reason, types.StringTypes) and \
@@ -101,20 +115,64 @@ def check_mirror_url(mirror_url):
elif isinstance(e.reason, socket.error):
log.error = e.reason.args[1]
logger.debug("failed: %s, %s", url, log.error)
- except socket.timeout, e:
+ except socket.timeout as e:
log.is_success = False
log.error = "Connection timed out."
logger.debug("failed: %s, %s", url, log.error)
+ except socket.error as e:
+ log.is_success = False
+ log.error = str(e)
+ logger.debug("failed: %s, %s", url, log.error)
+
+ return log
+
+
+def check_rsync_url(mirror_url, timeout):
+ url = mirror_url.url + 'lastsync'
+ logger.info("checking URL %s", url)
+ log = MirrorLog(url=mirror_url, check_time=utc_now())
+
+ tempdir = tempfile.mkdtemp()
+ lastsync_path = os.path.join(tempdir, 'lastsync')
+ rsync_cmd = ["rsync", "--quiet", "--contimeout=%d" % timeout,
+ "--timeout=%d" % timeout, url, lastsync_path]
+ try:
+ with open(os.devnull, 'w') as devnull:
+ proc = subprocess.Popen(rsync_cmd, stdout=devnull,
+ stderr=subprocess.PIPE)
+ start = time.time()
+ _, errdata = proc.communicate()
+ end = time.time()
+ log.duration = end - start
+ if proc.returncode != 0:
+ logger.debug("error: %s, %s", url, errdata)
+ log.is_success = False
+ log.error = errdata.strip()
+ # look at rsync error code- if we had a command error or timed out,
+ # don't record a duration as it is misleading
+ if proc.returncode in (1, 30, 35):
+ log.duration = None
+ else:
+ logger.debug("success: %s, %.2f", url, log.duration)
+ with open(lastsync_path, 'r') as lastsync:
+ parse_lastsync(log, lastsync.read())
+ finally:
+ if os.path.exists(lastsync_path):
+ os.unlink(lastsync_path)
+ os.rmdir(tempdir)
return log
-def mirror_url_worker(work, output):
+def mirror_url_worker(work, output, timeout):
while True:
try:
- item = work.get(block=False)
+ url = work.get(block=False)
try:
- log = check_mirror_url(item)
+ if url.protocol.protocol == 'rsync':
+ log = check_rsync_url(url, timeout)
+ else:
+ log = check_mirror_url(url, timeout)
output.append(log)
finally:
work.task_done()
@@ -123,7 +181,7 @@ def mirror_url_worker(work, output):
class MirrorCheckPool(object):
- def __init__(self, urls, num_threads=10):
+ def __init__(self, urls, timeout=10, num_threads=10):
self.tasks = Queue()
self.logs = deque()
for i in list(urls):
@@ -131,7 +189,7 @@ class MirrorCheckPool(object):
self.threads = []
for i in range(num_threads):
thread = Thread(target=mirror_url_worker,
- args=(self.tasks, self.logs))
+ args=(self.tasks, self.logs, timeout))
thread.daemon = True
self.threads.append(thread)
@@ -142,18 +200,8 @@ class MirrorCheckPool(object):
thread.start()
logger.debug("joining on all threads")
self.tasks.join()
- logger.debug("processing log entries")
+ logger.debug("processing %d log entries", len(self.logs))
MirrorLog.objects.bulk_create(self.logs)
logger.debug("log entries saved")
-
-def check_current_mirrors():
- urls = MirrorUrl.objects.filter(
- protocol__is_download=True,
- mirror__active=True, mirror__public=True)
-
- pool = MirrorCheckPool(urls)
- pool.run()
- return 0
-
# vim: set ts=4 sw=4 et:
diff --git a/templates/mirrors/status.html b/templates/mirrors/status.html
index 34896c07..2c350f56 100644
--- a/templates/mirrors/status.html
+++ b/templates/mirrors/status.html
@@ -16,8 +16,8 @@
has synced recently. This page contains several pieces of information about
each mirror.</p>
<ul>
- <li><em>Mirror URL:</em> Mirrors are checked on a per-URL basis. If
- both FTP and HTTP access are provided, both will be listed here.</li>
+ <li><em>Mirror URL:</em> Mirrors are checked on a per-URL basis. All
+ available URLs and protocols for each known mirror are listed.</li>
<li><em>Completion %:</em> The number of mirror checks that have
successfully connected and disconnected from the given URL. If this is
below 100%, the mirror may be unreliable.</li>