summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan McGee <dan@archlinux.org>2010-12-13 13:40:01 -0600
committerDan McGee <dan@archlinux.org>2010-12-13 13:40:01 -0600
commit7a320edae13ccf7db48f44137a5d20a1bffe90a5 (patch)
tree1049b342f66bd4467f51b5b3592ada6c8bd9fd3e
parent1553e2b5c93f1a8566af4ed07630910fce6bc3b1 (diff)
downloadarchweb-7a320edae13ccf7db48f44137a5d20a1bffe90a5.tar.gz
archweb-7a320edae13ccf7db48f44137a5d20a1bffe90a5.zip
Move database interaction out of the threaded section
We were seeing a lot of hangs and long-running never-ending processes. This might be due to some multithreading issues within Django, so move the save() calls to a loop after the join() on the threads doing the mirror polling. Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r--mirrors/management/commands/mirrorcheck.py36
1 files changed, 22 insertions, 14 deletions
diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py
index bab79219..d7aad4e2 100644
--- a/mirrors/management/commands/mirrorcheck.py
+++ b/mirrors/management/commands/mirrorcheck.py
@@ -10,7 +10,6 @@ Usage: ./manage.py mirrorcheck
"""
from django.core.management.base import NoArgsCommand
-from django.db.models import Q
from datetime import datetime, timedelta
import logging
@@ -51,10 +50,10 @@ class Command(NoArgsCommand):
return check_current_mirrors()
-def parse_rfc3339_datetime(time):
+def parse_rfc3339_datetime(time_string):
# '2010-09-02 11:05:06+02:00'
m = re.match('^(\d{4})-(\d{2})-(\d{2}) '
- '(\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time)
+ '(\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time_string)
if m:
vals = m.groups()
parsed = datetime(int(vals[0]), int(vals[1]), int(vals[2]),
@@ -123,46 +122,55 @@ def check_mirror_url(mirror_url):
log.error = "Connection timed out."
logger.debug("failed: %s, %s" % (url, log.error))
- log.save()
return log
-def mirror_url_worker(queue):
+def mirror_url_worker(work, output):
while True:
try:
- item = queue.get(block=False)
+ item = work.get(block=False)
try:
- check_mirror_url(item)
+ log = check_mirror_url(item)
+ output.put(log)
finally:
- queue.task_done()
+ work.task_done()
except Empty:
return 0
class MirrorCheckPool(object):
def __init__(self, work, num_threads=10):
self.tasks = Queue()
- for i in work:
+ self.logs = Queue()
+ for i in list(work):
self.tasks.put(i)
self.threads = []
for i in range(num_threads):
- thread = Thread(target=mirror_url_worker, args=(self.tasks,))
+ thread = Thread(target=mirror_url_worker,
+ args=(self.tasks, self.logs))
thread.daemon = True
self.threads.append(thread)
- def run_and_join(self):
+ def run(self):
logger.debug("starting threads")
for t in self.threads:
t.start()
logger.debug("joining on all threads")
self.tasks.join()
+ logger.debug("processing log entries")
+ try:
+ while True:
+ log = self.logs.get(block=False)
+ log.save()
+ self.logs.task_done()
+ except Empty:
+ logger.debug("all log items saved to database")
def check_current_mirrors():
urls = MirrorUrl.objects.filter(
- Q(protocol__protocol__iexact='HTTP') |
- Q(protocol__protocol__iexact='FTP'),
+ protocol__is_download=True,
mirror__active=True, mirror__public=True)
pool = MirrorCheckPool(urls)
- pool.run_and_join()
+ pool.run()
return 0
# For lack of a better place to put it, here is a query to get latest check