From a1b14a417342116ac31ec937ef1f4dea91ce62f4 Mon Sep 17 00:00:00 2001
From: Jelle van der Waa
Date: Sun, 22 Jul 2018 18:39:00 +0200
Subject: Implement cleaning up older log entries in mirrorcheck

MirrorLog entries are not cleaned up by default and will clog the
database. The Django settings now define a retention period, in days,
for how long to keep mirror logs; on every mirrorcheck run, logs older
than that period are removed from the database.
---
 mirrors/management/commands/mirrorcheck.py | 17 +++++++-
 mirrors/tests/__init__.py                  |  2 +-
 mirrors/tests/test_mirrorcheck.py          | 64 ++++++++++++++++++++++++++++++
 settings.py                                |  3 ++
 4 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 mirrors/tests/test_mirrorcheck.py

diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py
index ad6c3001..c78ad7b7 100644
--- a/mirrors/management/commands/mirrorcheck.py
+++ b/mirrors/management/commands/mirrorcheck.py
@@ -10,7 +10,7 @@ Usage: ./manage.py mirrorcheck
 """
 
 from collections import deque
-from datetime import datetime
+from datetime import datetime, timedelta
 from httplib import HTTPException
 import logging
 import os
@@ -29,6 +29,7 @@ import urllib2
 
 from django.core.management.base import BaseCommand
 from django.db import transaction
+from django.conf import settings
 from django.utils.timezone import now
 
 from mirrors.models import MirrorUrl, MirrorLog, CheckLocation
@@ -84,6 +85,7 @@ class Command(BaseCommand):
 
         pool = MirrorCheckPool(urls, location, timeout)
         pool.run()
+        pool.cleanup()
         return 0
 
 
@@ -266,4 +268,17 @@ class MirrorCheckPool(object):
             MirrorLog.objects.bulk_create(self.logs)
         logger.debug("log entries saved")
 
+    def cleanup(self):
+        """Delete MirrorLog entries older than the retention period.
+
+        The period comes from settings.MIRRORLOG_RETENTION_PERIOD (in days)
+        and defaults to 365 days when the setting is not defined.
+        """
+        days = getattr(settings, 'MIRRORLOG_RETENTION_PERIOD', 365)
+        removal_date = now() - timedelta(days=days)
+        logger.info("cleaning up MirrorLog objects older than %s",
+                    removal_date.strftime('%Y-%m-%d'))
+        MirrorLog.objects.filter(check_time__lt=removal_date).delete()
+        logger.info('Finished cleaning up old MirrorLog objects')
+
 # vim: set ts=4 sw=4 et:
diff --git a/mirrors/tests/__init__.py b/mirrors/tests/__init__.py
index a1d3c2c9..4ea5e1fb 100644
--- a/mirrors/tests/__init__.py
+++ b/mirrors/tests/__init__.py
@@ -2,7 +2,7 @@ from mirrors.models import MirrorUrl, MirrorProtocol, Mirror
 
 
 def create_mirror_url(name='mirror1', country='US',
-                      protocol='http', url='https://archlinux.org'):
+                      protocol='http', url='https://archlinux.org/'):
     mirror = Mirror.objects.create(name=name,
                                    admin_email='admin@archlinux.org')
     mirror_protocol = MirrorProtocol.objects.create(protocol=protocol)
diff --git a/mirrors/tests/test_mirrorcheck.py b/mirrors/tests/test_mirrorcheck.py
new file mode 100644
index 00000000..1f22d1c5
--- /dev/null
+++ b/mirrors/tests/test_mirrorcheck.py
@@ -0,0 +1,64 @@
+import mock
+import time
+
+
+from django.utils.timezone import now
+from datetime import timedelta
+
+
+from django.test import TestCase
+from django.core.management import call_command
+
+
+from mirrors.tests import create_mirror_url
+from mirrors.models import MirrorLog
+
+
+class MirrorCheckTest(TestCase):
+    """Exercise the mirrorcheck command with urllib2 mocked out."""
+
+    def setUp(self):
+        self.mirror_url = create_mirror_url()
+
+    def tearDown(self):
+        self.mirror_url.delete()
+
+    @mock.patch('urllib2.Request')
+    @mock.patch('urllib2.urlopen')
+    def test_invalid(self, urlopen, Request):
+        """A non-timestamp response body is logged as a failed check."""
+        urlopen.return_value.read.return_value = 'data'
+        Request.get_host.return_value = 'archlinux.org'
+        Request.type.return_value = 'https'
+
+        call_command('mirrorcheck')
+        mirrorlog = MirrorLog.objects.first()
+        self.assertNotEqual(mirrorlog.error, '')
+        self.assertEqual(mirrorlog.is_success, False)
+
+    @mock.patch('urllib2.Request')
+    @mock.patch('urllib2.urlopen')
+    def test_valid(self, urlopen, Request):
+        """A current Unix timestamp in the body is logged as a success."""
+        urlopen.return_value.read.return_value = str(int(time.time()))
+        Request.get_host.return_value = 'archlinux.org'
+        Request.type.return_value = 'https'
+
+        call_command('mirrorcheck')
+        mirrorlog = MirrorLog.objects.first()
+        self.assertEqual(mirrorlog.error, '')
+        self.assertEqual(mirrorlog.is_success, True)
+
+    @mock.patch('urllib2.Request')
+    @mock.patch('urllib2.urlopen')
+    def test_cleanup(self, urlopen, Request):
+        """Only the entry written by this run survives the cleanup."""
+        urlopen.return_value.read.return_value = str(int(time.time()))
+        Request.get_host.return_value = 'archlinux.org'
+        Request.type.return_value = 'https'
+
+        # 600 days is well past the 365-day retention period in settings.py.
+        date = now() - timedelta(days=600)
+        MirrorLog.objects.create(url=self.mirror_url, check_time=date)
+        call_command('mirrorcheck')
+        self.assertEqual(MirrorLog.objects.count(), 1)
diff --git a/settings.py b/settings.py
index df447b3e..6022d572 100644
--- a/settings.py
+++ b/settings.py
@@ -145,6 +145,9 @@ TORRENT_TRACKERS = (
     'http://tracker.archlinux.org:6969/announce',
 )
 
+# How long to keep MirrorLog entries, in days
+MIRRORLOG_RETENTION_PERIOD = 365
+
 # Shorten some names just a bit
 COUNTRIES_OVERRIDE = {
     'GB': 'United Kingdom',
-- 
cgit v1.2.3-55-g3dc8