From 77842a6c76095277b024505708bf528d455b9c89 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 7 Apr 2011 16:52:52 -0500 Subject: Consolidate caching black magic Get the stuff used to retrieve and refresh the latest date values all in the same place, and make it a bit more beautiful by refactoring it all into a common set of methods. Signed-off-by: Dan McGee --- feeds.py | 44 +++++--------------------------------------- main/models.py | 7 +++---- main/utils.py | 34 +++++++++++++++++++++++++--------- news/models.py | 4 ++-- 4 files changed, 35 insertions(+), 54 deletions(-) diff --git a/feeds.py b/feeds.py index 0be12531..269d0a38 100644 --- a/feeds.py +++ b/feeds.py @@ -2,15 +2,13 @@ import pytz from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed -from django.core.cache import cache from django.db.models import Q from django.utils.feedgenerator import Rss201rev2Feed from django.utils.hashcompat import md5_constructor from django.views.decorators.http import condition +from main.utils import retrieve_latest from main.models import Arch, Repo, Package -from main.utils import CACHE_TIMEOUT -from main.utils import CACHE_PACKAGE_KEY, CACHE_NEWS_KEY from news.models import News def check_for_unique_id(f): @@ -31,32 +29,14 @@ class GuidNotPermalinkFeed(Rss201rev2Feed): super(GuidNotPermalinkFeed, self).write_items(handler) -def retrieve_package_latest(): - # we could break this down based on the request url, but it would probably - # cost us more in query time to do so. - latest = cache.get(CACHE_PACKAGE_KEY) - if latest: - return latest - try: - latest = Package.objects.values('last_update').latest( - 'last_update')['last_update'] - # Using add means "don't overwrite anything in there". What could be in - # there is an explicit None value that our refresh signal set, which - # means we want to avoid race condition possibilities for a bit. - cache.add(CACHE_PACKAGE_KEY, latest, CACHE_TIMEOUT) - return latest - except Package.DoesNotExist: - pass - return None - def package_etag(request, *args, **kwargs): - latest = retrieve_package_latest() + latest = retrieve_latest(Package) if latest: return md5_constructor(str(kwargs) + str(latest)).hexdigest() return None def package_last_modified(request, *args, **kwargs): - return retrieve_package_latest() + return retrieve_latest(Package) class PackageFeed(Feed): feed_type = GuidNotPermalinkFeed @@ -125,28 +105,14 @@ class PackageFeed(Feed): return (item.repo.name, item.arch.name) -def retrieve_news_latest(): - latest = cache.get(CACHE_NEWS_KEY) - if latest: - return latest - try: - latest = News.objects.values('last_modified').latest( - 'last_modified')['last_modified'] - # same thoughts apply as in retrieve_package_latest - cache.add(CACHE_NEWS_KEY, latest, CACHE_TIMEOUT) - return latest - except News.DoesNotExist: - pass - return None - def news_etag(request, *args, **kwargs): - latest = retrieve_news_latest() + latest = retrieve_latest(News) if latest: return md5_constructor(str(latest)).hexdigest() return None def news_last_modified(request, *args, **kwargs): - return retrieve_news_latest() + return retrieve_latest(News) class NewsFeed(Feed): feed_type = GuidNotPermalinkFeed diff --git a/main/models.py b/main/models.py index 8d34731f..38120134 100644 --- a/main/models.py +++ b/main/models.py @@ -128,8 +128,7 @@ class Package(models.Model): class Meta: db_table = 'packages' ordering = ('pkgname',) - #get_latest_by = 'last_update' - #ordering = ('-last_update',) + get_latest_by = 'last_update' def __unicode__(self): return self.pkgname @@ -390,10 +389,10 @@ def set_todolist_fields(sender, **kwargs): todolist.date_added = datetime.utcnow() # connect signals needed to keep cache in line with reality -from main.utils import refresh_package_latest +from main.utils import refresh_latest from django.db.models.signals import pre_save, post_save -post_save.connect(refresh_package_latest, sender=Package, +post_save.connect(refresh_latest, sender=Package, dispatch_uid="main.models") pre_save.connect(set_todolist_fields, sender=Todolist, dispatch_uid="main.models") diff --git a/main/utils.py b/main/utils.py index d7681cb6..12d12503 100644 --- a/main/utils.py +++ b/main/utils.py @@ -6,10 +6,8 @@ from django.core.cache import cache from django.utils.hashcompat import md5_constructor CACHE_TIMEOUT = 1800 -INVALIDATE_TIMEOUT = 15 - -CACHE_PACKAGE_KEY = 'cache_package_latest' -CACHE_NEWS_KEY = 'cache_news_latest' +INVALIDATE_TIMEOUT = 10 +CACHE_LATEST_PREFIX = 'cache_latest_' def cache_function_key(func, args, kwargs): raw = [func.__name__, func.__module__, args, kwargs] @@ -53,16 +51,34 @@ make_choice = lambda l: [(str(m), str(m)) for m in l] # and hoops otherwise. The only thing currently using these keys is the feed # caching stuff. -def refresh_package_latest(**kwargs): +def refresh_latest(**kwargs): + '''A post_save signal handler to clear out the cached latest value for a + given model.''' + cache_key = CACHE_LATEST_PREFIX + kwargs['sender'].__name__ # We could delete the value, but that could open a race condition # where the new data wouldn't have been committed yet by the calling # thread. Instead, explicitly set it to None for a short amount of time. # Hopefully by the time it expires we will have committed, and the cache # will be valid again. See "Scaling Django" by Mike Malone, slide 30. - cache.set(CACHE_PACKAGE_KEY, None, INVALIDATE_TIMEOUT) + cache.set(cache_key, None, INVALIDATE_TIMEOUT) -def refresh_news_latest(**kwargs): - # same thoughts apply as in refresh_package_latest - cache.set(CACHE_NEWS_KEY, None, INVALIDATE_TIMEOUT) +def retrieve_latest(sender): + # we could break this down based on the request url, but it would probably + # cost us more in query time to do so. + cache_key = CACHE_LATEST_PREFIX + sender.__name__ + latest = cache.get(cache_key) + if latest: + return latest + try: + latest_by = sender._meta.get_latest_by + latest = sender.objects.values(latest_by).latest()[latest_by] + # Using add means "don't overwrite anything in there". What could be in + # there is an explicit None value that our refresh signal set, which + # means we want to avoid race condition possibilities for a bit. + cache.add(cache_key, latest, CACHE_TIMEOUT) + return latest + except sender.DoesNotExist: + pass + return None # vim: set ts=4 sw=4 et: diff --git a/news/models.py b/news/models.py index 17d51de9..5e467515 100644 --- a/news/models.py +++ b/news/models.py @@ -36,10 +36,10 @@ def set_news_fields(sender, **kwargs): now.strftime('%Y-%m-%d'), news.get_absolute_url()) # connect signals needed to keep cache in line with reality -from main.utils import refresh_news_latest +from main.utils import refresh_latest from django.db.models.signals import pre_save, post_save -post_save.connect(refresh_news_latest, sender=News, +post_save.connect(refresh_latest, sender=News, dispatch_uid="news.models") pre_save.connect(set_news_fields, sender=News, dispatch_uid="news.models") -- cgit v1.2.3-55-g3dc8