diff options
author | jelle van der Waa <jelle@vdwaa.nl> | 2018-02-17 18:53:20 +0100 |
---|---|---|
committer | Angel Velásquez <angvp@archlinux.org> | 2018-02-17 12:53:20 -0500 |
commit | 73107e0a6ad446195e0656afeec378ae27396829 (patch) | |
tree | bfa1983977b56efe58e96b13343ab93fa50b5d25 /main | |
parent | 148692cd8fc79b3e43ef0f2c40a2da9d87c9da9e (diff) | |
download | archweb-73107e0a6ad446195e0656afeec378ae27396829.tar.gz archweb-73107e0a6ad446195e0656afeec378ae27396829.zip |
Add donation import management script (#81)
This script imports messages from a Maildir folder and creates new
unique Donors based on the name in the subject. The subject also
contains the amount and the email address, which are not stored by
Archweb.
Diffstat (limited to 'main')
-rw-r--r-- | main/management/__init__.py | 0 | ||||
-rw-r--r-- | main/management/commands/__init__.py | 0 | ||||
-rw-r--r-- | main/management/commands/donor_import.py | 121 | ||||
-rw-r--r-- | main/tests/__init__.py | 0 | ||||
-rw-r--r-- | main/tests/test_donor_import.py | 28 |
5 files changed, 149 insertions, 0 deletions
diff --git a/main/management/__init__.py b/main/management/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/main/management/__init__.py
diff --git a/main/management/commands/__init__.py b/main/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/main/management/commands/__init__.py
diff --git a/main/management/commands/donor_import.py b/main/management/commands/donor_import.py
new file mode 100644
index 00000000..fdc63f33
--- /dev/null
+++ b/main/management/commands/donor_import.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+"""
+donor_import
+
+Imports donators from the emails which are send to donate@archlinux.org,
+the subject of them email contains the name of the donator, the amount and
+the email address. Archweb's Donate model only contains the name, which is
+unique.
+
+An example subject:
+
+Subject: Receipt [$25.00] By: John Doe [john.doe@archlinux.org]
+
+Usage: ./manage.py donor_import path/to/maildir/
+"""
+
+import logging
+import mailbox
+import sys
+
+from email.header import decode_header
+
+from parse import parse
+
+from django.db.utils import Error as DBError
+from django.core.management.base import BaseCommand
+from main.models import Donor
+
+
+logging.basicConfig(
+    level=logging.WARNING,
+    format=u'%(asctime)s -> %(levelname)s: %(message)s',
+    datefmt=u'%Y-%m-%d %H:%M:%S',
+    stream=sys.stderr)
+logger = logging.getLogger()
+
+
+class Command(BaseCommand):
+
+    def add_arguments(self, parser):
+        parser.add_argument('maildir', type=str)
+
+
+    def decode_subject(self, subject):
+        subject = decode_header(subject)
+        default_charset = 'ASCII'
+        # Convert the list of tuples containing the decoded string and encoding to
+        # the same encoding.
+        return u''.join([unicode(s[0], s[1] or default_charset) for s in subject])
+
+
+    def parse_subject(self, subject):
+        """Format of the subject is as following: Receipt [$amount] By: John Doe [mail]"""
+
+        parsed = parse("Receipt [{amount}] By: {name} [{email}]", subject)
+
+        if parsed:
+            return parsed['name']
+
+
+    def sanitize_name(self, name):
+        """Sanitizes the parsed name and removes numbers, entries with no
+        valid characters and finaly trims all excess whitespace"""
+
+        # Some submissions contain no alphabetic characters, skip them
+        if all(not l.isalpha() for l in name):
+            return u''
+
+        # Strip any numbers, they could be a bank account number
+        name = filter(lambda x: not x.isdigit(), name)
+
+        # Normalize all capitalized names. (JOHN DOE)
+        name = u' '.join(l.capitalize() for l in name.split(u' '))
+
+        # Trim excess spaces
+        name = name.rstrip().lstrip()
+
+        return name
+
+
+    def handle(self, *args, **options):
+        v = int(options.get('verbosity', 0))
+        if v == 0:
+            logger.level = logging.ERROR
+        elif v == 1:
+            logger.level = logging.INFO
+        elif v >= 2:
+            logger.level = logging.DEBUG
+
+        try:
+            directory = options['maildir']
+            maildir = mailbox.Maildir(directory, create=False)
+        except mailbox.Error:
+            logger.error(u"Failed to open maildir: '%s'", directory)
+            return 0
+
+        for msg in maildir:
+            subject = msg.get('subject', '')
+            if 'utf-8' in subject:
+                # Decode UTF-8 encoded subjects
+                subject = self.decode_subject(subject)
+
+            # Subject header can contain enters, replace them with a space
+            subject = subject.replace(u'\n', u' ')
+
+            name = self.parse_subject(subject)
+            if not name:
+                logger.error(u'Unable to parse: %s', subject)
+                continue
+
+            name = self.sanitize_name(name)
+            if not name:
+                logger.error(u'Invalid name in subject: %s', subject)
+                continue
+
+            try:
+                _, created = Donor.objects.get_or_create(name=name)
+                if created:
+                    logger.info(u'Adding donor: {}'.format(name))
+            except DBError as e:
+                logger.info(u'Error while adding donor: %s, %s', name, e)
diff --git a/main/tests/__init__.py b/main/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/main/tests/__init__.py
diff --git a/main/tests/test_donor_import.py b/main/tests/test_donor_import.py
new file mode 100644
index 00000000..14609978
--- /dev/null
+++ b/main/tests/test_donor_import.py
@@ -0,0 +1,28 @@
+from django.test import SimpleTestCase
+
+
+from main.management.commands.donor_import import Command
+
+
+class DonorImportTest(SimpleTestCase):
+
+    def setUp(self):
+        self.command = Command()
+
+    def gen_parse_subject(self, data):
+        return self.command.parse_subject(valid.format(data))
+
+    def test_parse_subject(self):
+        self.assertIsNone(self.command.parse_subject('garbage'))
+
+        # Valid
+        valid = u'Receipt [$25.00] By: John Doe [john.doe@archlinux.org]'
+        output = self.command.parse_subject(valid)
+        self.assertEqual(output, u'John Doe')
+
+
+    def test_parse_name(self):
+        self.assertEqual(self.command.sanitize_name(u'1244'), u'')
+        self.assertEqual(self.command.sanitize_name(u'John Doe'), u'John Doe')
+        self.assertEqual(self.command.sanitize_name(u' John Doe '), u'John Doe')
+        self.assertEqual(self.command.sanitize_name(u'John Doe 23'), u'John Doe')