summaryrefslogtreecommitdiffstats
path: root/main
diff options
context:
space:
mode:
authorjelle van der Waa <jelle@vdwaa.nl>2018-02-17 18:53:20 +0100
committerAngel Velásquez <angvp@archlinux.org>2018-02-17 12:53:20 -0500
commit73107e0a6ad446195e0656afeec378ae27396829 (patch)
treebfa1983977b56efe58e96b13343ab93fa50b5d25 /main
parent148692cd8fc79b3e43ef0f2c40a2da9d87c9da9e (diff)
downloadarchweb-73107e0a6ad446195e0656afeec378ae27396829.tar.gz
archweb-73107e0a6ad446195e0656afeec378ae27396829.zip
Add donation import management script (#81)
This script imports messages from a Maildir folder and creates new unique Donors based on the name in the subject. The subject also contains the amount and the email address, neither of which is stored by Archweb.
Diffstat (limited to 'main')
-rw-r--r--main/management/__init__.py0
-rw-r--r--main/management/commands/__init__.py0
-rw-r--r--main/management/commands/donor_import.py121
-rw-r--r--main/tests/__init__.py0
-rw-r--r--main/tests/test_donor_import.py28
5 files changed, 149 insertions, 0 deletions
diff --git a/main/management/__init__.py b/main/management/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/main/management/__init__.py
diff --git a/main/management/commands/__init__.py b/main/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/main/management/commands/__init__.py
diff --git a/main/management/commands/donor_import.py b/main/management/commands/donor_import.py
new file mode 100644
index 00000000..fdc63f33
--- /dev/null
+++ b/main/management/commands/donor_import.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+"""
+donor_import
+
+Imports donors from the emails which are sent to donate@archlinux.org.
+The subject of each email contains the name of the donor, the amount and
+the email address. Archweb's Donor model only contains the name, which is
+unique.
+
+An example subject:
+
+Subject: Receipt [$25.00] By: John Doe [john.doe@archlinux.org]
+
+Usage: ./manage.py donor_import path/to/maildir/
+"""
+
+import logging
+import mailbox
+import sys
+
+from email.header import decode_header
+
+from parse import parse
+
+from django.db.utils import Error as DBError
+from django.core.management.base import BaseCommand
+from main.models import Donor
+
+
+# Send timestamped log records to stderr; WARNING is only the initial
+# threshold, Command.handle() adjusts it from the --verbosity option.
+logging.basicConfig(
+    stream=sys.stderr,
+    datefmt=u'%Y-%m-%d %H:%M:%S',
+    format=u'%(asctime)s -> %(levelname)s: %(message)s',
+    level=logging.WARNING)
+# No name is passed, so this is the root logger.
+logger = logging.getLogger()
+
+
+class Command(BaseCommand):
+    """Create Donor entries from the receipt emails stored in a Maildir.
+
+    Every message subject is expected to look like::
+
+        Receipt [$25.00] By: John Doe [john.doe@archlinux.org]
+
+    Only the sanitized name is kept; it is stored through
+    ``Donor.objects.get_or_create`` so existing names are not duplicated.
+    """
+
+    def add_arguments(self, parser):
+        """Register the positional ``maildir`` argument (path to the Maildir)."""
+        parser.add_argument('maildir', type=str)
+
+    def decode_subject(self, subject):
+        """Decode an RFC 2047 encoded ``subject`` header into one text string.
+
+        ``decode_header`` returns ``(fragment, charset)`` pairs; fragments
+        without an explicit charset are decoded as ASCII.
+        """
+        default_charset = 'ASCII'
+        fragments = []
+        for fragment, charset in decode_header(subject):
+            # Byte fragments still need decoding; on Python 3 an unencoded
+            # header comes back as text already and is used as-is.
+            if isinstance(fragment, bytes):
+                fragment = fragment.decode(charset or default_charset)
+            fragments.append(fragment)
+        return u''.join(fragments)
+
+    def parse_subject(self, subject):
+        """Return the donor name found in ``subject``, or None.
+
+        Format of the subject is as following:
+        Receipt [$amount] By: John Doe [mail]
+        """
+        parsed = parse("Receipt [{amount}] By: {name} [{email}]", subject)
+        if not parsed:
+            return None
+        return parsed['name']
+
+    def sanitize_name(self, name):
+        """Return a cleaned-up version of the parsed ``name``.
+
+        Digits are removed, every word is capitalized and all excess
+        whitespace (leading, trailing and repeated) is dropped. An empty
+        string is returned when ``name`` has no alphabetic character at all.
+        """
+        # Some submissions contain no alphabetic characters, skip them
+        if not any(char.isalpha() for char in name):
+            return u''
+
+        # Strip any numbers, they could be a bank account number
+        name = u''.join(char for char in name if not char.isdigit())
+
+        # Normalize all capitalized names (JOHN DOE). Splitting on any run
+        # of whitespace also removes the gaps left behind by stripped digits
+        # or unfolded header lines.
+        return u' '.join(word.capitalize() for word in name.split())
+
+    def handle(self, *args, **options):
+        """Import a Donor for every parsable message subject in the Maildir.
+
+        The log level follows ``--verbosity``: 0 shows errors only, 1 also
+        shows added donors, 2 and above enables debug output. Messages whose
+        subject cannot be parsed or yields no usable name are logged and
+        skipped.
+        """
+        verbosity = int(options.get('verbosity', 0))
+        if verbosity == 0:
+            logger.setLevel(logging.ERROR)
+        elif verbosity == 1:
+            logger.setLevel(logging.INFO)
+        elif verbosity >= 2:
+            logger.setLevel(logging.DEBUG)
+
+        directory = options['maildir']
+        try:
+            maildir = mailbox.Maildir(directory, create=False)
+        except mailbox.Error:
+            logger.error(u"Failed to open maildir: '%s'", directory)
+            return 0
+
+        for msg in maildir:
+            subject = msg.get('subject', '')
+            # Charset names in encoded words are case-insensitive, so match
+            # both '=?utf-8?' and '=?UTF-8?'.
+            if 'utf-8' in subject.lower():
+                # Decode UTF-8 encoded subjects
+                subject = self.decode_subject(subject)
+
+            # Subject header can contain enters, replace them with a space
+            subject = subject.replace(u'\n', u' ')
+
+            name = self.parse_subject(subject)
+            if not name:
+                logger.error(u'Unable to parse: %s', subject)
+                continue
+
+            name = self.sanitize_name(name)
+            if not name:
+                logger.error(u'Invalid name in subject: %s', subject)
+                continue
+
+            try:
+                _, created = Donor.objects.get_or_create(name=name)
+            except DBError as e:
+                # Logged as an error so it stays visible at verbosity 0
+                logger.error(u'Error while adding donor: %s, %s', name, e)
+            else:
+                if created:
+                    logger.info(u'Adding donor: %s', name)
diff --git a/main/tests/__init__.py b/main/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/main/tests/__init__.py
diff --git a/main/tests/test_donor_import.py b/main/tests/test_donor_import.py
new file mode 100644
index 00000000..14609978
--- /dev/null
+++ b/main/tests/test_donor_import.py
@@ -0,0 +1,28 @@
+from django.test import SimpleTestCase
+
+
+from main.management.commands.donor_import import Command
+
+
+class DonorImportTest(SimpleTestCase):
+    """Tests for the subject parsing and name sanitizing of donor_import."""
+
+    # Receipt subject with a placeholder where the donor name goes
+    subject_template = u'Receipt [$25.00] By: {} [john.doe@archlinux.org]'
+
+    def setUp(self):
+        self.command = Command()
+
+    def gen_parse_subject(self, data):
+        """Parse a receipt subject that has ``data`` in the name position."""
+        return self.command.parse_subject(self.subject_template.format(data))
+
+    def test_parse_subject(self):
+        self.assertIsNone(self.command.parse_subject('garbage'))
+
+        # Valid
+        valid = u'Receipt [$25.00] By: John Doe [john.doe@archlinux.org]'
+        output = self.command.parse_subject(valid)
+        self.assertEqual(output, u'John Doe')
+
+        # Same subject, built through the template helper
+        self.assertEqual(self.gen_parse_subject(u'John Doe'), u'John Doe')
+
+    def test_parse_name(self):
+        self.assertEqual(self.command.sanitize_name(u'1244'), u'')
+        self.assertEqual(self.command.sanitize_name(u'John Doe'), u'John Doe')
+        self.assertEqual(self.command.sanitize_name(u' John Doe '), u'John Doe')
+        self.assertEqual(self.command.sanitize_name(u'John Doe 23'), u'John Doe')