summaryrefslogtreecommitdiffstats
path: root/main/management/commands/donor_import.py
blob: 12dc0aa9c17af30d08c9f1eb85e171963b9f2ad3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# -*- coding: utf-8 -*-
"""
donor_import

Imports donors from the emails which are sent to donate@archlinux.org.
The subject of each email contains the name of the donor, the amount and
the email address. Archweb's Donor model only contains the name, which is
unique.

An example subject:

Subject: Receipt [$25.00] By: John Doe [john.doe@archlinux.org]

Usage: ./manage.py donor_import path/to/maildir/
"""

import codecs
import logging
import mailbox
import sys

from email.header import decode_header

from parse import parse

from django.db.utils import Error as DBError
from django.core.management.base import BaseCommand, CommandError
from main.models import Donor


# Send timestamped log records to stderr. WARNING is only the initial
# threshold for the root logger that the command below works with.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.WARNING,
    datefmt=u'%Y-%m-%d %H:%M:%S',
    format=u'%(asctime)s -> %(levelname)s: %(message)s',
)
# The root logger is used on purpose: no name is passed to getLogger().
logger = logging.getLogger()


class Command(BaseCommand):
    """Import donor names from a maildir of donation receipt emails.

    Each message whose subject matches
    ``Receipt [$amount] By: John Doe [mail]`` yields one ``Donor`` row keyed
    on the sanitized name. Messages that cannot be decoded or parsed, or
    whose name is unusable, are logged and skipped.
    """

    def add_arguments(self, parser):
        """Register the positional ``maildir`` argument (path to the maildir)."""
        parser.add_argument('maildir', type=str)

    def decode_subject(self, subject):
        """Return ``subject`` with its RFC 2047 encoded words decoded to text.

        ``subject`` is the raw header value. Parts that ``decode_header``
        already returns as text are passed through unchanged. Byte parts are
        decoded with their declared charset, defaulting to UTF-8. A part
        with an unknown or mislabelled charset is decoded as UTF-8 with
        replacement characters rather than raising.

        Raises whatever ``decode_header`` raises for a malformed encoded
        word (``email.errors.HeaderParseError``).
        """
        default_charset = 'utf-8'
        decoded = []
        for part, charset in decode_header(subject):
            if not isinstance(part, bytes):
                # Python 3 returns plain text when the header holds no
                # encoded words; there is nothing left to decode.
                decoded.append(part)
                continue
            try:
                decoded.append(codecs.decode(part, charset or default_charset))
            except (LookupError, UnicodeError):
                # Unknown codec name or bytes that do not match the declared
                # charset: keep whatever is readable instead of failing.
                decoded.append(codecs.decode(part, default_charset, 'replace'))
        return u''.join(decoded)

    def parse_subject(self, subject):
        """Return the donor name found in ``subject``, or ``None``.

        Format of the subject is as following:
        Receipt [$amount] By: John Doe [mail]
        """
        parsed = parse("Receipt [{amount}] By: {name} [{email}]", subject)
        if parsed:
            return parsed['name']
        return None

    def sanitize_name(self, name):
        """Return a cleaned-up donor name, or ``u''`` if it is unusable.

        Names without a single alphabetic character are rejected. Digits
        are removed, every space-separated word is capitalized, and leading
        and trailing whitespace is trimmed.
        """
        # Some submissions contain no alphabetic characters, skip them
        if not any(char.isalpha() for char in name):
            return u''

        # Strip any numbers, they could be a bank account number
        name = u''.join(char for char in name if not char.isdigit())

        # Normalize all capitalized names. (JOHN DOE)
        # str.capitalize() lowercases the rest of each word, so mixed-case
        # names are normalized the same way.
        name = u' '.join(word.capitalize() for word in name.split(u' '))

        # Trim excess spaces at both ends
        return name.strip()

    def handle(self, *args, **options):
        """Walk the maildir and create a ``Donor`` for every new name.

        The ``verbosity`` option selects the log level: 0 gives ERROR, 1
        gives INFO, 2 or higher gives DEBUG.

        Raises ``CommandError`` when the maildir cannot be opened.
        """
        # Local import: only this method needs it.
        from email.errors import HeaderParseError

        verbosity = int(options.get('verbosity', 0))
        # setLevel() is used instead of assigning ``logger.level`` so the
        # logging module can invalidate its cached level checks.
        if verbosity == 0:
            logger.setLevel(logging.ERROR)
        elif verbosity == 1:
            logger.setLevel(logging.INFO)
        elif verbosity >= 2:
            logger.setLevel(logging.DEBUG)

        directory = options['maildir']
        try:
            maildir = mailbox.Maildir(directory, create=False)
        except mailbox.Error:
            raise CommandError(u"Failed to open maildir")

        for msg in maildir:
            raw_subject = msg.get('subject', '')
            # Always decode: encoded words may be tagged with any charset
            # and in any letter case (e.g. =?UTF-8?B?...?=), so looking for
            # a literal 'utf-8' misses valid subjects.
            try:
                subject = self.decode_subject(raw_subject)
            except HeaderParseError:
                # One malformed message must not abort the whole import.
                logger.error(u'Unable to decode subject: %s', raw_subject)
                continue

            # Subject header can contain enters, replace them with a space
            subject = subject.replace(u'\n', u' ')

            name = self.parse_subject(subject)
            if not name:
                logger.error(u'Unable to parse: %s', subject)
                continue

            name = self.sanitize_name(name)
            if not name:
                logger.error(u'Invalid name in subject: %s', subject)
                continue

            try:
                _, created = Donor.objects.get_or_create(name=name)
            except DBError as e:
                # Logged as an error so failures stay visible at the
                # default verbosity, where the level is ERROR.
                logger.error(u'Error while adding donor: %s, %s', name, e)
            else:
                if created:
                    logger.info(u'Adding donor: %s', name)