openmedialibrary_platform_w.../Tools/scripts/mailerdaemon.py

#!/usr/bin/env python3
"""Classes to parse mailer-daemon messages."""

import calendar
import email.message
import re
import os
import sys


class Unparseable(Exception):
    pass


class ErrorMessage(email.message.Message):
    def __init__(self):
        email.message.Message.__init__(self)
        self.sub = ''

    def is_warning(self):
        sub = self.get('Subject')
        if not sub:
            return 0
        sub = sub.lower()
        if sub.startswith('waiting mail'):
            return 1
        if 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def get_errors(self):
        for p in EMPARSERS:
            self.rewindbody()
            try:
                return p(self.fp, self.sub)
            except Unparseable:
                pass
        raise Unparseable

# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address.  The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's.  The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses.  The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# compile the re's in the list and store them in-place.
for i in range(len(emparse_list_list)):
    x = emparse_list_list[i]
    if type(x) is type(''):
        x = re.compile(x, re.MULTILINE)
    else:
        xl = []
        for x in x:
            xl.append(re.compile(x, re.MULTILINE))
        x = tuple(xl)
        del xl
    emparse_list_list[i] = x
    del x
del i

# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order.  After the first match,
# no more expressions are searched for.  So, order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    data = fp.read()
    res = emparse_list_from.search(data)
    if res is None:
        from_index = len(data)
    else:
        from_index = res.start(0)
    errors = []
    emails = []
    reason = None
    for regexp in emparse_list_list:
        if type(regexp) is type(()):
            res = regexp[0].search(data, 0, from_index)
            if res is not None:
                try:
                    reason = res.group('reason')
                except IndexError:
                    pass
                while 1:
                    res = regexp[1].match(data, res.end(0), from_index)
                    if res is None:
                        break
                    emails.append(res.group('email'))
                break
        else:
            res = regexp.search(data, 0, from_index)
            if res is not None:
                emails.append(res.group('email'))
                try:
                    reason = res.group('reason')
                except IndexError:
                    pass
                break
    if not emails:
        raise Unparseable
    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if type(regexp) is type(''):
                for i in range(len(emails)-1,-1,-1):
                    email = emails[i]
                    exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
                    res = exp.search(data)
                    if res is not None:
                        errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
                        del emails[i]
                continue
            res = regexp.search(data)
            if res is not None:
                reason = res.group('reason')
                break
    for email in emails:
        errors.append(' '.join((email.strip()+': '+reason).split()))
    return errors

EMPARSERS = [emparse_list]

def sort_numeric(a, b):
    a = int(a)
    b = int(b)
    if a < b:
        return -1
    elif a > b:
        return 1
    else:
        return 0

def parsedir(dir, modify):
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them
    files = list(filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')))
    files.sort(sort_numeric)

    for fn in files:
        # Lets try to parse the file.
        fp = open(fn)
        m = email.message_from_file(fp, _class=ErrorMessage)
        sender = m.getaddr('From')
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

        if m.is_warning():
            fp.close()
            print('warning only')
            nwarn = nwarn + 1
            if modify:
                os.rename(fn, ','+fn)
##              os.unlink(fn)
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad = nbad + 1
            fp.close()
            continue
        print(len(errors), 'errors')

        # Remember them
        for e in errors:
            try:
                mm, dd = m.getdate('date')[1:1+2]
                date = '%s %02d' % (calendar.month_abbr[mm], dd)
            except:
                date = '??????'
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = '%s (%s)' % (fn, date)
            else:
                errordict[e] = errordict[e] + 1
            errorlast[e] = '%s (%s)' % (fn, date)

        fp.close()
        nok = nok + 1
        if modify:
            os.rename(fn, ','+fn)
##          os.unlink(fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    list = []
    for e in errordict.keys():
        list.append((errordict[e], errorfirst[e], errorlast[e], e))
    list.sort()
    for num, first, last, e in list:
        print('%d %s - %s\t%s' % (num, first, last, e))

def main():
    modify = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        modify = 1
        del sys.argv[1]
    if len(sys.argv) > 1:
        for folder in sys.argv[1:]:
            parsedir(folder, modify)
    else:
        parsedir('/ufs/jack/Mail/errorsinbox', modify)

if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
update windows build to Python 3.7 2019-01-20 10:35:31 +00:00			`#!/usr/bin/env python3`
			`"""Classes to parse mailer-daemon messages."""`

			`import calendar`
			`import email.message`
			`import re`
			`import os`
			`import sys`


			`class Unparseable(Exception):`
			`pass`


			`class ErrorMessage(email.message.Message):`
			`def __init__(self):`
			`email.message.Message.__init__(self)`
			`self.sub = ''`

			`def is_warning(self):`
			`sub = self.get('Subject')`
			`if not sub:`
			`return 0`
			`sub = sub.lower()`
			`if sub.startswith('waiting mail'):`
			`return 1`
			`if 'warning' in sub:`
			`return 1`
			`self.sub = sub`
			`return 0`

			`def get_errors(self):`
			`for p in EMPARSERS:`
			`self.rewindbody()`
			`try:`
			`return p(self.fp, self.sub)`
			`except Unparseable:`
			`pass`
			`raise Unparseable`

			`# List of re's or tuples of re's.`
			`# If a re, it should contain at least a group (?P<email>...) which`
			`# should refer to the email address. The re can also contain a group`
			`# (?P<reason>...) which should refer to the reason (error message).`
			`# If no reason is present, the emparse_list_reason list is used to`
			`# find a reason.`
			`# If a tuple, the tuple should contain 2 re's. The first re finds a`
			`# location, the second re is repeated one or more times to find`
			`# multiple email addresses. The second re is matched (not searched)`
			`# where the previous match ended.`
			`# The re's are compiled using the re module.`
			`emparse_list_list = [`
			`'error: (?P<reason>unresolvable): (?P<email>.+)',`
			`('----- The following addresses had permanent fatal errors -----\n',`
			`'(?P<email>[^ \n].)\n( .\n)?'),`
			`'remote execution.\n.rmail (?P<email>.+)',`
			`('The following recipients did not receive your message:\n\n',`
			`' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),`
			`'------- Failure Reasons --------\n\n(?P<reason>.)\n(?P<email>.)',`
			`'^<(?P<email>.)>:\n(?P<reason>.)',`
			`'^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',`
			`'^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',`
			`'^Original-Recipient: rfc822;(?P<email>.*)',`
			`'^did not reach the following recipient\\(s\\):\n\n(?P<email>.) on .\n +(?P<reason>.*)',`
			`'^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',`
			`'^Report on your message to: (?P<email>.)\nReason: (?P<reason>.)',`
			`'^Your message was not delivered to +(?P<email>.)\n +for the following reason:\n +(?P<reason>.)',`
			`'^ was not +(?P<email>[^ \n].?) \n.\n.\n.\n because:.\n +(?P<reason>[^ \n].?) \n',`
			`]`
			`# compile the re's in the list and store them in-place.`
			`for i in range(len(emparse_list_list)):`
			`x = emparse_list_list[i]`
			`if type(x) is type(''):`
			`x = re.compile(x, re.MULTILINE)`
			`else:`
			`xl = []`
			`for x in x:`
			`xl.append(re.compile(x, re.MULTILINE))`
			`x = tuple(xl)`
			`del xl`
			`emparse_list_list[i] = x`
			`del x`
			`del i`

			`# list of re's used to find reasons (error messages).`
			`# if a string, "<>" is replaced by a copy of the email address.`
			`# The expressions are searched for in order. After the first match,`
			`# no more expressions are searched for. So, order is important.`
			`emparse_list_reason = [`
			`r'^5\d{2} <>\.\.\. (?P<reason>.*)',`
			`r'<>\.\.\. (?P<reason>.*)',`
			`re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),`
			`re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),`
			`re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),`
			`]`
			`emparse_list_from = re.compile('^From:', re.IGNORECASE\|re.MULTILINE)`
			`def emparse_list(fp, sub):`
			`data = fp.read()`
			`res = emparse_list_from.search(data)`
			`if res is None:`
			`from_index = len(data)`
			`else:`
			`from_index = res.start(0)`
			`errors = []`
			`emails = []`
			`reason = None`
			`for regexp in emparse_list_list:`
			`if type(regexp) is type(()):`
			`res = regexp[0].search(data, 0, from_index)`
			`if res is not None:`
			`try:`
			`reason = res.group('reason')`
			`except IndexError:`
			`pass`
			`while 1:`
			`res = regexp[1].match(data, res.end(0), from_index)`
			`if res is None:`
			`break`
			`emails.append(res.group('email'))`
			`break`
			`else:`
			`res = regexp.search(data, 0, from_index)`
			`if res is not None:`
			`emails.append(res.group('email'))`
			`try:`
			`reason = res.group('reason')`
			`except IndexError:`
			`pass`
			`break`
			`if not emails:`
			`raise Unparseable`
			`if not reason:`
			`reason = sub`
			`if reason[:15] == 'returned mail: ':`
			`reason = reason[15:]`
			`for regexp in emparse_list_reason:`
			`if type(regexp) is type(''):`
			`for i in range(len(emails)-1,-1,-1):`
			`email = emails[i]`
			`exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)`
			`res = exp.search(data)`
			`if res is not None:`
			`errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))`
			`del emails[i]`
			`continue`
			`res = regexp.search(data)`
			`if res is not None:`
			`reason = res.group('reason')`
			`break`
			`for email in emails:`
			`errors.append(' '.join((email.strip()+': '+reason).split()))`
			`return errors`

			`EMPARSERS = [emparse_list]`

			`def sort_numeric(a, b):`
			`a = int(a)`
			`b = int(b)`
			`if a < b:`
			`return -1`
			`elif a > b:`
			`return 1`
			`else:`
			`return 0`

			`def parsedir(dir, modify):`
			`os.chdir(dir)`
			`pat = re.compile('^[0-9]*$')`
			`errordict = {}`
			`errorfirst = {}`
			`errorlast = {}`
			`nok = nwarn = nbad = 0`

			`# find all numeric file names and sort them`
			`files = list(filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')))`
			`files.sort(sort_numeric)`

			`for fn in files:`
			`# Lets try to parse the file.`
			`fp = open(fn)`
			`m = email.message_from_file(fp, _class=ErrorMessage)`
			`sender = m.getaddr('From')`
			`print('%s\t%-40s\t'%(fn, sender[1]), end=' ')`

			`if m.is_warning():`
			`fp.close()`
			`print('warning only')`
			`nwarn = nwarn + 1`
			`if modify:`
			`os.rename(fn, ','+fn)`
			`## os.unlink(fn)`
			`continue`

			`try:`
			`errors = m.get_errors()`
			`except Unparseable:`
			`print('** Not parseable')`
			`nbad = nbad + 1`
			`fp.close()`
			`continue`
			`print(len(errors), 'errors')`

			`# Remember them`
			`for e in errors:`
			`try:`
			`mm, dd = m.getdate('date')[1:1+2]`
			`date = '%s %02d' % (calendar.month_abbr[mm], dd)`
			`except:`
			`date = '??????'`
			`if e not in errordict:`
			`errordict[e] = 1`
			`errorfirst[e] = '%s (%s)' % (fn, date)`
			`else:`
			`errordict[e] = errordict[e] + 1`
			`errorlast[e] = '%s (%s)' % (fn, date)`

			`fp.close()`
			`nok = nok + 1`
			`if modify:`
			`os.rename(fn, ','+fn)`
			`## os.unlink(fn)`

			`print('--------------')`
			`print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')`
			`print(nbad,'files unparseable')`
			`print('--------------')`
			`list = []`
			`for e in errordict.keys():`
			`list.append((errordict[e], errorfirst[e], errorlast[e], e))`
			`list.sort()`
			`for num, first, last, e in list:`
			`print('%d %s - %s\t%s' % (num, first, last, e))`

			`def main():`
			`modify = 0`
			`if len(sys.argv) > 1 and sys.argv[1] == '-d':`
			`modify = 1`
			`del sys.argv[1]`
			`if len(sys.argv) > 1:`
			`for folder in sys.argv[1:]:`
			`parsedir(folder, modify)`
			`else:`
			`parsedir('/ufs/jack/Mail/errorsinbox', modify)`

			`if __name__ == '__main__' or sys.argv[0] == __name__:`
			`main()`