From ce0666f23b5b5aa744b295e6cf39d409dbd7a3cc Mon Sep 17 00:00:00 2001 From: Dominik Chilla Date: Wed, 24 Apr 2019 20:39:13 +0200 Subject: [PATCH] import processing with keyword tagging instead of flagging SEEN/UNSEEN --- app/Entities.py | 16 ++---- app/Gulag.py | 70 ++++++++++++++++++------- app/GulagDB.py | 3 +- app/GulagMailbox.py | 73 ++++++++++++++++++++++++--- app/gulag_helpers.py | 11 ++-- db/gulag.sql | 4 +- docker-build.sh | 4 +- docker/gulag-server/debian/Dockerfile | 6 +-- snippets/IMAP_commands.txt | 11 ++++ 9 files changed, 148 insertions(+), 50 deletions(-) create mode 100644 snippets/IMAP_commands.txt diff --git a/app/Entities.py b/app/Entities.py index 99cff8c..d7b438f 100644 --- a/app/Entities.py +++ b/app/Entities.py @@ -56,9 +56,7 @@ class Mailbox: imap_security = None imap_user = None imap_pass = None - imap_mailbox = None - imap_mailbox_fp = None - imap_separator = None + imap_inbox = None mailrelay_id = None comment = None href = None @@ -90,15 +88,9 @@ class Mailbox: if 'imap_pass' not in mb_ref: raise MailboxException("'imap_pass' is mandatory!") self.imap_pass = mb_ref['imap_pass'] - if 'imap_mailbox' not in mb_ref: - raise MailboxException("'imap_mailbox' is mandatory!") - self.imap_mailbox = mb_ref['imap_mailbox'] - if 'imap_mailbox_fp' not in mb_ref: - raise MailboxException("'imap_mailbox_fp' is mandatory!") - self.imap_mailbox_fp = mb_ref['imap_mailbox_fp'] - if 'imap_separator' not in mb_ref: - raise MailboxException("'imap_separator' is mandatory!") - self.imap_seperator = mb_ref['imap_separator'] + if 'imap_inbox' not in mb_ref: + raise MailboxException("'imap_inbox' is mandatory!") + self.imap_inbox = mb_ref['imap_inbox'] if 'mailrelay_id' not in mb_ref: raise MailboxException("'mailrelay_id' is mandatory!") self.mailrelay_id = mb_ref['mailrelay_id'] diff --git a/app/Gulag.py b/app/Gulag.py index a04f782..2a4013f 100644 --- a/app/Gulag.py +++ b/app/Gulag.py @@ -1,5 +1,6 @@ import json, sys,os,logging,re,magic import email,email.header,email.message +from email import policy from GulagDB import ( GulagDB,GulagDBException,GulagDBNotFoundException,GulagDBBadInputException ) @@ -65,6 +66,15 @@ class Gulag: except GulagDBException as e: logging.warning(whoami(self) + e.message) raise GulagException(whoami(self) + e.message) from e + # Init mailboxes/folders + for mailbox in self.db.get_mailboxes(): + try: + imap_mb = IMAPmailbox(mailbox) + imap_mb.init_folders() + imap_mb.close + except IMAPmailboxException as e: + logging.warning(whoami(self) + e.message) + continue def check_filters(self,fields_target,filters): if fields_target not in self.fields: @@ -100,20 +110,27 @@ class Gulag: messages = [] try: imap_mb = IMAPmailbox(mailbox) - messages = imap_mb.get_unseen_messages() + messages = imap_mb.get_new_messages() except IMAPmailboxException as e: logging.warning(whoami(self) + e.message) continue - for unseen in messages: + for message in messages: quarmail_ids = [] attachments = [] uris = {} - uid = unseen['imap_uid'] - msg = email.message_from_bytes(unseen['msg']) + uid = message['imap_uid'] + msg = email.message_from_bytes(message['msg']) source_id = 'amavis' if 'X-Gulag-Source' in msg: source_id = email.header.decode_header(msg['X-Gulag-Source'])[0][0] - r5321_from = email.header.decode_header(msg['Return-Path'])[0][0] + try: + r5321_from = email.header.decode_header(msg['Return-Path'])[0][0] + except: + logging.warning(whoami(self) + + "Failed to get return-path header! Moving message to failed folder!" + ) + imap_mb.move_message(str(uid.decode()), 'failed') + continue if(r5321_from is not '<>'): r5321_from = r5321_from.replace("<","") r5321_from = r5321_from.replace(">","") @@ -122,17 +139,21 @@ class Gulag: r5321_rcpts = email.header.decode_header( msg['X-Envelope-To-Blocked'])[0][0] except: + # TODO: move_message to INBOX.failed logging.warning(whoami(self) + - "Failed to extract envelope recipients! Skipping mail" + "Failed to extract envelope recipients! Moving message to failed folder!" ) + imap_mb.move_message(str(uid.decode()), 'failed') continue r5322_from = None try: r5322_from = email.header.decode_header(msg['From'])[0][0] except: + # TODO: move_message to INBOX.failed logging.warning(whoami(self) + - "Failed to extract from header! Skipping mail" + "Failed to extract from header! Moving message to failed folder!" ) + imap_mb.move_message(str(uid.decode()), 'failed') continue subject = email.header.decode_header(msg['Subject'])[0][0] msg_id = None @@ -155,6 +176,13 @@ class Gulag: r5321_rcpts = r5321_rcpts.replace(" ", "") r5321_rcpts = r5321_rcpts.replace("<", "") r5321_rcpts = r5321_rcpts.replace(">", "") + try: + msg_serialized = msg.as_string() + except LookupError: + # LookupError: unknown encoding: _iso-2022-jp$esc + # https://github.com/coddingtonbear/django-mailbox/commit/aa59199c9b98ed317c6c95dc4018e21d1302858c + msg.set_payload(msg.get_payload(decode=True).decode('ascii','ignore')) + msg_serialized = msg.as_string() # Pro Envelope-RCPT einen Eintrag in die DB schreiben. # Die E-Mail im IMAP-Backend existiert jedoch nur ein Mal und wird # über die mailbox_id sowie die imap_uid mehrfach referenziert. @@ -165,9 +193,9 @@ class Gulag: 'env_rcpt': r5321_rcpt, 'hdr_cf': x_spam_status, 'hdr_from': r5322_from, 'hdr_subject': subject, 'hdr_msgid': msg_id, 'hdr_date': date, 'cf_meta': 'cf_meta', - 'mailbox_id': 'quarantine@zwackl.de', 'imap_uid': uid, - 'source_id': source_id, 'msg_size': len(msg.as_string()), - 'ssdeep': ssdeep.hash(msg.as_string()) + 'mailbox_id': mailbox['id'], 'imap_uid': uid, + 'source_id': source_id, 'msg_size': len(msg_serialized), + 'ssdeep': ssdeep.hash(msg_serialized) }) except GulagDBBadInputException as e: logging.warn(whoami(self) + e.message) @@ -180,6 +208,8 @@ class Gulag: ) quarmail_ids.append(quarmail_id) # End for rcpts + # Tag message as 'gulag_quarantined' in IMAP backend + imap_mb.retag_message(uid, 'gulag_quarantined') # Iterate through all MIME-parts and extract all # attachments (parts with a name/filename attribute) for part in msg.walk(): @@ -193,12 +223,18 @@ class Gulag: # filename isn´t encoded filename = filename[0][0] attach_decoded = part.get_payload(decode=True) + try: + mgc = magic.from_buffer(attach_decoded) + mime_type = magic.from_buffer(attach_decoded, mime=True) + except TypeError as e: + logging.warning(whoami(self) + str(e)) + continue attach_id = self.db.add_attachment({ 'filename': filename, 'content_type': part.get_content_type(), 'content_encoding': part['Content-Transfer-Encoding'], - 'magic': magic.from_buffer(attach_decoded), - 'mime_type': magic.from_buffer(attach_decoded, mime=True), + 'magic': mgc, + 'mime_type': mime_type, 'sha256': hashlib.sha256(attach_decoded).hexdigest(), 'ssdeep': ssdeep.hash(attach_decoded), 'size': len(attach_decoded) @@ -239,7 +275,7 @@ class Gulag: ) except GulagDBException as e: logging.error(whoami(self) + e.message) - # End for(unseen) + # End for(messages) imap_mb.close() # End for get_mailboxes @@ -425,12 +461,12 @@ class Gulag: mailrelay = GulagMailrelay(mailrelay_ref) mailrelay.release_quarmail(quarmail) logging.info(whoami(self) + - "QuarMail("+quarmail['id']+") released. env_rcpt: "+quarmail['env_rcpt'] + "QuarMail("+str(quarmail['id'])+") released. env_rcpt: "+quarmail['env_rcpt'] ) if 'purge' in args: self.delete_quarmail({"quarmail_id": args['quarmail_id']}) logging.info(whoami(self) + - "QuarMail(" + quarmail['id'] + ") deleted" + "QuarMail(" + str(quarmail['id']) + ") deleted" ) except GulagNotFoundException as e: raise GulagNotFoundException(whoami(self) + e.message) from e @@ -454,12 +490,12 @@ class Gulag: mailrelay = GulagMailrelay(mailrelay_ref) mailrelay.bounce_quarmail(quarmail) logging.info(whoami(self) + - "QuarMail("+quarmail['id']+") bounced back to "+quarmail['env_from'] + "QuarMail("+str(quarmail['id'])+") bounced back to "+quarmail['env_from'] ) if 'purge' in args: self.delete_quarmail({"quarmail_id": args['quarmail_id']}) logging.info(whoami(self) + - "QuarMail(" + quarmail['id'] + ") deleted" + "QuarMail(" + str(quarmail['id']) + ") deleted" ) except GulagNotFoundException as e: raise GulagNotFoundException(whoami(self) + e.message) from e diff --git a/app/GulagDB.py b/app/GulagDB.py index 2fc91a4..473cbd0 100644 --- a/app/GulagDB.py +++ b/app/GulagDB.py @@ -263,7 +263,8 @@ class GulagDB: "(mx_queue_id,env_from,env_rcpt,"+ "hdr_cf,hdr_from,hdr_subject,"+ "hdr_msgid,hdr_date,cf_meta,"+ - "mailbox_id,imap_uid,msg_size,ssdeep,source_id) " + + "mailbox_id,imap_uid,msg_size,ssdeep,"+ + "source_id) " + "values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", (quarmail['mx_queue_id'],quarmail['env_from'],quarmail['env_rcpt'], quarmail['hdr_cf'],quarmail['hdr_from'],quarmail['hdr_subject'], diff --git a/app/GulagMailbox.py b/app/GulagMailbox.py index 3018814..44daaf6 100644 --- a/app/GulagMailbox.py +++ b/app/GulagMailbox.py @@ -5,6 +5,8 @@ from email.parser import HeaderParser import time import re from GulagUtils import whoami +import logging + class IMAPmailboxException(Exception): message = None @@ -16,15 +18,20 @@ class IMAPmailbox: imap_server = None imap_user = None imap_pass = None - imap_mailbox = None + imap_inbox = None mailbox = None + tags = ( + 'gulag_quarantined', + 'gulag_released', + 'gulag_bounced' + ) def __init__(self, mb_ref): self.id = mb_ref['id'] self.imap_server = mb_ref['imap_server'] self.imap_user = mb_ref['imap_user'] self.imap_pass = mb_ref['imap_pass'] - self.imap_mailbox = mb_ref['imap_mailbox'] + self.imap_inbox = mb_ref['imap_inbox'] try: self.mailbox = imaplib.IMAP4(self.imap_server) rv, data = self.mailbox.login(self.imap_user, self.imap_pass) @@ -36,20 +43,50 @@ class IMAPmailbox: raise IMAPmailboxException(whoami(self) + self.imap_user + ": IMAP server " + self.imap_server + " refused connection" ) from e - - rv, data = self.mailbox.select(self.imap_mailbox) + rv, data = self.mailbox.select(self.imap_inbox) if rv != 'OK': raise IMAPmailboxException(whoami(self) + - "ERROR: Unable to select mailbox: " + self.imap_mailbox + "ERROR: Unable to select mailbox: " + self.imap_inbox ) + def init_folders(self): + # Check for all mandatory folders + mandatory_folders = { + "failed": False + } + rv, data = self.mailbox.list('""', '*') + if rv != 'OK': + raise IMAPmailboxException(whoami(self) + + "ERROR: Unable to list mailbox: " + self.imap_inbox + ) + for folder in data: + # (\HasChildren \Trash) "." Trash + p = re.compile(r'^.+".+" (\S+)$') + m = p.search(folder.decode()) + name = m.group(1) + if name == 'failed': + mandatory_folders['failed'] = True + # create mandatory folders if needed + for folder in mandatory_folders: + if mandatory_folders[folder] == False: + rv, data = self.mailbox.create(folder) + if rv != 'OK': + raise IMAPmailboxException(whoami(self) + + "ERROR: Unable to create folder: " + folder + ) + def close(self): self.mailbox.close() self.mailbox.logout() - def get_unseen_messages(self): + def get_new_messages(self): results = [] - rv, data = self.mailbox.uid('SEARCH', 'UNSEEN') + search_criteria = str( + 'UNKEYWORD gulag_quarantined' + + ' UNKEYWORD gulag_released' + + ' UNKEYWORD gulag_bounced' + ) + rv, data = self.mailbox.uid('SEARCH', search_criteria) if rv != 'OK': return for uid in data[0].split(): @@ -65,11 +102,16 @@ class IMAPmailbox: return results def add_message(self,message,unseen=False): + rv, data = self.mailbox.select(self.imap_inbox) + if rv != 'OK': + raise IMAPmailboxException(whoami(self) + + "ERROR: Unable to select mailbox: " + self.imap_inbox + ) flags = '' if(unseen == True): flags = 'UNSEEN' rv, data = self.mailbox.append( - self.imap_mailbox, + self.imap_inbox, flags , imaplib.Time2Internaldate(time.time()), str(message).encode('utf-8') @@ -91,6 +133,21 @@ class IMAPmailbox: ) return data[0][1] + def move_message(self,imap_uid,dest_mbox): + rv, data = self.mailbox.uid('MOVE', str(imap_uid), dest_mbox) + if rv != 'OK': + raise IMAPmailboxException(whoami(self) + + "ERROR moving message: %s", str(imap_uid) + ) + + def retag_message(self,imap_uid,tag): + logging.info(whoami(self) + "UID: " + str(imap_uid)) + rv, data = self.mailbox.uid('STORE', str(imap_uid.decode()), 'FLAGS', tag) + if rv != 'OK': + raise IMAPmailboxException(whoami(self) + + "ERROR flagging message for deletion: %s", str(imap_uid) + ) + def delete_message(self,imap_uid): rv, data = self.mailbox.uid('STORE', str(imap_uid), '+FLAGS', '(\\Deleted)') if rv != 'OK': diff --git a/app/gulag_helpers.py b/app/gulag_helpers.py index b02d73b..72660a5 100755 --- a/app/gulag_helpers.py +++ b/app/gulag_helpers.py @@ -2,6 +2,7 @@ import argparse,sys,os,time,signal,logging from Gulag import Gulag,GulagException +import traceback parser = argparse.ArgumentParser() parser.add_argument('--config', required=True, help="Path to config file") @@ -14,8 +15,10 @@ if(importer_pid == 0): try: gulag = Gulag(args.config) except GulagException as e: - print(e.message) + logging.info("Gulag-Importer Exception: " + e.message) sys.exit(1) + except: + logging.info("Gulag-Importer Exception: " + str(sys.exc_info())) logging.info("Gulag-Importer: starting") while True: try: @@ -23,7 +26,7 @@ if(importer_pid == 0): except GulagException as e: logging.error("Gulag-Importer-Exception: " + e.message) except: - logging.error("Gulag-Importer-Exception: " + str(sys.exc_info())) + logging.error("Gulag-Importer-Exception: " + traceback.format_exc()) time.sleep(gulag.config['importer']['interval']) cleaner_pid = os.fork() @@ -41,7 +44,7 @@ if(cleaner_pid == 0): except GulagException as e: logging.info("Cleaner-Exception: " + e.message) except: - logging.info("Cleaner-Exception: " + str(sys.exc_info())) + logging.info("Cleaner-Exception: " + traceback.format_exc()) time.sleep(gulag.config['cleaner']['interval']) # Parent @@ -51,7 +54,7 @@ try: while True: time.sleep(10) except: - logging.info("Helpers MAIN-EXCEPTION: " + str(sys.exc_info())) + logging.info("Helpers MAIN-EXCEPTION: " + traceback.format_exc()) # Destroy childs for child_pid in child_pids: logging.info("Helpers parent: Killing child pid: %s", child_pid) diff --git a/db/gulag.sql b/db/gulag.sql index 74b448f..8812592 100644 --- a/db/gulag.sql +++ b/db/gulag.sql @@ -21,9 +21,7 @@ create table Mailboxes( imap_security varchar(32) not null default 'plain' collate 'ascii_general_ci', imap_user varchar(256) not null collate 'ascii_general_ci', imap_pass varchar(1024) not null collate 'ascii_general_ci', - imap_mailbox varchar(256) not null default 'INBOX', - imap_mailbox_fp varchar(256) not null default 'false-positives', - imap_separator varchar(4) not null default '/', + imap_inbox varchar(256) not null default 'INBOX', mailrelay_id varchar(128) not null, foreign key (mailrelay_id) references Mailrelays (id) on update cascade on delete restrict, comment varchar(256) default null diff --git a/docker-build.sh b/docker-build.sh index 7483879..9a56c5d 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -21,10 +21,10 @@ fi IMAGES="gulag-server gulag-db" +# --build-arg http_proxy=http://wprx-zdf.zwackl.local:3128 \ +# --build-arg https_proxy=http://wprx-zdf.zwackl.local:3128 \ for IMAGE in ${IMAGES}; do /usr/bin/docker build \ - --build-arg http_proxy=http://wprx-zdf.zwackl.local:3128 \ - --build-arg https_proxy=http://wprx-zdf.zwackl.local:3128 \ -t "${IMAGE}/${BASEOS}:${VERSION}_${BRANCH}" \ -f "docker/${IMAGE}/${BASEOS}/Dockerfile" . # /usr/bin/docker tag "${IMAGE}/${BASEOS}:${VERSION}_${BRANCH}" "${REGISTRY}/${IMAGE}/${BASEOS}:${VERSION}_${BRANCH}" diff --git a/docker/gulag-server/debian/Dockerfile b/docker/gulag-server/debian/Dockerfile index 0b4dcb6..5aea77e 100644 --- a/docker/gulag-server/debian/Dockerfile +++ b/docker/gulag-server/debian/Dockerfile @@ -1,6 +1,6 @@ ARG http_proxy ARG https_proxy -FROM debian +FROM debian:buster LABEL maintainer="Dominik Chilla" ENV DEBIAN_FRONTEND=noninteractive \ @@ -10,10 +10,10 @@ RUN env; set -ex ; \ apt-get -qq update \ && apt-get -qq --no-install-recommends install \ uwsgi-plugin-python3 python3-setuptools python3-flask \ - python3-flask-restful python3-mysql.connector \ + python3-flask-restful \ uwsgi uwsgi-plugin-python3 procps net-tools \ python3-pip libmagic1 python3-ssdeep \ - && pip3 install python-magic \ + && pip3 install python-magic mysql-connector \ && /bin/mkdir /config /socket /app \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/snippets/IMAP_commands.txt b/snippets/IMAP_commands.txt new file mode 100644 index 0000000..1b7a77a --- /dev/null +++ b/snippets/IMAP_commands.txt @@ -0,0 +1,11 @@ +# add multiple flags +. store 1365 +FLAGS blah blah2 + +# remove a flag +. store 1365 -FLAGS blah + +# replace flags +. store 1365 FLAGS blah3 + +# search by multiple missing flags +. search UNKEYWORD asdfasdfasdf UNKEYWORD blah2