import processing with keyword tagging instead of flagging SEEN/UNSEEN

This commit is contained in:
Dominik Chilla 2019-04-24 20:39:13 +02:00
parent 437f2e2ac2
commit ce0666f23b
9 changed files with 148 additions and 50 deletions

View File

@ -56,9 +56,7 @@ class Mailbox:
imap_security = None
imap_user = None
imap_pass = None
imap_mailbox = None
imap_mailbox_fp = None
imap_separator = None
imap_inbox = None
mailrelay_id = None
comment = None
href = None
@ -90,15 +88,9 @@ class Mailbox:
if 'imap_pass' not in mb_ref:
raise MailboxException("'imap_pass' is mandatory!")
self.imap_pass = mb_ref['imap_pass']
if 'imap_mailbox' not in mb_ref:
raise MailboxException("'imap_mailbox' is mandatory!")
self.imap_mailbox = mb_ref['imap_mailbox']
if 'imap_mailbox_fp' not in mb_ref:
raise MailboxException("'imap_mailbox_fp' is mandatory!")
self.imap_mailbox_fp = mb_ref['imap_mailbox_fp']
if 'imap_separator' not in mb_ref:
raise MailboxException("'imap_separator' is mandatory!")
self.imap_seperator = mb_ref['imap_separator']
if 'imap_inbox' not in mb_ref:
raise MailboxException("'imap_inbox' is mandatory!")
self.imap_inbox = mb_ref['imap_inbox']
if 'mailrelay_id' not in mb_ref:
raise MailboxException("'mailrelay_id' is mandatory!")
self.mailrelay_id = mb_ref['mailrelay_id']

View File

@ -1,5 +1,6 @@
import json, sys,os,logging,re,magic
import email,email.header,email.message
from email import policy
from GulagDB import (
GulagDB,GulagDBException,GulagDBNotFoundException,GulagDBBadInputException
)
@ -65,6 +66,15 @@ class Gulag:
except GulagDBException as e:
logging.warning(whoami(self) + e.message)
raise GulagException(whoami(self) + e.message) from e
# Init mailboxes/folders
for mailbox in self.db.get_mailboxes():
try:
imap_mb = IMAPmailbox(mailbox)
imap_mb.init_folders()
imap_mb.close
except IMAPmailboxException as e:
logging.warning(whoami(self) + e.message)
continue
def check_filters(self,fields_target,filters):
if fields_target not in self.fields:
@ -100,20 +110,27 @@ class Gulag:
messages = []
try:
imap_mb = IMAPmailbox(mailbox)
messages = imap_mb.get_unseen_messages()
messages = imap_mb.get_new_messages()
except IMAPmailboxException as e:
logging.warning(whoami(self) + e.message)
continue
for unseen in messages:
for message in messages:
quarmail_ids = []
attachments = []
uris = {}
uid = unseen['imap_uid']
msg = email.message_from_bytes(unseen['msg'])
uid = message['imap_uid']
msg = email.message_from_bytes(message['msg'])
source_id = 'amavis'
if 'X-Gulag-Source' in msg:
source_id = email.header.decode_header(msg['X-Gulag-Source'])[0][0]
r5321_from = email.header.decode_header(msg['Return-Path'])[0][0]
try:
r5321_from = email.header.decode_header(msg['Return-Path'])[0][0]
except:
logging.warning(whoami(self) +
"Failed to get return-path header! Moving message to failed folder!"
)
imap_mb.move_message(str(uid.decode()), 'failed')
continue
if(r5321_from is not '<>'):
r5321_from = r5321_from.replace("<","")
r5321_from = r5321_from.replace(">","")
@ -122,17 +139,21 @@ class Gulag:
r5321_rcpts = email.header.decode_header(
msg['X-Envelope-To-Blocked'])[0][0]
except:
# TODO: move_message to INBOX.failed
logging.warning(whoami(self) +
"Failed to extract envelope recipients! Skipping mail"
"Failed to extract envelope recipients! Moving message to failed folder!"
)
imap_mb.move_message(str(uid.decode()), 'failed')
continue
r5322_from = None
try:
r5322_from = email.header.decode_header(msg['From'])[0][0]
except:
# TODO: move_message to INBOX.failed
logging.warning(whoami(self) +
"Failed to extract from header! Skipping mail"
"Failed to extract from header! Moving message to failed folder!"
)
imap_mb.move_message(str(uid.decode()), 'failed')
continue
subject = email.header.decode_header(msg['Subject'])[0][0]
msg_id = None
@ -155,6 +176,13 @@ class Gulag:
r5321_rcpts = r5321_rcpts.replace(" ", "")
r5321_rcpts = r5321_rcpts.replace("<", "")
r5321_rcpts = r5321_rcpts.replace(">", "")
try:
msg_serialized = msg.as_string()
except LookupError:
# LookupError: unknown encoding: _iso-2022-jp$esc
# https://github.com/coddingtonbear/django-mailbox/commit/aa59199c9b98ed317c6c95dc4018e21d1302858c
msg.set_payload(msg.get_payload(decode=True).decode('ascii','ignore'))
msg_serialized = msg.as_string()
# Write one DB entry per envelope recipient.
# The e-mail itself exists only once in the IMAP backend and is
# referenced multiple times via the mailbox_id and the imap_uid.
@ -165,9 +193,9 @@ class Gulag:
'env_rcpt': r5321_rcpt, 'hdr_cf': x_spam_status,
'hdr_from': r5322_from, 'hdr_subject': subject,
'hdr_msgid': msg_id, 'hdr_date': date, 'cf_meta': 'cf_meta',
'mailbox_id': 'quarantine@zwackl.de', 'imap_uid': uid,
'source_id': source_id, 'msg_size': len(msg.as_string()),
'ssdeep': ssdeep.hash(msg.as_string())
'mailbox_id': mailbox['id'], 'imap_uid': uid,
'source_id': source_id, 'msg_size': len(msg_serialized),
'ssdeep': ssdeep.hash(msg_serialized)
})
except GulagDBBadInputException as e:
logging.warn(whoami(self) + e.message)
@ -180,6 +208,8 @@ class Gulag:
)
quarmail_ids.append(quarmail_id)
# End for rcpts
# Tag message as 'gulag_quarantined' in IMAP backend
imap_mb.retag_message(uid, 'gulag_quarantined')
# Iterate through all MIME-parts and extract all
# attachments (parts with a name/filename attribute)
for part in msg.walk():
@ -193,12 +223,18 @@ class Gulag:
# filename isn't encoded
filename = filename[0][0]
attach_decoded = part.get_payload(decode=True)
try:
mgc = magic.from_buffer(attach_decoded)
mime_type = magic.from_buffer(attach_decoded, mime=True)
except TypeError as e:
logging.warning(whoami(self) + str(e))
continue
attach_id = self.db.add_attachment({
'filename': filename,
'content_type': part.get_content_type(),
'content_encoding': part['Content-Transfer-Encoding'],
'magic': magic.from_buffer(attach_decoded),
'mime_type': magic.from_buffer(attach_decoded, mime=True),
'magic': mgc,
'mime_type': mime_type,
'sha256': hashlib.sha256(attach_decoded).hexdigest(),
'ssdeep': ssdeep.hash(attach_decoded),
'size': len(attach_decoded)
@ -239,7 +275,7 @@ class Gulag:
)
except GulagDBException as e:
logging.error(whoami(self) + e.message)
# End for(unseen)
# End for(messages)
imap_mb.close()
# End for get_mailboxes
@ -425,12 +461,12 @@ class Gulag:
mailrelay = GulagMailrelay(mailrelay_ref)
mailrelay.release_quarmail(quarmail)
logging.info(whoami(self) +
"QuarMail("+quarmail['id']+") released. env_rcpt: "+quarmail['env_rcpt']
"QuarMail("+str(quarmail['id'])+") released. env_rcpt: "+quarmail['env_rcpt']
)
if 'purge' in args:
self.delete_quarmail({"quarmail_id": args['quarmail_id']})
logging.info(whoami(self) +
"QuarMail(" + quarmail['id'] + ") deleted"
"QuarMail(" + str(quarmail['id']) + ") deleted"
)
except GulagNotFoundException as e:
raise GulagNotFoundException(whoami(self) + e.message) from e
@ -454,12 +490,12 @@ class Gulag:
mailrelay = GulagMailrelay(mailrelay_ref)
mailrelay.bounce_quarmail(quarmail)
logging.info(whoami(self) +
"QuarMail("+quarmail['id']+") bounced back to "+quarmail['env_from']
"QuarMail("+str(quarmail['id'])+") bounced back to "+quarmail['env_from']
)
if 'purge' in args:
self.delete_quarmail({"quarmail_id": args['quarmail_id']})
logging.info(whoami(self) +
"QuarMail(" + quarmail['id'] + ") deleted"
"QuarMail(" + str(quarmail['id']) + ") deleted"
)
except GulagNotFoundException as e:
raise GulagNotFoundException(whoami(self) + e.message) from e

View File

@ -263,7 +263,8 @@ class GulagDB:
"(mx_queue_id,env_from,env_rcpt,"+
"hdr_cf,hdr_from,hdr_subject,"+
"hdr_msgid,hdr_date,cf_meta,"+
"mailbox_id,imap_uid,msg_size,ssdeep,source_id) " +
"mailbox_id,imap_uid,msg_size,ssdeep,"+
"source_id) " +
"values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
(quarmail['mx_queue_id'],quarmail['env_from'],quarmail['env_rcpt'],
quarmail['hdr_cf'],quarmail['hdr_from'],quarmail['hdr_subject'],

View File

@ -5,6 +5,8 @@ from email.parser import HeaderParser
import time
import re
from GulagUtils import whoami
import logging
class IMAPmailboxException(Exception):
message = None
@ -16,15 +18,20 @@ class IMAPmailbox:
imap_server = None
imap_user = None
imap_pass = None
imap_mailbox = None
imap_inbox = None
mailbox = None
tags = (
'gulag_quarantined',
'gulag_released',
'gulag_bounced'
)
def __init__(self, mb_ref):
self.id = mb_ref['id']
self.imap_server = mb_ref['imap_server']
self.imap_user = mb_ref['imap_user']
self.imap_pass = mb_ref['imap_pass']
self.imap_mailbox = mb_ref['imap_mailbox']
self.imap_inbox = mb_ref['imap_inbox']
try:
self.mailbox = imaplib.IMAP4(self.imap_server)
rv, data = self.mailbox.login(self.imap_user, self.imap_pass)
@ -36,20 +43,50 @@ class IMAPmailbox:
raise IMAPmailboxException(whoami(self) +
self.imap_user + ": IMAP server " + self.imap_server + " refused connection"
) from e
rv, data = self.mailbox.select(self.imap_mailbox)
rv, data = self.mailbox.select(self.imap_inbox)
if rv != 'OK':
raise IMAPmailboxException(whoami(self) +
"ERROR: Unable to select mailbox: " + self.imap_mailbox
"ERROR: Unable to select mailbox: " + self.imap_inbox
)
def init_folders(self):
    """Make sure every mandatory IMAP folder exists, creating missing ones.

    Lists all folders through ``self.mailbox.list`` and issues a CREATE
    for each mandatory folder (currently only ``failed``) that the
    listing does not contain.

    Raises:
        IMAPmailboxException: if the LIST command or a CREATE command
            does not answer with ``OK``.
    """
    mandatory_folders = ("failed",)
    rv, data = self.mailbox.list('""', '*')
    if rv != 'OK':
        raise IMAPmailboxException(whoami(self) +
            "ERROR: Unable to list mailbox: " + self.imap_inbox
        )
    # A LIST line looks like: (\HasChildren \Trash) "." Trash
    # Compiled once here instead of once per listed folder.
    list_line = re.compile(r'^.+".+" (\S+)$')
    existing = set()
    for folder in data or []:
        # Skip anything that is not a plain bytes line (imaplib can hand
        # back None or tuples) instead of crashing on .decode().
        if not isinstance(folder, bytes):
            continue
        m = list_line.search(folder.decode(errors='replace'))
        if m is None:
            # Line does not match the pattern (e.g. a quoted name
            # containing spaces); it cannot be a mandatory folder.
            continue
        existing.add(m.group(1).strip('"'))
    # Create mandatory folders if needed
    for folder in mandatory_folders:
        if folder in existing:
            continue
        rv, data = self.mailbox.create(folder)
        if rv != 'OK':
            raise IMAPmailboxException(whoami(self) +
                "ERROR: Unable to create folder: " + folder
            )
def close(self):
    """Close the selected mailbox and log out from the IMAP server.

    The logout is issued even when closing the mailbox raises, so the
    server connection is not left open; the original error still
    propagates to the caller.
    """
    try:
        self.mailbox.close()
    finally:
        self.mailbox.logout()
def get_unseen_messages(self):
def get_new_messages(self):
results = []
rv, data = self.mailbox.uid('SEARCH', 'UNSEEN')
search_criteria = str(
'UNKEYWORD gulag_quarantined'
+ ' UNKEYWORD gulag_released'
+ ' UNKEYWORD gulag_bounced'
)
rv, data = self.mailbox.uid('SEARCH', search_criteria)
if rv != 'OK':
return
for uid in data[0].split():
@ -65,11 +102,16 @@ class IMAPmailbox:
return results
def add_message(self,message,unseen=False):
rv, data = self.mailbox.select(self.imap_inbox)
if rv != 'OK':
raise IMAPmailboxException(whoami(self) +
"ERROR: Unable to select mailbox: " + self.imap_inbox
)
flags = ''
if(unseen == True):
flags = 'UNSEEN'
rv, data = self.mailbox.append(
self.imap_mailbox,
self.imap_inbox,
flags ,
imaplib.Time2Internaldate(time.time()),
str(message).encode('utf-8')
@ -91,6 +133,21 @@ class IMAPmailbox:
)
return data[0][1]
def move_message(self,imap_uid,dest_mbox):
    """Move the message with the given UID into another folder.

    Args:
        imap_uid: UID of the message; ``bytes``, ``str`` and ``int``
            values are accepted.
        dest_mbox: name of the destination folder.

    Raises:
        IMAPmailboxException: if the server does not answer ``OK``.
    """
    # str(b'42') would yield "b'42'", so decode raw bytes UIDs first.
    if isinstance(imap_uid, bytes):
        imap_uid = imap_uid.decode()
    uid = str(imap_uid)
    rv, data = self.mailbox.uid('MOVE', uid, dest_mbox)
    if rv != 'OK':
        # Build one complete message string: handing the UID over as a
        # second constructor argument would leave "%s" unformatted.
        raise IMAPmailboxException(whoami(self) +
            "ERROR moving message: " + uid
        )
def retag_message(self,imap_uid,tag):
    """Replace the flags of a message with the given keyword tag.

    Issues ``UID STORE <uid> FLAGS <tag>``. Note that ``FLAGS`` (as
    opposed to ``+FLAGS``) replaces the message's flag list rather than
    adding to it.

    Args:
        imap_uid: UID of the message; ``bytes``, ``str`` and ``int``
            values are accepted.
        tag: keyword to set, e.g. ``gulag_quarantined``.

    Raises:
        IMAPmailboxException: if the server does not answer ``OK``.
    """
    # Accept raw bytes UIDs as well as already decoded ones, matching
    # the other UID based methods.
    if isinstance(imap_uid, bytes):
        imap_uid = imap_uid.decode()
    uid = str(imap_uid)
    logging.info(whoami(self) + "UID: " + uid)
    rv, data = self.mailbox.uid('STORE', uid, 'FLAGS', tag)
    if rv != 'OK':
        # Build one complete message string: handing the UID over as a
        # second constructor argument would leave "%s" unformatted.
        raise IMAPmailboxException(whoami(self) +
            "ERROR tagging message: " + uid
        )
def delete_message(self,imap_uid):
rv, data = self.mailbox.uid('STORE', str(imap_uid), '+FLAGS', '(\\Deleted)')
if rv != 'OK':

View File

@ -2,6 +2,7 @@
import argparse,sys,os,time,signal,logging
from Gulag import Gulag,GulagException
import traceback
parser = argparse.ArgumentParser()
parser.add_argument('--config', required=True, help="Path to config file")
@ -14,8 +15,10 @@ if(importer_pid == 0):
try:
gulag = Gulag(args.config)
except GulagException as e:
print(e.message)
logging.info("Gulag-Importer Exception: " + e.message)
sys.exit(1)
except:
logging.info("Gulag-Importer Exception: " + str(sys.exc_info()))
logging.info("Gulag-Importer: starting")
while True:
try:
@ -23,7 +26,7 @@ if(importer_pid == 0):
except GulagException as e:
logging.error("Gulag-Importer-Exception: " + e.message)
except:
logging.error("Gulag-Importer-Exception: " + str(sys.exc_info()))
logging.error("Gulag-Importer-Exception: " + traceback.format_exc())
time.sleep(gulag.config['importer']['interval'])
cleaner_pid = os.fork()
@ -41,7 +44,7 @@ if(cleaner_pid == 0):
except GulagException as e:
logging.info("Cleaner-Exception: " + e.message)
except:
logging.info("Cleaner-Exception: " + str(sys.exc_info()))
logging.info("Cleaner-Exception: " + traceback.format_exc())
time.sleep(gulag.config['cleaner']['interval'])
# Parent
@ -51,7 +54,7 @@ try:
while True:
time.sleep(10)
except:
logging.info("Helpers MAIN-EXCEPTION: " + str(sys.exc_info()))
logging.info("Helpers MAIN-EXCEPTION: " + traceback.format_exc())
# Destroy children
for child_pid in child_pids:
logging.info("Helpers parent: Killing child pid: %s", child_pid)

View File

@ -21,9 +21,7 @@ create table Mailboxes(
imap_security varchar(32) not null default 'plain' collate 'ascii_general_ci',
imap_user varchar(256) not null collate 'ascii_general_ci',
imap_pass varchar(1024) not null collate 'ascii_general_ci',
imap_mailbox varchar(256) not null default 'INBOX',
imap_mailbox_fp varchar(256) not null default 'false-positives',
imap_separator varchar(4) not null default '/',
imap_inbox varchar(256) not null default 'INBOX',
mailrelay_id varchar(128) not null,
foreign key (mailrelay_id) references Mailrelays (id) on update cascade on delete restrict,
comment varchar(256) default null

View File

@ -21,10 +21,10 @@ fi
IMAGES="gulag-server gulag-db"
# --build-arg http_proxy=http://wprx-zdf.zwackl.local:3128 \
# --build-arg https_proxy=http://wprx-zdf.zwackl.local:3128 \
for IMAGE in ${IMAGES}; do
/usr/bin/docker build \
--build-arg http_proxy=http://wprx-zdf.zwackl.local:3128 \
--build-arg https_proxy=http://wprx-zdf.zwackl.local:3128 \
-t "${IMAGE}/${BASEOS}:${VERSION}_${BRANCH}" \
-f "docker/${IMAGE}/${BASEOS}/Dockerfile" .
# /usr/bin/docker tag "${IMAGE}/${BASEOS}:${VERSION}_${BRANCH}" "${REGISTRY}/${IMAGE}/${BASEOS}:${VERSION}_${BRANCH}"

View File

@ -1,6 +1,6 @@
ARG http_proxy
ARG https_proxy
FROM debian
FROM debian:buster
LABEL maintainer="Dominik Chilla"
ENV DEBIAN_FRONTEND=noninteractive \
@ -10,10 +10,10 @@ RUN env; set -ex ; \
apt-get -qq update \
&& apt-get -qq --no-install-recommends install \
uwsgi-plugin-python3 python3-setuptools python3-flask \
python3-flask-restful python3-mysql.connector \
python3-flask-restful \
uwsgi uwsgi-plugin-python3 procps net-tools \
python3-pip libmagic1 python3-ssdeep \
&& pip3 install python-magic \
&& pip3 install python-magic mysql-connector \
&& /bin/mkdir /config /socket /app \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

View File

@ -0,0 +1,11 @@
# add multiple flags
. store 1365 +FLAGS blah blah2
# remove a flag
. store 1365 -FLAGS blah
# replace flags
. store 1365 FLAGS blah3
# search by multiple missing flags
. search UNKEYWORD asdfasdfasdf UNKEYWORD blah2