attachment handling with magic and sub-URI parsing

This commit is contained in:
Dominik Chilla 2018-12-16 23:35:10 +01:00
parent 5723b18367
commit acda4c8e11
7 changed files with 62 additions and 23 deletions

View File

@ -97,6 +97,7 @@ class QuarMail:
msg_size = None
href = None
attach_count = None
uri_count = None
def __init__(self,qm_ref):
if 'id' not in qm_ref:
@ -139,6 +140,8 @@ class QuarMail:
self.href = qm_ref['href']
if 'attach_count' in qm_ref:
self.attach_count = qm_ref['attach_count']
if 'uri_count' in qm_ref:
self.uri_count = qm_ref['uri_count']
class AttachmentException(Exception):
message = None
@ -150,6 +153,7 @@ class Attachment:
filename = None
content_type = None
content_encoding = None
magic = None
comment = None
mailbox_id = None
imap_uid = None
@ -167,6 +171,9 @@ class Attachment:
self.content_type = at_ref['content_type']
if 'content_encoding' in at_ref:
self.content_encoding = at_ref['content_encoding']
if 'magic' not in at_ref:
raise AttachmentException("'magic' is mandatory!")
self.magic = at_ref['magic']
if 'comment' in at_ref:
self.comment = at_ref['comment']
if 'mailbox_id' not in at_ref:

View File

@ -1,4 +1,4 @@
import json,sys,os,logging,re
import json,sys,os,logging,re,magic
import email,email.header,email.message
from GulagDB import GulagDB,GulagDBException
from GulagMailbox import IMAPmailbox,IMAPmailboxException
@ -30,6 +30,7 @@ class Gulag:
raise GulagException(whoami(self) + "Logging not configured!")
if('filename' in self.config['logging'] and
len(self.config['logging']['filename']) > 0):
# TODO: Exception handling
logging.basicConfig(
filename=self.config['logging']['filename'],
format='%(asctime)s %(levelname)s %(message)s',
@ -148,10 +149,16 @@ class Gulag:
else:
# filename isn't encoded
filename = filename[0][0]
attach_magic = None
try:
attach_magic = magic.from_buffer(part.get_payload(decode=True))
except:
logging.info(whoami(self) + ": " + str(sys.exc_info()))
attach_id = self.db.add_attachment({
'filename': filename,
'content_type': part.get_content_type(),
'content_encoding': part['Content-Transfer-Encoding']
'content_encoding': part['Content-Transfer-Encoding'],
'magic': attach_magic
})
attachments.append(attach_id)
# End if part.get_filename()
@ -168,6 +175,9 @@ class Gulag:
for quarmail_id in quarmail_ids:
for attachment_id in attachments:
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
logging.info(whoami(self) +
"Attachment("+str(attachment_id)+")@QuarMail("+str(quarmail_id)+") imported"
)
# link message with uris
if(len(uris) > 0):
for quarmail_id in quarmail_ids:
@ -178,6 +188,9 @@ class Gulag:
"fqdn": extract_fqdn(uri)
})
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
logging.info(whoami(self) +
"URI("+str(uri_id)+")@QuarMail("+str(quarmail_id)+") imported"
)
except GulagDBException as e:
logging.error(whoami(self) + e.message)
# End for(unseen)

View File

@ -185,7 +185,9 @@ class GulagDB:
try:
cursor = self.conn.cursor()
query = "select *,(select count(*) from QuarMail2Attachment"
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count"
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count,"
query += " (select count(*) from QuarMail2URI"
query += " where QuarMails.id=QuarMail2URI.quarmail_id) as uri_count"
query += " from QuarMails " + self.get_where_clause(args)
query += " " + self.get_limit_clause(args) + " ;"
cursor.execute(query)
@ -213,10 +215,10 @@ class GulagDB:
def get_quarmail(self,args):
try:
cursor = self.conn.cursor()
# TODO: build SQL query by args
#query = "select * from QuarMails where id='" + args['id'] + "';"
query = "select *,(select count(*) from QuarMail2Attachment"
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count"
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count,"
query += " (select count(*) from QuarMail2URI"
query += " where QuarMails.id=QuarMail2URI.quarmail_id) as uri_count"
query += " from QuarMails where QuarMails.id="+ str(args['id']) +";"
cursor.execute(query)
data = cursor.fetchall()
@ -266,8 +268,9 @@ class GulagDB:
try:
cursor = self.conn.cursor()
cursor.execute("insert into Attachments " +
"(filename, content_type, content_encoding) values (%s,%s,%s)",
(attach['filename'], attach['content_type'], attach['content_encoding'])
"(filename,content_type,content_encoding,magic) values (%s,%s,%s,%s)",
(attach['filename'],attach['content_type'],
attach['content_encoding'],attach['magic'])
)
return cursor.lastrowid
except mariadb.Error as e:

View File

@ -1,4 +1,5 @@
import sys,re
import sys,re,urllib
from urllib.parse import urlparse
from smtplib import SMTP
def whoami(obj):
@ -6,7 +7,7 @@ def whoami(obj):
def send_mail(args):
try:
# FIXME: SMTP tranaport security and authentication!
# FIXME: SMTP transport security and authentication!
# with SMTP(host=mailbox['smtp_server'],port=mailbox['smtp_port']) as smtp:
# try:
# smtp.sendmail(
@ -20,17 +21,22 @@ def send_mail(args):
except TimeoutError as e:
raise Exception('xyz') from e
def extract_uris(input_text):
    """Collect the http(s) URIs contained in *input_text*.

    Every match is percent-decoded and stored as a key of the returned
    dict with an empty dict as its value. URIs embedded inside another
    URI (e.g. the target of a redirector such as
    https://www.google.de/url?sa=t&url=...) are stored as well and are
    flagged with {"suburi": True}.

    Returns a dict mapping URI -> metadata dict; it is empty when
    *input_text* contains no URI.
    """
    uri_pattern = re.compile(r'(https?:\/\/[^\s<>"]+)')
    uris = {}
    for m in uri_pattern.finditer(input_text):
        uri = urllib.parse.unquote(m.group(0))
        # A top-level occurrence always wins over an earlier sub-URI flag.
        uris[uri] = {}
        # Peel off nested URIs one level at a time. The search has to start
        # at offset 1: the pattern is greedy, so scanning from offset 0
        # would only re-match the enclosing URI and never the nested one.
        current = uri
        while True:
            nested = uri_pattern.search(current, 1)
            if nested is None:
                break
            # Each nested match is strictly shorter than its parent and
            # unquote never lengthens a string, so this loop terminates.
            current = urllib.parse.unquote(nested.group(0))
            # Do not downgrade a URI that was already seen at top level.
            uris.setdefault(current, {"suburi": True})
    return uris
def extract_fqdn(uri):
    """Return the host name part of *uri*, or None if there is none.

    Parsing is delegated to urllib.parse.urlparse, so user info and port
    are not part of the result and the host name is returned in lower
    case. None is returned when the URI has no network location or when
    urlparse rejects it with ValueError (e.g. an unbalanced IPv6 bracket).
    """
    try:
        return urlparse(uri).hostname
    except ValueError:
        return None

View File

@ -46,6 +46,7 @@ create table Attachments (
filename varchar(256) not null,
content_type varchar(256) not null,
content_encoding varchar(64),
magic varchar(128),
comment varchar(256)
)ENGINE = InnoDB;

View File

@ -5,11 +5,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
TZ=Europe/Berlin
RUN set -ex ; \
apt-get -qq update \
&& apt-get -qq --no-install-recommends install \
uwsgi-plugin-python3 python3-setuptools python3-flask \
python3-flask-restful python3-mysql.connector \
uwsgi uwsgi-plugin-python3 procps net-tools
apt-get -qq update \
&& apt-get -qq --no-install-recommends install \
uwsgi-plugin-python3 python3-setuptools python3-flask \
python3-flask-restful python3-mysql.connector \
uwsgi uwsgi-plugin-python3 procps net-tools \
python3-pip libmagic1 \
&& pip3 install python-magic
RUN /bin/mkdir /config /socket /app
COPY app/*.py /app/

View File

@ -307,6 +307,9 @@ definitions:
attach_count:
type: integer
description: number of attachments
uri_count:
type: integer
description: number of uris
rfc822_message:
type: string
description: full RFC822 email message
@ -317,6 +320,7 @@ definitions:
- filename
- content_encoding
- content_type
- magic
- mailbox_id
- imap_uid
- href
@ -336,6 +340,9 @@ definitions:
content_type:
type: string
example: image/jpeg
magic:
type: string
example: "PDF document, version 1.2"
href:
type: string
description: hypermedia