URI/FQDN extraction

This commit is contained in:
Dominik Chilla 2018-12-15 02:01:07 +01:00
parent a50740d60f
commit 70550c4021
9 changed files with 243 additions and 16 deletions

View File

@ -178,3 +178,27 @@ class Attachment:
if 'href' in at_ref:
self.href = at_ref['href']
class URIException(Exception):
    """Error raised while building a URI entity.

    The human-readable reason is available as ``message``.
    """
    message = None

    def __init__(self, message):
        # Exception already records the argument in ``args``; the explicit
        # call only makes that visible here.
        super().__init__(message)
        self.message = message
class URI:
    """Entity describing one URI record.

    Built from a mapping: ``id``, ``uri`` and ``fqdn`` are mandatory,
    ``href`` is copied only when the mapping provides it.

    Raises:
        URIException: if one of the mandatory keys is missing.
    """
    id = None
    uri = None
    fqdn = None
    href = None

    def __init__(self, uri_ref):
        # Mandatory keys are checked and copied one after another, in this order.
        mandatory = (
            ('id', "'id' is mandatory!"),
            ('uri', "'uri' is mandatory!"),
            ('fqdn', "'fqdn' is mandatory!"),
        )
        for key, complaint in mandatory:
            if key not in uri_ref:
                raise URIException(complaint)
            setattr(self, key, uri_ref[key])
        # Optional: stays at the class-level default when absent.
        if 'href' in uri_ref:
            self.href = uri_ref['href']

View File

@ -1,8 +1,8 @@
import json,sys,os,logging
import json,sys,os,logging,re
import email,email.header,email.message
from GulagDB import GulagDB,GulagDBException
from GulagMailbox import IMAPmailbox,IMAPmailboxException
from GulagUtils import whoami
from GulagUtils import whoami,extract_uris,extract_fqdn
class GulagException(Exception):
message = None
@ -78,6 +78,7 @@ class Gulag:
for unseen in imap_mb.get_unseen_messages():
quarmail_ids = []
attachments = []
uris = {}
uid = unseen['imap_uid']
msg = email.message_from_bytes(unseen['msg'])
msg_size = len(msg)
@ -154,12 +155,30 @@ class Gulag:
})
attachments.append(attach_id)
# Ende if part.get_filename()
# get all URIs
ctype = part.get_content_type()
if(ctype == 'text/plain' or ctype == 'text/html'):
curis = {}
curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
if(len(curis) > 0):
uris = {**uris, **curis}
# Ende for msg.walk()
# QuarMail und Attachments verknüpfen
if(len(attachments) > 0):
for quarmail_id in quarmail_ids:
for attachment_id in attachments:
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
if(len(uris) > 0):
for quarmail_id in quarmail_ids:
for uri in uris:
try:
uri_id = self.db.add_uri({
"uri": uri,
"fqdn": extract_fqdn(uri)
})
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
except GulagDBException as e:
logging.error(whoami(self) + e.message)
# Ende for(unseen)
imap_mb.close()
# Ende for get_mailboxes
@ -256,9 +275,37 @@ class Gulag:
if 'data' not in args:
return at_db
def get_uris(self):
    """Placeholder without real logic; always returns True.

    Reference kept from the original stub:
    https://stackoverflow.com/questions/1792366/extract-urls-out-of-email-in-python
    """
    return True
def get_quarmail_uris(self,args):
    """Return the URIs that belong to one quarantined mail.

    Args:
        args: dict holding ``quarmail_id``. If the key
            ``from_rfc822_message`` is present, the URIs are extracted live
            from the main parts of the message fetched through the IMAP
            mailbox; otherwise they are read from the database.

    Returns:
        The result of ``self.db.get_quarmail_uris()`` for the database path,
        or a list of URI strings (order of appearance, duplicates kept) for
        the live path.

    Raises:
        GulagException: if the database lookup or the IMAP access fails.
    """
    if 'from_rfc822_message' not in args:
        try:
            return self.db.get_quarmail_uris(args['quarmail_id'])
        except GulagDBException as e:
            raise GulagException(whoami(self) + e.message) from e
    try:
        qm_db = self.db.get_quarmail({"id": args['quarmail_id']})
        mailbox = self.db.get_mailbox(qm_db['mailbox_id'])
    except GulagDBException as e:
        logging.warning(whoami(self) + e.message)
        raise GulagException(whoami(self) + e.message) from e
    imap_mb = None
    try:
        imap_mb = IMAPmailbox(mailbox)
        mparts = imap_mb.get_main_parts(qm_db['imap_uid'])
        uris = []
        uri_pattern = r'(https?:\/\/[^\s<>"]+)'
        for part in mparts:
            # Mail bodies are not guaranteed to be UTF-8; undecodable bytes
            # must not abort the whole request.
            text = part.decode("utf-8", errors="replace")
            for m in re.finditer(uri_pattern, text):
                uris.append(m.group(0))
        return uris
    except IMAPmailboxException as e:
        logging.warning(whoami(self) + e.message)
        raise GulagException(whoami(self) + e.message) from e
    finally:
        # Release the IMAP connection on every exit path, as the import
        # loop does after processing a mailbox.
        if imap_mb is not None:
            imap_mb.close()
def rspamd_http2imap(self,args):
mailbox = None
@ -302,7 +349,7 @@ class Gulag:
)
logging.error(err)
raise GulagException(err)
if('rfc822_message' not in args['rfc822_message']):
if('rfc822_message' not in args):
err = str(whoami(self)
+ "Missing rfc822_message!"
)

View File

@ -2,7 +2,7 @@ import mysql.connector as mariadb
from Entities import(
Mailbox,MailboxException,QuarMail,
QuarMailException,Attachment,
AttachmentException
AttachmentException,URI,URIException
)
from GulagUtils import whoami
@ -99,6 +99,11 @@ class GulagDB:
cnt += 1
return where_clause
def parse_filters(self,filters):
    """Placeholder for filter parsing; ``filters`` is ignored and True is returned.

    TODO: handle filter documents such as
    {"groupOp":"AND","rules":[{"field":"Customer","op":"eq","data":"eosp"}]}
    """
    return True
def get_mailboxes(self):
try:
cursor = self.conn.cursor()
@ -217,7 +222,7 @@ class GulagDB:
data = cursor.fetchall()
if not data:
raise GulagDBException(whoami(self)
+ "Quarmail with id '"+ args['id'] + "' does not exist!"
+ "Quarmail with id '"+ str(args['id']) + "' does not exist!"
)
desc = cursor.description
cursor.close()
@ -380,3 +385,61 @@ class GulagDB:
except mariadb.Error as e:
raise GulagDBException(whoami(self) + str(e)) from e
def add_uri(self,args):
    """Insert one row into the URIs table.

    Args:
        args: dict with the keys ``uri`` and ``fqdn``.

    Returns:
        ``cursor.lastrowid`` of the insert.

    Raises:
        GulagDBException: if the database reports an error.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute("insert into URIs " +
            "(uri, fqdn) values (%s,%s)",
            (args['uri'], args['fqdn'])
        )
        return cursor.lastrowid
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # The cursor was previously left open on every call.
        if cursor is not None:
            cursor.close()
def del_uri(self,uri_id):
    """Delete one row from the URIs table.

    Args:
        uri_id: primary key (column ``id``) of the row to delete; may be an
            int or a string.

    Returns:
        ``cursor.lastrowid``, kept unchanged for existing callers.
        NOTE(review): presumably not meaningful after a delete - confirm
        whether callers use it.

    Raises:
        GulagDBException: if the database reports an error.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        # Bound parameter instead of string concatenation: no SQL injection
        # and no TypeError for integer ids. The key column of URIs is "id".
        cursor.execute("delete from URIs where id=%s", (uri_id,))
        return cursor.lastrowid
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        if cursor is not None:
            cursor.close()
def quarmail2uri(self,quarmail_id,uri_id):
    """Link a quarantined mail to a URI by inserting into QuarMail2URI.

    Args:
        quarmail_id: id of the quarantined mail.
        uri_id: id of the URI row.

    Raises:
        GulagDBException: if the database reports an error.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute("insert into QuarMail2URI " +
            "(quarmail_id, uri_id) values (%s,%s)",
            (quarmail_id, uri_id)
        )
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # The cursor was previously left open on every call.
        if cursor is not None:
            cursor.close()
def get_quarmail_uris(self,quarmail_id):
    """Fetch all URI rows linked to one quarantined mail.

    Args:
        quarmail_id: id of the quarantined mail.

    Returns:
        A list of dicts, one per URI, taken from ``URI(...).__dict__``; each
        carries an ``href`` built from ``self.uri_prefixes['quarmails']``.

    Raises:
        GulagDBException: if the mail has no URIs or the database reports
            an error.
    """
    cursor = None
    try:
        query = (
            "select URIs.*"
            " from QuarMail2URI"
            " left join QuarMails ON QuarMails.id = QuarMail2URI.quarmail_id"
            " left join URIs ON URIs.id = QuarMail2URI.uri_id"
            " where QuarMails.id = %s"
        )
        cursor = self.conn.cursor()
        # Bound parameter instead of string concatenation.
        cursor.execute(query, (quarmail_id,))
        rows = cursor.fetchall()
        if not rows:
            raise GulagDBException(whoami(self)
                + "QuarMail("+ str(quarmail_id) +") has no uris!"
            )
        # The first element of each description entry is the column name.
        columns = [column[0] for column in cursor.description]
        results = []
        for row in rows:
            record = dict(zip(columns, row))
            record['href'] = self.uri_prefixes['quarmails'] + str(quarmail_id)
            record['href'] += "/uris/" + str(record['id'])
            results.append(URI(record).__dict__)
        return results
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        if cursor is not None:
            cursor.close()

View File

@ -91,6 +91,19 @@ class IMAPmailbox:
+ str(self.email_address) + " not found!"
)
def get_main_parts(self,imap_uid):
    """Return the decoded payloads of all text/plain and text/html parts.

    Args:
        imap_uid: UID of the message, passed on to ``self.get_message()``.

    Returns:
        A non-empty list with one decoded payload per matching MIME part,
        in walk order.

    Raises:
        IMAPmailboxException: if the message has no such part.
    """
    message = email.message_from_bytes(self.get_message(imap_uid))
    wanted = ('text/plain', 'text/html')
    bodies = [
        node.get_payload(decode=True)
        for node in message.walk()
        if node.get_content_type() in wanted
    ]
    if not bodies:
        raise IMAPmailboxException(whoami(self) +
            "IMAP_UID(" + str(imap_uid)+")@"+str(self.email_address)+" has no main parts!"
        )
    return bodies
def append_message(self,message):
rv, data = self.mailbox.append(
self.imap_mailbox,

View File

@ -1,4 +1,4 @@
import sys
import sys,re
from smtplib import SMTP
def whoami(obj):
@ -20,3 +20,17 @@ def send_mail(args):
except TimeoutError as e:
raise Exception('xyz') from e
def extract_uris(string):
    """Collect every http/https URI found in a text.

    Args:
        string: text to scan (e.g. a decoded text/plain or text/html body).

    Returns:
        A dict whose keys are the distinct URIs in order of first appearance;
        every value is an empty dict.
    """
    uris = {}
    uri_pattern = r'(https?:\/\/[^\s<>"]+)'
    for m in re.finditer(uri_pattern, string):
        # The pattern runs up to the next whitespace/<>" character, so it
        # swallows sentence punctuation and a closing single quote that
        # directly follow a URI; those are not part of the URI.
        uri = m.group(0).rstrip(".,;:!?'")
        if uri in ("http://", "https://"):
            # Only punctuation followed the scheme - nothing usable left.
            continue
        uris[uri] = {}
    return uris
def extract_fqdn(uri):
    """Return the host part of an http/https URI.

    Args:
        uri: URI string, e.g. one produced by ``extract_uris()``.

    Returns:
        The host name as written in the URI (no userinfo, port, path, query
        or fragment), or None if ``uri`` is not an http/https URI with a host.
    """
    # An optional "userinfo@" prefix is skipped so that
    # "http://trusted.example@evil.example/" yields the real host; the host
    # itself ends at the first ':', '/', '?' or '#'.
    m = re.match(r'https?:\/\/(?:[^\/?#\s]*@)?([^:\/?#@\s<>"]+)', uri)
    if m is None:
        # Also covers inputs like "http:///x", which used to crash on
        # m.group(1) of a failed match.
        return None
    return m.group(1)

View File

@ -82,6 +82,18 @@ class ResQuarMailAttachment(GulagResource):
except GulagException as e:
abort(400, message=e.message)
class ResQuarMailURIs(GulagResource):
    """REST resource listing the URIs of one quarantined mail."""

    def get(self,quarmail_id):
        """Return the URIs of the mail identified by ``quarmail_id``.

        A non-empty ``from_rfc822_message`` query parameter is passed on to
        ``get_quarmail_uris`` as ``from_rfc822_message=True``. A
        GulagException is answered with HTTP 400.
        """
        query = {"quarmail_id": quarmail_id}
        live_lookup = request.args.get('from_rfc822_message')
        if live_lookup:
            query['from_rfc822_message'] = True
        try:
            found = self.gulag.get_quarmail_uris(query)
        except GulagException as e:
            abort(400, message=e.message)
        else:
            return found
class ResAttachments(GulagResource):
def get(self):
return {"resource": "Attachments"}

View File

@ -7,7 +7,7 @@ from Gulag import Gulag,GulagException
from Resources import (ResRoot,ResMailboxes,
ResQuarMails,ResQuarMail,ResQuarMailAttachments,
ResQuarMailAttachment,ResAttachments,ResAttachment,
ResRSPAMDImporter
ResRSPAMDImporter,ResQuarMailURIs
)
parser = argparse.ArgumentParser()
parser.add_argument('--config', required=True, help="Path to config file")
@ -44,6 +44,10 @@ try:
'/api/v1/quarmails/<int:quarmail_id>/attachments/<int:attachment_id>',
resource_class_kwargs={'gulag_object': gulag}
)
api.add_resource(ResQuarMailURIs,
'/api/v1/quarmails/<int:quarmail_id>/uris',
resource_class_kwargs={'gulag_object': gulag}
)
api.add_resource(ResAttachments,
'/api/v1/attachments',
resource_class_kwargs={'gulag_object': gulag}

View File

@ -20,10 +20,6 @@ create table Mailboxes(
smtp_pass varchar(2048) default null,
comment varchar(256) default null
)ENGINE = InnoDB;
insert into Mailboxes (email_address,name,imap_user,imap_pass)
values('quarantine-in@example.org','E-Mail inbound quarantine','quarantine-in','quarantine-in_secure_password');
insert into Mailboxes (email_address,name,imap_user,imap_pass)
values('quarantine-out@example.org','E-Mail outbound quarantine','quarantine-out','quarantine-out_secure_password');
insert into Mailboxes (email_address,name,imap_user,imap_pass)
values('quarantine-sandbox@example.org','E-Mail sandbox quarantine','quarantine-sb','quarantine-sb_secure_password');
@ -60,3 +56,15 @@ create table QuarMail2Attachment (
foreign key (attachment_id) references Attachments (id) on delete cascade on update cascade
)ENGINE = InnoDB;
-- One row per stored URI.
--   id:   surrogate key, referenced by QuarMail2URI.uri_id
--   uri:  the URI text (at most 2048 characters)
--   fqdn: host part extracted from the URI (at most 512 characters)
create table URIs (
id int unsigned auto_increment primary key,
uri varchar(2048),
fqdn varchar(512)
)ENGINE = InnoDB;
-- Link table between QuarMails and URIs.
-- Both foreign keys cascade, so a link row is removed/updated together with
-- the referenced QuarMails or URIs row.
-- NOTE(review): there is no primary key or unique constraint, so the same
-- (quarmail_id, uri_id) pair can be inserted more than once - confirm intended.
create table QuarMail2URI (
quarmail_id int unsigned,
uri_id int unsigned,
foreign key (quarmail_id) references QuarMails (id) on delete cascade on update cascade,
foreign key (uri_id) references URIs (id) on delete cascade on update cascade
)ENGINE = InnoDB;

View File

@ -1,7 +1,7 @@
swagger: '2.0'
info:
description: Gulag quarantine REST API
version: '18.12'
version: "1.0.0"
title: Gulag quarantine REST API
contact:
email: info@dc-it-con.de
@ -208,6 +208,35 @@ paths:
500:
description: server error
/quarmails/{quarmail_id}/uris:
  get:
    summary: "retrieves all URIs from any main MIME part (text/plain,text/html)"
    operationId: get_quarmail_uris
    produces:
      - application/json
    parameters:
      - in: path
        name: quarmail_id
        description: unique id of quarantined email
        required: true
        # the route is registered as <int:quarmail_id>, so only integers match
        type: integer
      - in: query
        name: from_rfc822_message
        # any non-empty value switches the lookup to the RFC822 message
        description: fetch all URIs from RFC822 message not from database
        required: false
        type: string
    responses:
      200:
        # NOTE(review): with from_rfc822_message set, the server returns plain
        # URI strings instead of URI objects - confirm the intended schema
        description: array of URIs
        schema:
          type: array
          items:
            $ref: '#/definitions/URI'
      400:
        description: bad input parameter
      500:
        description: server error
definitions:
QuarMail:
type: object
@ -321,3 +350,16 @@ definitions:
data:
type: string
description: raw/encoded (see content_encoding) attachment payload
URI:
  type: object
  required:
    - id
    - uri
    - fqdn
  properties:
    id:
      type: integer
    uri:
      type: string
    fqdn:
      type: string
    # set by the server for URIs read from the database
    href:
      type: string
      description: link to this URI below its quarantined email