mirror of
https://github.com/chillout2k/gulag.git
synced 2025-12-13 16:00:18 +00:00
URI/FQDN extraction
This commit is contained in:
parent
a50740d60f
commit
70550c4021
@ -178,3 +178,27 @@ class Attachment:
|
|||||||
if 'href' in at_ref:
|
if 'href' in at_ref:
|
||||||
self.href = at_ref['href']
|
self.href = at_ref['href']
|
||||||
|
|
||||||
|
class URIException(Exception):
    """Raised when a URI entity cannot be built from the supplied data.

    The reason is available as the ``message`` attribute and through
    ``str(exc)`` / ``exc.args``.
    """
    message = None

    def __init__(self, message):
        # Hand the text to Exception as well; without this call str(exc)
        # is empty and tracebacks/log records show no reason at all.
        super().__init__(message)
        self.message = message
|
||||||
|
|
||||||
|
class URI:
    """Plain value object describing one URI record.

    Built from a mapping that must contain ``id``, ``uri`` and ``fqdn``;
    ``href`` is copied only when the mapping provides it.
    """
    # Class-level defaults; instance attributes are set in __init__.
    id = None
    uri = None
    fqdn = None
    href = None

    def __init__(self, uri_ref):
        """Copy the fields of *uri_ref* onto the instance.

        Raises:
            URIException: if one of the mandatory keys is missing. Keys
                are checked (and assigned) in the order id, uri, fqdn.
        """
        for field in ('id', 'uri', 'fqdn'):
            if field not in uri_ref:
                raise URIException("'" + field + "' is mandatory!")
            setattr(self, field, uri_ref[field])
        # href is optional and stays at the class default when absent.
        if 'href' in uri_ref:
            self.href = uri_ref['href']
|
||||||
|
|
||||||
|
|||||||
61
app/Gulag.py
61
app/Gulag.py
@ -1,8 +1,8 @@
|
|||||||
import json,sys,os,logging
|
import json,sys,os,logging,re
|
||||||
import email,email.header,email.message
|
import email,email.header,email.message
|
||||||
from GulagDB import GulagDB,GulagDBException
|
from GulagDB import GulagDB,GulagDBException
|
||||||
from GulagMailbox import IMAPmailbox,IMAPmailboxException
|
from GulagMailbox import IMAPmailbox,IMAPmailboxException
|
||||||
from GulagUtils import whoami
|
from GulagUtils import whoami,extract_uris,extract_fqdn
|
||||||
|
|
||||||
class GulagException(Exception):
|
class GulagException(Exception):
|
||||||
message = None
|
message = None
|
||||||
@ -78,6 +78,7 @@ class Gulag:
|
|||||||
for unseen in imap_mb.get_unseen_messages():
|
for unseen in imap_mb.get_unseen_messages():
|
||||||
quarmail_ids = []
|
quarmail_ids = []
|
||||||
attachments = []
|
attachments = []
|
||||||
|
uris = {}
|
||||||
uid = unseen['imap_uid']
|
uid = unseen['imap_uid']
|
||||||
msg = email.message_from_bytes(unseen['msg'])
|
msg = email.message_from_bytes(unseen['msg'])
|
||||||
msg_size = len(msg)
|
msg_size = len(msg)
|
||||||
@ -154,12 +155,30 @@ class Gulag:
|
|||||||
})
|
})
|
||||||
attachments.append(attach_id)
|
attachments.append(attach_id)
|
||||||
# Ende if part.get_filename()
|
# Ende if part.get_filename()
|
||||||
|
# get all URIs
|
||||||
|
ctype = part.get_content_type()
|
||||||
|
if(ctype == 'text/plain' or ctype == 'text/html'):
|
||||||
|
curis = {}
|
||||||
|
curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
|
||||||
|
if(len(curis) > 0):
|
||||||
|
uris = {**uris, **curis}
|
||||||
# Ende for msg.walk()
|
# Ende for msg.walk()
|
||||||
# QuarMail und Attachments verknüpfen
|
# QuarMail und Attachments verknüpfen
|
||||||
if(len(attachments) > 0):
|
if(len(attachments) > 0):
|
||||||
for quarmail_id in quarmail_ids:
|
for quarmail_id in quarmail_ids:
|
||||||
for attachment_id in attachments:
|
for attachment_id in attachments:
|
||||||
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
|
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
|
||||||
|
if(len(uris) > 0):
|
||||||
|
for quarmail_id in quarmail_ids:
|
||||||
|
for uri in uris:
|
||||||
|
try:
|
||||||
|
uri_id = self.db.add_uri({
|
||||||
|
"uri": uri,
|
||||||
|
"fqdn": extract_fqdn(uri)
|
||||||
|
})
|
||||||
|
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
|
||||||
|
except GulagDBException as e:
|
||||||
|
logging.error(whoami(self) + e.message)
|
||||||
# Ende for(unseen)
|
# Ende for(unseen)
|
||||||
imap_mb.close()
|
imap_mb.close()
|
||||||
# Ende for get_mailboxes
|
# Ende for get_mailboxes
|
||||||
@ -256,10 +275,38 @@ class Gulag:
|
|||||||
if 'data' not in args:
|
if 'data' not in args:
|
||||||
return at_db
|
return at_db
|
||||||
|
|
||||||
def get_quarmail_uris(self, args):
    """Return the URIs of one quarantined mail.

    Args:
        args: mapping with ``quarmail_id``. If the key
            ``from_rfc822_message`` is present, the URIs are extracted
            from the stored message fetched via IMAP; otherwise they are
            read from the database.

    Returns:
        Whatever ``self.db.get_quarmail_uris`` returns (database mode), or
        a list of URI strings in order of appearance, duplicates included
        (message mode).

    Raises:
        GulagException: wraps GulagDBException / IMAPmailboxException.
    """
    if 'from_rfc822_message' not in args:
        try:
            return self.db.get_quarmail_uris(args['quarmail_id'])
        except GulagDBException as e:
            raise GulagException(whoami(self) + e.message) from e

    # Message mode: look up the mail and the mailbox it is stored in.
    try:
        qm_db = self.db.get_quarmail({"id": args['quarmail_id']})
        mailbox = self.db.get_mailbox(qm_db['mailbox_id'])
    except GulagDBException as e:
        logging.warning(whoami(self) + e.message)
        raise GulagException(whoami(self) + e.message) from e

    # Compiled once instead of once per MIME part.
    uri_regex = re.compile(r'(https?:\/\/[^\s<>"]+)')
    imap_mb = None
    try:
        imap_mb = IMAPmailbox(mailbox)
        uris = []
        for part in imap_mb.get_main_parts(qm_db['imap_uid']):
            # Parts arrive as raw bytes without charset information.
            # Undecodable bytes are replaced rather than raising an
            # uncaught UnicodeDecodeError; URI characters are ASCII, so
            # extraction is unaffected.
            text = part.decode("utf-8", errors="replace")
            uris.extend(m.group(0) for m in uri_regex.finditer(text))
        return uris
    except IMAPmailboxException as e:
        logging.warning(whoami(self) + e.message)
        raise GulagException(whoami(self) + e.message) from e
    finally:
        # Release the IMAP connection, as the import loop of this class
        # does after it has processed a mailbox.
        if imap_mb is not None:
            imap_mb.close()
|
||||||
|
|
||||||
def rspamd_http2imap(self,args):
|
def rspamd_http2imap(self,args):
|
||||||
mailbox = None
|
mailbox = None
|
||||||
try:
|
try:
|
||||||
@ -302,7 +349,7 @@ class Gulag:
|
|||||||
)
|
)
|
||||||
logging.error(err)
|
logging.error(err)
|
||||||
raise GulagException(err)
|
raise GulagException(err)
|
||||||
if('rfc822_message' not in args['rfc822_message']):
|
if('rfc822_message' not in args):
|
||||||
err = str(whoami(self)
|
err = str(whoami(self)
|
||||||
+ "Missing rfc822_message!"
|
+ "Missing rfc822_message!"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -2,7 +2,7 @@ import mysql.connector as mariadb
|
|||||||
from Entities import(
|
from Entities import(
|
||||||
Mailbox,MailboxException,QuarMail,
|
Mailbox,MailboxException,QuarMail,
|
||||||
QuarMailException,Attachment,
|
QuarMailException,Attachment,
|
||||||
AttachmentException
|
AttachmentException,URI,URIException
|
||||||
)
|
)
|
||||||
from GulagUtils import whoami
|
from GulagUtils import whoami
|
||||||
|
|
||||||
@ -99,6 +99,11 @@ class GulagDB:
|
|||||||
cnt += 1
|
cnt += 1
|
||||||
return where_clause
|
return where_clause
|
||||||
|
|
||||||
|
def parse_filters(self, filters):
    """Placeholder for filter parsing.

    *filters* is currently ignored and True is always returned.
    """
    # TODO: not implemented yet. Sample filter payload noted by the author:
    # {"groupOp":"AND","rules":[{"field":"Customer","op":"eq","data":"eosp"}]}
    return True
|
||||||
|
|
||||||
def get_mailboxes(self):
|
def get_mailboxes(self):
|
||||||
try:
|
try:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
@ -217,7 +222,7 @@ class GulagDB:
|
|||||||
data = cursor.fetchall()
|
data = cursor.fetchall()
|
||||||
if not data:
|
if not data:
|
||||||
raise GulagDBException(whoami(self)
|
raise GulagDBException(whoami(self)
|
||||||
+ "Quarmail with id '"+ args['id'] + "' does not exist!"
|
+ "Quarmail with id '"+ str(args['id']) + "' does not exist!"
|
||||||
)
|
)
|
||||||
desc = cursor.description
|
desc = cursor.description
|
||||||
cursor.close()
|
cursor.close()
|
||||||
@ -380,3 +385,61 @@ class GulagDB:
|
|||||||
except mariadb.Error as e:
|
except mariadb.Error as e:
|
||||||
raise GulagDBException(whoami(self) + str(e)) from e
|
raise GulagDBException(whoami(self) + str(e)) from e
|
||||||
|
|
||||||
|
def add_uri(self, args):
    """Insert one row into the URIs table.

    Args:
        args: mapping providing ``uri`` and ``fqdn``.

    Returns:
        ``cursor.lastrowid`` of the insert.

    Raises:
        GulagDBException: wraps any ``mariadb.Error``.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute(
            "insert into URIs (uri, fqdn) values (%s,%s)",
            (args['uri'], args['fqdn'])
        )
        # Evaluated before the finally clause closes the cursor.
        return cursor.lastrowid
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # The cursor used to be left open on every call.
        if cursor is not None:
            cursor.close()
|
||||||
|
|
||||||
|
def del_uri(self, uri_id):
    """Delete the row with primary key *uri_id* from the URIs table.

    Args:
        uri_id: id of the URI row (int or numeric string).

    Returns:
        ``cursor.lastrowid``, kept for backward compatibility.
        NOTE(review): for a DELETE this value is probably not meaningful;
        ``cursor.rowcount`` may be what callers actually want - confirm.

    Raises:
        GulagDBException: wraps any ``mariadb.Error``.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        # Bound parameter instead of string concatenation: no SQL
        # injection and no TypeError when an int is passed. The key
        # column of URIs is ``id`` (see the table definition), not
        # ``uri_id`` as the previous statement assumed.
        cursor.execute("delete from URIs where id=%s", (uri_id,))
        return cursor.lastrowid
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        if cursor is not None:
            cursor.close()
|
||||||
|
|
||||||
|
|
||||||
|
def quarmail2uri(self, quarmail_id, uri_id):
    """Link a quarantined mail to a URI via the QuarMail2URI table.

    Args:
        quarmail_id: id of the QuarMails row.
        uri_id: id of the URIs row.

    Raises:
        GulagDBException: wraps any ``mariadb.Error``.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute(
            "insert into QuarMail2URI (quarmail_id, uri_id) values (%s,%s)",
            (quarmail_id, uri_id)
        )
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # The cursor used to be left open on every call.
        if cursor is not None:
            cursor.close()
|
||||||
|
|
||||||
|
def get_quarmail_uris(self, quarmail_id):
    """Fetch all URI rows linked to one quarantined mail.

    Args:
        quarmail_id: id of the QuarMails row.

    Returns:
        A list with one dict per URI (the ``__dict__`` of a ``URI``
        entity), each extended by an ``href`` built from
        ``self.uri_prefixes['quarmails']``, the mail id and the URI id.

    Raises:
        GulagDBException: if the mail has no URIs, or wrapping any
            ``mariadb.Error``.
    """
    # The id is bound as a parameter instead of being pasted into the SQL.
    query = (
        "select URIs.*"
        " from QuarMail2URI"
        " left join QuarMails ON QuarMails.id = QuarMail2URI.quarmail_id"
        " left join URIs ON URIs.id = QuarMail2URI.uri_id"
        " where QuarMails.id = %s;"
    )
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute(query, (quarmail_id,))
        rows = cursor.fetchall()
        if not rows:
            raise GulagDBException(whoami(self)
                + "QuarMail("+ str(quarmail_id) +") has no uris!"
            )
        # First element of each description entry is the column name.
        columns = [col[0] for col in cursor.description]
        href_base = (
            self.uri_prefixes['quarmails'] + str(quarmail_id) + "/uris/"
        )
        results = []
        for row in rows:
            record = dict(zip(columns, row))
            record['href'] = href_base + str(record['id'])
            results.append(URI(record).__dict__)
        return results
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        if cursor is not None:
            cursor.close()
|
||||||
|
|||||||
@ -91,6 +91,19 @@ class IMAPmailbox:
|
|||||||
+ str(self.email_address) + " not found!"
|
+ str(self.email_address) + " not found!"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_main_parts(self, imap_uid):
    """Return the decoded payloads of all text/plain and text/html parts.

    The message is obtained through ``self.get_message(imap_uid)`` and
    parsed with the ``email`` package; every MIME part is visited.

    Returns:
        A non-empty list with the ``get_payload(decode=True)`` result of
        each matching part, in walk order.

    Raises:
        IMAPmailboxException: if the message has no such part.
    """
    message = email.message_from_bytes(self.get_message(imap_uid))
    payloads = [
        part.get_payload(decode=True)
        for part in message.walk()
        if part.get_content_type() in ('text/plain', 'text/html')
    ]
    if not payloads:
        raise IMAPmailboxException(whoami(self) +
            "IMAP_UID(" + str(imap_uid)+")@"+str(self.email_address)+" has no main parts!"
        )
    return payloads
|
||||||
|
|
||||||
def append_message(self,message):
|
def append_message(self,message):
|
||||||
rv, data = self.mailbox.append(
|
rv, data = self.mailbox.append(
|
||||||
self.imap_mailbox,
|
self.imap_mailbox,
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
import sys
|
import sys,re
|
||||||
from smtplib import SMTP
|
from smtplib import SMTP
|
||||||
|
|
||||||
def whoami(obj):
|
def whoami(obj):
|
||||||
@ -20,3 +20,17 @@ def send_mail(args):
|
|||||||
except TimeoutError as e:
|
except TimeoutError as e:
|
||||||
raise Exception('xyz') from e
|
raise Exception('xyz') from e
|
||||||
|
|
||||||
|
def extract_uris(string):
    """Collect the distinct http/https URIs found in *string*.

    Args:
        string: text to scan; ``None`` or an empty string yields ``{}``.

    Returns:
        A dict keyed by URI, each mapped to an empty dict; a URI that
        occurs several times appears once.

    A URI ends at whitespace, ``<``, ``>`` or ``"``. Trailing sentence
    punctuation and a closing single quote (``. , ; : ! ? '``) are not
    treated as part of the URI, so ``"see http://example.org/x."`` and
    ``href='http://example.org/x'`` both yield ``http://example.org/x``.
    """
    if not string:
        return {}
    # Same character class as before, but the final character must not be
    # one of the punctuation marks that usually belong to the surrounding
    # text rather than to the link.
    uri_pattern = r'https?:\/\/[^\s<>"]*[^\s<>".,;:!?\']'
    return {m.group(0): {} for m in re.finditer(uri_pattern, string)}
|
||||||
|
|
||||||
|
def extract_fqdn(uri):
    """Return the host name of an http/https URI, or None.

    Args:
        uri: the URI string; anything that does not start with
            ``http://`` or ``https://`` (including ``None``) yields None.

    Returns:
        The host without userinfo, port, query or fragment, lower-cased
        by ``urlsplit``; None if no host can be determined.
    """
    # Local import keeps this fix self-contained in the function.
    from urllib.parse import urlsplit

    if not isinstance(uri, str):
        return None
    if not re.match(r'(https?:\/\/[^\s<>"]+)', uri):
        return None
    try:
        # urlsplit separates the authority properly. The former regex
        # returned "user" for http://user:pw@host/ and kept "?query" or
        # "#fragment" when the URI had no path.
        return urlsplit(uri).hostname
    except ValueError:
        # Raised for malformed authorities, e.g. an unbalanced '['.
        return None
|
||||||
|
|||||||
@ -82,6 +82,18 @@ class ResQuarMailAttachment(GulagResource):
|
|||||||
except GulagException as e:
|
except GulagException as e:
|
||||||
abort(400, message=e.message)
|
abort(400, message=e.message)
|
||||||
|
|
||||||
|
class ResQuarMailURIs(GulagResource):
    """REST resource listing the URIs of one quarantined mail."""

    def get(self, quarmail_id):
        """Return the URIs for *quarmail_id*.

        A truthy ``from_rfc822_message`` query parameter is forwarded as
        a flag to ``get_quarmail_uris``; a GulagException is answered
        with HTTP 400 carrying the exception message.
        """
        lookup = {"quarmail_id": quarmail_id}
        from_message = request.args.get('from_rfc822_message')
        if from_message:
            lookup['from_rfc822_message'] = True
        try:
            return self.gulag.get_quarmail_uris(lookup)
        except GulagException as e:
            abort(400, message=e.message)
|
||||||
|
|
||||||
class ResAttachments(GulagResource):
|
class ResAttachments(GulagResource):
|
||||||
def get(self):
|
def get(self):
|
||||||
return {"resource": "Attachments"}
|
return {"resource": "Attachments"}
|
||||||
|
|||||||
@ -7,7 +7,7 @@ from Gulag import Gulag,GulagException
|
|||||||
from Resources import (ResRoot,ResMailboxes,
|
from Resources import (ResRoot,ResMailboxes,
|
||||||
ResQuarMails,ResQuarMail,ResQuarMailAttachments,
|
ResQuarMails,ResQuarMail,ResQuarMailAttachments,
|
||||||
ResQuarMailAttachment,ResAttachments,ResAttachment,
|
ResQuarMailAttachment,ResAttachments,ResAttachment,
|
||||||
ResRSPAMDImporter
|
ResRSPAMDImporter,ResQuarMailURIs
|
||||||
)
|
)
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--config', required=True, help="Path to config file")
|
parser.add_argument('--config', required=True, help="Path to config file")
|
||||||
@ -44,6 +44,10 @@ try:
|
|||||||
'/api/v1/quarmails/<int:quarmail_id>/attachments/<int:attachment_id>',
|
'/api/v1/quarmails/<int:quarmail_id>/attachments/<int:attachment_id>',
|
||||||
resource_class_kwargs={'gulag_object': gulag}
|
resource_class_kwargs={'gulag_object': gulag}
|
||||||
)
|
)
|
||||||
|
api.add_resource(ResQuarMailURIs,
|
||||||
|
'/api/v1/quarmails/<int:quarmail_id>/uris',
|
||||||
|
resource_class_kwargs={'gulag_object': gulag}
|
||||||
|
)
|
||||||
api.add_resource(ResAttachments,
|
api.add_resource(ResAttachments,
|
||||||
'/api/v1/attachments',
|
'/api/v1/attachments',
|
||||||
resource_class_kwargs={'gulag_object': gulag}
|
resource_class_kwargs={'gulag_object': gulag}
|
||||||
|
|||||||
16
db/gulag.sql
16
db/gulag.sql
@ -20,10 +20,6 @@ create table Mailboxes(
|
|||||||
smtp_pass varchar(2048) default null,
|
smtp_pass varchar(2048) default null,
|
||||||
comment varchar(256) default null
|
comment varchar(256) default null
|
||||||
)ENGINE = InnoDB;
|
)ENGINE = InnoDB;
|
||||||
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
|
||||||
values('quarantine-in@example.org','E-Mail inbound quarantine','quarantine-in','quarantine-in_secure_password');
|
|
||||||
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
|
||||||
values('quarantine-out@example.org','E-Mail outbound quarantine','quarantine-out','quarantine-out_secure_password');
|
|
||||||
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
||||||
values('quarantine-sandbox@example.org','E-Mail sandbox quarantine','quarantine-sb','quarantine-sb_secure_password');
|
values('quarantine-sandbox@example.org','E-Mail sandbox quarantine','quarantine-sb','quarantine-sb_secure_password');
|
||||||
|
|
||||||
@ -60,3 +56,15 @@ create table QuarMail2Attachment (
|
|||||||
foreign key (attachment_id) references Attachments (id) on delete cascade on update cascade
|
foreign key (attachment_id) references Attachments (id) on delete cascade on update cascade
|
||||||
)ENGINE = InnoDB;
|
)ENGINE = InnoDB;
|
||||||
|
|
||||||
|
create table URIs (
|
||||||
|
id int unsigned auto_increment primary key,
|
||||||
|
uri varchar(2048),
|
||||||
|
fqdn varchar(512)
|
||||||
|
)ENGINE = InnoDB;
|
||||||
|
|
||||||
|
create table QuarMail2URI (
|
||||||
|
quarmail_id int unsigned,
|
||||||
|
uri_id int unsigned,
|
||||||
|
foreign key (quarmail_id) references QuarMails (id) on delete cascade on update cascade,
|
||||||
|
foreign key (uri_id) references URIs (id) on delete cascade on update cascade
|
||||||
|
)ENGINE = InnoDB;
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
swagger: '2.0'
|
swagger: '2.0'
|
||||||
info:
|
info:
|
||||||
description: Gulag quarantine REST API
|
description: Gulag quarantine REST API
|
||||||
version: '18.12'
|
version: "1.0.0"
|
||||||
title: Gulag quarantine REST API
|
title: Gulag quarantine REST API
|
||||||
contact:
|
contact:
|
||||||
email: info@dc-it-con.de
|
email: info@dc-it-con.de
|
||||||
@ -207,6 +207,35 @@ paths:
|
|||||||
description: bad input parameter
|
description: bad input parameter
|
||||||
500:
|
500:
|
||||||
description: server error
|
description: server error
|
||||||
|
|
||||||
|
/quarmails/{quarmail_id}/uris:
|
||||||
|
get:
|
||||||
|
summary: "retrieves all URIS from any main MIME part (text/plain,text/html)"
|
||||||
|
operationId: get_quarmail_uris
|
||||||
|
produces:
|
||||||
|
- application/json
|
||||||
|
parameters:
|
||||||
|
- in: path
|
||||||
|
name: quarmail_id
|
||||||
|
description: unique id of quarantined email
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
- in: query
|
||||||
|
name: from_rfc822_message
|
||||||
|
description: fetch all URIs from RFC822 message not from database
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: array of URIs
|
||||||
|
schema:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/URI'
|
||||||
|
400:
|
||||||
|
description: bad input parameter
|
||||||
|
500:
|
||||||
|
description: server error
|
||||||
|
|
||||||
definitions:
|
definitions:
|
||||||
QuarMail:
|
QuarMail:
|
||||||
@ -321,3 +350,16 @@ definitions:
|
|||||||
data:
|
data:
|
||||||
type: string
|
type: string
|
||||||
description: raw/encoded (see content_encoding) attachment payload
|
description: raw/encoded (see content_encoding) attachment payload
|
||||||
|
URI:
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- id
|
||||||
|
- uri
|
||||||
|
- fqdn
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: integer
|
||||||
|
uri:
|
||||||
|
type: string
|
||||||
|
fqdn:
|
||||||
|
type: string
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user