mirror of
https://github.com/chillout2k/gulag.git
synced 2025-12-13 16:00:18 +00:00
URI/FQDN extraction
This commit is contained in:
parent
a50740d60f
commit
70550c4021
@ -178,3 +178,27 @@ class Attachment:
|
||||
if 'href' in at_ref:
|
||||
self.href = at_ref['href']
|
||||
|
||||
class URIException(Exception):
    """Error raised when a URI entity cannot be built from its input."""

    # Class-level default; every instance overwrites it in __init__.
    message = None

    def __init__(self, message):
        # Pass the text on to Exception as well; args already carry it,
        # so str(exc) stays the same.
        super().__init__(message)
        self.message = message
|
||||
|
||||
class URI:
    """A URI record built from a mapping.

    The mapping must contain 'id', 'uri' and 'fqdn'; 'href' is optional.
    A missing mandatory key raises URIException.
    """

    # Class-level defaults; instances overwrite them in __init__.
    id = None
    uri = None
    fqdn = None
    href = None

    def __init__(self, uri_ref):
        # Check and copy the mandatory keys in a fixed order, so that a
        # missing key raises before any later key is looked at.
        for field in ('id', 'uri', 'fqdn'):
            if field not in uri_ref:
                raise URIException("'" + field + "' is mandatory!")
            setattr(self, field, uri_ref[field])
        if 'href' in uri_ref:
            self.href = uri_ref['href']
|
||||
|
||||
|
||||
61
app/Gulag.py
61
app/Gulag.py
@ -1,8 +1,8 @@
|
||||
import json,sys,os,logging
|
||||
import json,sys,os,logging,re
|
||||
import email,email.header,email.message
|
||||
from GulagDB import GulagDB,GulagDBException
|
||||
from GulagMailbox import IMAPmailbox,IMAPmailboxException
|
||||
from GulagUtils import whoami
|
||||
from GulagUtils import whoami,extract_uris,extract_fqdn
|
||||
|
||||
class GulagException(Exception):
|
||||
message = None
|
||||
@ -78,6 +78,7 @@ class Gulag:
|
||||
for unseen in imap_mb.get_unseen_messages():
|
||||
quarmail_ids = []
|
||||
attachments = []
|
||||
uris = {}
|
||||
uid = unseen['imap_uid']
|
||||
msg = email.message_from_bytes(unseen['msg'])
|
||||
msg_size = len(msg)
|
||||
@ -154,12 +155,30 @@ class Gulag:
|
||||
})
|
||||
attachments.append(attach_id)
|
||||
# Ende if part.get_filename()
|
||||
# get all URIs
|
||||
ctype = part.get_content_type()
|
||||
if(ctype == 'text/plain' or ctype == 'text/html'):
|
||||
curis = {}
|
||||
curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
|
||||
if(len(curis) > 0):
|
||||
uris = {**uris, **curis}
|
||||
# Ende for msg.walk()
|
||||
# QuarMail und Attachments verknüpfen
|
||||
if(len(attachments) > 0):
|
||||
for quarmail_id in quarmail_ids:
|
||||
for attachment_id in attachments:
|
||||
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
|
||||
if(len(uris) > 0):
|
||||
for quarmail_id in quarmail_ids:
|
||||
for uri in uris:
|
||||
try:
|
||||
uri_id = self.db.add_uri({
|
||||
"uri": uri,
|
||||
"fqdn": extract_fqdn(uri)
|
||||
})
|
||||
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
|
||||
except GulagDBException as e:
|
||||
logging.error(whoami(self) + e.message)
|
||||
# Ende for(unseen)
|
||||
imap_mb.close()
|
||||
# Ende for get_mailboxes
|
||||
@ -256,10 +275,38 @@ class Gulag:
|
||||
if 'data' not in args:
|
||||
return at_db
|
||||
|
||||
def get_uris(self):
    """Placeholder that always returns True.

    Reference kept from the original stub:
    https://stackoverflow.com/questions/1792366/extract-urls-out-of-email-in-python
    """
    return True
|
||||
|
||||
def get_quarmail_uris(self, args):
    """Return the URIs that belong to one quarantined mail.

    args must contain 'quarmail_id'. If 'from_rfc822_message' is absent,
    the result of self.db.get_quarmail_uris() is returned unchanged.
    If it is present, the message is fetched over IMAP and every http(s)
    URI found in the parts returned by IMAPmailbox.get_main_parts() is
    returned as a list of strings, in order of appearance and including
    duplicates.

    Raises GulagException on database or IMAP errors.
    """
    if 'from_rfc822_message' not in args:
        try:
            return self.db.get_quarmail_uris(args['quarmail_id'])
        except GulagDBException as e:
            raise GulagException(whoami(self) + e.message) from e

    # Both lookups share the same error handling, so they share one try.
    try:
        qm_db = self.db.get_quarmail({"id": args['quarmail_id']})
        mailbox = self.db.get_mailbox(qm_db['mailbox_id'])
    except GulagDBException as e:
        logging.warning(whoami(self) + e.message)
        raise GulagException(whoami(self) + e.message) from e

    imap_mb = None
    try:
        imap_mb = IMAPmailbox(mailbox)
        uri_pattern = r'(https?:\/\/[^\s<>"]+)'
        uris = []
        for part in imap_mb.get_main_parts(qm_db['imap_uid']):
            # Mail bodies are not guaranteed to be UTF-8. Replace bytes
            # that cannot be decoded instead of letting UnicodeDecodeError
            # escape, which no handler here would catch.
            text = part.decode("utf-8", errors="replace")
            uris.extend(m.group(0) for m in re.finditer(uri_pattern, text))
        return uris
    except IMAPmailboxException as e:
        logging.warning(whoami(self) + e.message)
        raise GulagException(whoami(self) + e.message) from e
    finally:
        # Release the IMAP connection on every path; it was never closed
        # before. A failure while closing is logged, not raised, so it
        # cannot mask the real result or error.
        if imap_mb is not None:
            try:
                imap_mb.close()
            except IMAPmailboxException as e:
                logging.warning(whoami(self) + e.message)
|
||||
|
||||
def rspamd_http2imap(self,args):
|
||||
mailbox = None
|
||||
try:
|
||||
@ -302,7 +349,7 @@ class Gulag:
|
||||
)
|
||||
logging.error(err)
|
||||
raise GulagException(err)
|
||||
if('rfc822_message' not in args['rfc822_message']):
|
||||
if('rfc822_message' not in args):
|
||||
err = str(whoami(self)
|
||||
+ "Missing rfc822_message!"
|
||||
)
|
||||
|
||||
@ -2,7 +2,7 @@ import mysql.connector as mariadb
|
||||
from Entities import(
|
||||
Mailbox,MailboxException,QuarMail,
|
||||
QuarMailException,Attachment,
|
||||
AttachmentException
|
||||
AttachmentException,URI,URIException
|
||||
)
|
||||
from GulagUtils import whoami
|
||||
|
||||
@ -99,6 +99,11 @@ class GulagDB:
|
||||
cnt += 1
|
||||
return where_clause
|
||||
|
||||
def parse_filters(self, filters):
    """Placeholder for filter parsing; always returns True.

    TODO: parse filter objects such as
    {"groupOp":"AND","rules":[{"field":"Customer","op":"eq","data":"eosp"}]}
    """
    return True
|
||||
|
||||
def get_mailboxes(self):
|
||||
try:
|
||||
cursor = self.conn.cursor()
|
||||
@ -217,7 +222,7 @@ class GulagDB:
|
||||
data = cursor.fetchall()
|
||||
if not data:
|
||||
raise GulagDBException(whoami(self)
|
||||
+ "Quarmail with id '"+ args['id'] + "' does not exist!"
|
||||
+ "Quarmail with id '"+ str(args['id']) + "' does not exist!"
|
||||
)
|
||||
desc = cursor.description
|
||||
cursor.close()
|
||||
@ -380,3 +385,61 @@ class GulagDB:
|
||||
except mariadb.Error as e:
|
||||
raise GulagDBException(whoami(self) + str(e)) from e
|
||||
|
||||
def add_uri(self, args):
    """Insert one row into URIs and return the cursor's lastrowid.

    args must provide the keys 'uri' and 'fqdn'.
    Raises GulagDBException if the database reports an error.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute(
            "insert into URIs (uri, fqdn) values (%s,%s)",
            (args['uri'], args['fqdn'])
        )
        return cursor.lastrowid
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # Release the cursor on every path; it was never closed before.
        if cursor is not None:
            cursor.close()
|
||||
|
||||
def del_uri(self, uri_id):
    """Delete the row with the given id from URIs.

    Returns the cursor's lastrowid, as before, so existing callers see the
    same return value.
    Raises GulagDBException if the database reports an error.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        # The key column of URIs is 'id' (there is no 'uri_id' column).
        # Bind the value as a parameter: string concatenation failed for
        # int ids and allowed SQL injection.
        cursor.execute("delete from URIs where id=%s", (uri_id,))
        return cursor.lastrowid
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # Release the cursor on every path; it was never closed before.
        if cursor is not None:
            cursor.close()
|
||||
|
||||
|
||||
def quarmail2uri(self, quarmail_id, uri_id):
    """Link a quarantined mail to a URI by inserting into QuarMail2URI.

    Raises GulagDBException if the database reports an error.
    """
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute(
            "insert into QuarMail2URI (quarmail_id, uri_id) values (%s,%s)",
            (quarmail_id, uri_id)
        )
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # Release the cursor on every path; it was never closed before.
        if cursor is not None:
            cursor.close()
|
||||
|
||||
def get_quarmail_uris(self, quarmail_id):
    """Return all URIs linked to one quarantined mail.

    Each result is the attribute dict of a URI entity built from one URIs
    row. An 'href' is added to it, made of the 'quarmails' URI prefix,
    the quarmail id, "/uris/" and the URI id.

    Raises GulagDBException if the mail has no URIs or the database
    reports an error.
    """
    # The id is bound as a parameter instead of being spliced into the SQL.
    query = (
        "select URIs.*"
        " from QuarMail2URI"
        " left join QuarMails ON QuarMails.id = QuarMail2URI.quarmail_id"
        " left join URIs ON URIs.id = QuarMail2URI.uri_id"
        " where QuarMails.id = %s;"
    )
    cursor = None
    try:
        cursor = self.conn.cursor()
        cursor.execute(query, (quarmail_id,))
        rows = cursor.fetchall()
        if not rows:
            raise GulagDBException(whoami(self)
                + "QuarMail("+ str(quarmail_id) +") has no uris!"
            )
        # cursor.description holds one tuple per column; item 0 is its name.
        columns = [col[0] for col in cursor.description]
        href_base = self.uri_prefixes['quarmails'] + str(quarmail_id) + "/uris/"
        results = []
        for row in rows:
            record = dict(zip(columns, row))
            record['href'] = href_base + str(record['id'])
            results.append(URI(record).__dict__)
        return results
    except mariadb.Error as e:
        raise GulagDBException(whoami(self) + str(e)) from e
    finally:
        # Release the cursor on every path, including the "no uris" raise.
        if cursor is not None:
            cursor.close()
|
||||
|
||||
@ -91,6 +91,19 @@ class IMAPmailbox:
|
||||
+ str(self.email_address) + " not found!"
|
||||
)
|
||||
|
||||
def get_main_parts(self, imap_uid):
    """Return the decoded payloads of all text/plain and text/html parts.

    The message is loaded with self.get_message(imap_uid) and walked part
    by part. Raises IMAPmailboxException if no such part exists.
    """
    message = email.message_from_bytes(self.get_message(imap_uid))
    bodies = [
        part.get_payload(decode=True)
        for part in message.walk()
        if part.get_content_type() in ('text/plain', 'text/html')
    ]
    if not bodies:
        raise IMAPmailboxException(whoami(self) +
            "IMAP_UID(" + str(imap_uid)+")@"+str(self.email_address)+" has no main parts!"
        )
    return bodies
|
||||
|
||||
def append_message(self,message):
|
||||
rv, data = self.mailbox.append(
|
||||
self.imap_mailbox,
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import sys
|
||||
import sys,re
|
||||
from smtplib import SMTP
|
||||
|
||||
def whoami(obj):
|
||||
@ -20,3 +20,17 @@ def send_mail(args):
|
||||
except TimeoutError as e:
|
||||
raise Exception('xyz') from e
|
||||
|
||||
def extract_uris(string):
    """Return a dict keyed by every http(s) URI found in *string*.

    Each value is a new empty dict. A URI that occurs several times
    appears once.
    """
    return {
        match.group(0): {}
        for match in re.finditer(r'(https?:\/\/[^\s<>"]+)', string)
    }
|
||||
|
||||
def extract_fqdn(uri):
    """Return the host part of an http(s) URI, or None if it has none.

    The URI must start with "http://" or "https://". Optional userinfo
    ("user:pw@") is skipped, and the host ends at the first ':', '/', '?',
    '#', whitespace, '<', '>' or '"'.

    The previous two-step match raised AttributeError for inputs such as
    "http:///path". It also returned query strings or userinfo as part of
    the host.
    """
    match = re.match(r'https?://(?:[^/?#\s<>"@]*@)?([^:/?#\s<>"@]+)', uri)
    if match is None:
        return None
    return match.group(1)
|
||||
|
||||
@ -82,6 +82,18 @@ class ResQuarMailAttachment(GulagResource):
|
||||
except GulagException as e:
|
||||
abort(400, message=e.message)
|
||||
|
||||
class ResQuarMailURIs(GulagResource):
    """REST resource that lists the URIs of one quarantined mail."""

    def get(self, quarmail_id):
        """Return the URIs of the mail, or abort with HTTP 400 on error.

        A truthy 'from_rfc822_message' query parameter is passed on to
        gulag.get_quarmail_uris() as a flag.
        """
        query = {"quarmail_id": quarmail_id}
        if request.args.get('from_rfc822_message'):
            query['from_rfc822_message'] = True
        try:
            return self.gulag.get_quarmail_uris(query)
        except GulagException as err:
            abort(400, message=err.message)
|
||||
|
||||
class ResAttachments(GulagResource):
|
||||
def get(self):
|
||||
return {"resource": "Attachments"}
|
||||
|
||||
@ -7,7 +7,7 @@ from Gulag import Gulag,GulagException
|
||||
from Resources import (ResRoot,ResMailboxes,
|
||||
ResQuarMails,ResQuarMail,ResQuarMailAttachments,
|
||||
ResQuarMailAttachment,ResAttachments,ResAttachment,
|
||||
ResRSPAMDImporter
|
||||
ResRSPAMDImporter,ResQuarMailURIs
|
||||
)
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--config', required=True, help="Path to config file")
|
||||
@ -44,6 +44,10 @@ try:
|
||||
'/api/v1/quarmails/<int:quarmail_id>/attachments/<int:attachment_id>',
|
||||
resource_class_kwargs={'gulag_object': gulag}
|
||||
)
|
||||
api.add_resource(ResQuarMailURIs,
|
||||
'/api/v1/quarmails/<int:quarmail_id>/uris',
|
||||
resource_class_kwargs={'gulag_object': gulag}
|
||||
)
|
||||
api.add_resource(ResAttachments,
|
||||
'/api/v1/attachments',
|
||||
resource_class_kwargs={'gulag_object': gulag}
|
||||
|
||||
16
db/gulag.sql
16
db/gulag.sql
@ -20,10 +20,6 @@ create table Mailboxes(
|
||||
smtp_pass varchar(2048) default null,
|
||||
comment varchar(256) default null
|
||||
)ENGINE = InnoDB;
|
||||
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
||||
values('quarantine-in@example.org','E-Mail inbound quarantine','quarantine-in','quarantine-in_secure_password');
|
||||
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
||||
values('quarantine-out@example.org','E-Mail outbound quarantine','quarantine-out','quarantine-out_secure_password');
|
||||
insert into Mailboxes (email_address,name,imap_user,imap_pass)
|
||||
values('quarantine-sandbox@example.org','E-Mail sandbox quarantine','quarantine-sb','quarantine-sb_secure_password');
|
||||
|
||||
@ -60,3 +56,15 @@ create table QuarMail2Attachment (
|
||||
foreign key (attachment_id) references Attachments (id) on delete cascade on update cascade
|
||||
)ENGINE = InnoDB;
|
||||
|
||||
-- URIs extracted from quarantined mails.
-- uri is mandatory; fqdn stays nullable.
create table URIs (
  id int unsigned auto_increment primary key,
  uri varchar(2048) not null,
  fqdn varchar(512) default null
)ENGINE = InnoDB;
|
||||
|
||||
-- Link table between QuarMails and URIs.
-- Both columns are mandatory: a link row with a NULL side refers to nothing.
create table QuarMail2URI (
  quarmail_id int unsigned not null,
  uri_id int unsigned not null,
  foreign key (quarmail_id) references QuarMails (id) on delete cascade on update cascade,
  foreign key (uri_id) references URIs (id) on delete cascade on update cascade
)ENGINE = InnoDB;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
swagger: '2.0'
|
||||
info:
|
||||
description: Gulag quarantine REST API
|
||||
version: '18.12'
|
||||
version: "1.0.0"
|
||||
title: Gulag quarantine REST API
|
||||
contact:
|
||||
email: info@dc-it-con.de
|
||||
@ -207,6 +207,35 @@ paths:
|
||||
description: bad input parameter
|
||||
500:
|
||||
description: server error
|
||||
|
||||
/quarmails/{quarmail_id}/uris:
|
||||
get:
|
||||
summary: "retrieves all URIs from any main MIME part (text/plain, text/html)"
|
||||
operationId: get_quarmail_uris
|
||||
produces:
|
||||
- application/json
|
||||
parameters:
|
||||
- in: path
|
||||
name: quarmail_id
|
||||
description: unique id of quarantined email
|
||||
required: true
|
||||
type: string
|
||||
- in: query
|
||||
name: from_rfc822_message
|
||||
description: fetch all URIs from the RFC822 message instead of from the database
|
||||
required: false
|
||||
type: string
|
||||
responses:
|
||||
200:
|
||||
description: array of URIs
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/definitions/URI'
|
||||
400:
|
||||
description: bad input parameter
|
||||
500:
|
||||
description: server error
|
||||
|
||||
definitions:
|
||||
QuarMail:
|
||||
@ -321,3 +350,16 @@ definitions:
|
||||
data:
|
||||
type: string
|
||||
description: raw/encoded (see content_encoding) attachment payload
|
||||
URI:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- uri
|
||||
- fqdn
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
uri:
|
||||
type: string
|
||||
fqdn:
|
||||
type: string
|
||||
|
||||
Loading…
Reference in New Issue
Block a user