mirror of
https://github.com/chillout2k/gulag.git
synced 2025-12-13 16:00:18 +00:00
attachment handling with magic and sub-URI parsing
This commit is contained in:
parent
5723b18367
commit
acda4c8e11
@ -97,6 +97,7 @@ class QuarMail:
|
|||||||
msg_size = None
|
msg_size = None
|
||||||
href = None
|
href = None
|
||||||
attach_count = None
|
attach_count = None
|
||||||
|
uri_count = None
|
||||||
|
|
||||||
def __init__(self,qm_ref):
|
def __init__(self,qm_ref):
|
||||||
if 'id' not in qm_ref:
|
if 'id' not in qm_ref:
|
||||||
@ -139,6 +140,8 @@ class QuarMail:
|
|||||||
self.href = qm_ref['href']
|
self.href = qm_ref['href']
|
||||||
if 'attach_count' in qm_ref:
|
if 'attach_count' in qm_ref:
|
||||||
self.attach_count = qm_ref['attach_count']
|
self.attach_count = qm_ref['attach_count']
|
||||||
|
if 'uri_count' in qm_ref:
|
||||||
|
self.uri_count = qm_ref['uri_count']
|
||||||
|
|
||||||
class AttachmentException(Exception):
|
class AttachmentException(Exception):
|
||||||
message = None
|
message = None
|
||||||
@ -150,6 +153,7 @@ class Attachment:
|
|||||||
filename = None
|
filename = None
|
||||||
content_type = None
|
content_type = None
|
||||||
content_encoding = None
|
content_encoding = None
|
||||||
|
magic = None
|
||||||
comment = None
|
comment = None
|
||||||
mailbox_id = None
|
mailbox_id = None
|
||||||
imap_uid = None
|
imap_uid = None
|
||||||
@ -167,6 +171,9 @@ class Attachment:
|
|||||||
self.content_type = at_ref['content_type']
|
self.content_type = at_ref['content_type']
|
||||||
if 'content_encoding' in at_ref:
|
if 'content_encoding' in at_ref:
|
||||||
self.content_encoding = at_ref['content_encoding']
|
self.content_encoding = at_ref['content_encoding']
|
||||||
|
if 'magic' not in at_ref:
|
||||||
|
raise AttachmentException("'magic' is mandatory!")
|
||||||
|
self.magic = at_ref['magic']
|
||||||
if 'comment' in at_ref:
|
if 'comment' in at_ref:
|
||||||
self.comment = at_ref['comment']
|
self.comment = at_ref['comment']
|
||||||
if 'mailbox_id' not in at_ref:
|
if 'mailbox_id' not in at_ref:
|
||||||
|
|||||||
17
app/Gulag.py
17
app/Gulag.py
@ -1,4 +1,4 @@
|
|||||||
import json,sys,os,logging,re
|
import json,sys,os,logging,re,magic
|
||||||
import email,email.header,email.message
|
import email,email.header,email.message
|
||||||
from GulagDB import GulagDB,GulagDBException
|
from GulagDB import GulagDB,GulagDBException
|
||||||
from GulagMailbox import IMAPmailbox,IMAPmailboxException
|
from GulagMailbox import IMAPmailbox,IMAPmailboxException
|
||||||
@ -30,6 +30,7 @@ class Gulag:
|
|||||||
raise GulagException(whoami(self) + "Logging not configured!")
|
raise GulagException(whoami(self) + "Logging not configured!")
|
||||||
if('filename' in self.config['logging'] and
|
if('filename' in self.config['logging'] and
|
||||||
len(self.config['logging']['filename']) > 0):
|
len(self.config['logging']['filename']) > 0):
|
||||||
|
# TODO: Exception handling
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
filename=self.config['logging']['filename'],
|
filename=self.config['logging']['filename'],
|
||||||
format='%(asctime)s %(levelname)s %(message)s',
|
format='%(asctime)s %(levelname)s %(message)s',
|
||||||
@ -148,10 +149,16 @@ class Gulag:
|
|||||||
else:
|
else:
|
||||||
# filename isn´t encoded
|
# filename isn´t encoded
|
||||||
filename = filename[0][0]
|
filename = filename[0][0]
|
||||||
|
attach_magic = None
|
||||||
|
try:
|
||||||
|
attach_magic = magic.from_buffer(part.get_payload(decode=True))
|
||||||
|
except:
|
||||||
|
logging.info(whoami(self) + ": " + str(sys.exc_info()))
|
||||||
attach_id = self.db.add_attachment({
|
attach_id = self.db.add_attachment({
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'content_type': part.get_content_type(),
|
'content_type': part.get_content_type(),
|
||||||
'content_encoding': part['Content-Transfer-Encoding']
|
'content_encoding': part['Content-Transfer-Encoding'],
|
||||||
|
'magic': attach_magic
|
||||||
})
|
})
|
||||||
attachments.append(attach_id)
|
attachments.append(attach_id)
|
||||||
# End if part.get_filename()
|
# End if part.get_filename()
|
||||||
@ -168,6 +175,9 @@ class Gulag:
|
|||||||
for quarmail_id in quarmail_ids:
|
for quarmail_id in quarmail_ids:
|
||||||
for attachment_id in attachments:
|
for attachment_id in attachments:
|
||||||
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
|
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
|
||||||
|
logging.info(whoami(self) +
|
||||||
|
"Attachment("+str(attachment_id)+")@QuarMail("+str(quarmail_id)+") imported"
|
||||||
|
)
|
||||||
# link message with uris
|
# link message with uris
|
||||||
if(len(uris) > 0):
|
if(len(uris) > 0):
|
||||||
for quarmail_id in quarmail_ids:
|
for quarmail_id in quarmail_ids:
|
||||||
@ -178,6 +188,9 @@ class Gulag:
|
|||||||
"fqdn": extract_fqdn(uri)
|
"fqdn": extract_fqdn(uri)
|
||||||
})
|
})
|
||||||
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
|
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
|
||||||
|
logging.info(whoami(self) +
|
||||||
|
"URI("+str(uri_id)+")@QuarMail("+str(quarmail_id)+") imported"
|
||||||
|
)
|
||||||
except GulagDBException as e:
|
except GulagDBException as e:
|
||||||
logging.error(whoami(self) + e.message)
|
logging.error(whoami(self) + e.message)
|
||||||
# End for(unseen)
|
# End for(unseen)
|
||||||
|
|||||||
@ -185,7 +185,9 @@ class GulagDB:
|
|||||||
try:
|
try:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
query = "select *,(select count(*) from QuarMail2Attachment"
|
query = "select *,(select count(*) from QuarMail2Attachment"
|
||||||
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count"
|
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count,"
|
||||||
|
query += " (select count(*) from QuarMail2URI"
|
||||||
|
query += " where QuarMails.id=QuarMail2URI.quarmail_id) as uri_count"
|
||||||
query += " from QuarMails " + self.get_where_clause(args)
|
query += " from QuarMails " + self.get_where_clause(args)
|
||||||
query += " " + self.get_limit_clause(args) + " ;"
|
query += " " + self.get_limit_clause(args) + " ;"
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
@ -213,10 +215,10 @@ class GulagDB:
|
|||||||
def get_quarmail(self,args):
|
def get_quarmail(self,args):
|
||||||
try:
|
try:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
# TODO: build SQL query by args
|
|
||||||
#query = "select * from QuarMails where id='" + args['id'] + "';"
|
|
||||||
query = "select *,(select count(*) from QuarMail2Attachment"
|
query = "select *,(select count(*) from QuarMail2Attachment"
|
||||||
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count"
|
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count,"
|
||||||
|
query += " (select count(*) from QuarMail2URI"
|
||||||
|
query += " where QuarMails.id=QuarMail2URI.quarmail_id) as uri_count"
|
||||||
query += " from QuarMails where QuarMails.id="+ str(args['id']) +";"
|
query += " from QuarMails where QuarMails.id="+ str(args['id']) +";"
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
data = cursor.fetchall()
|
data = cursor.fetchall()
|
||||||
@ -266,8 +268,9 @@ class GulagDB:
|
|||||||
try:
|
try:
|
||||||
cursor = self.conn.cursor()
|
cursor = self.conn.cursor()
|
||||||
cursor.execute("insert into Attachments " +
|
cursor.execute("insert into Attachments " +
|
||||||
"(filename, content_type, content_encoding) values (%s,%s,%s)",
|
"(filename,content_type,content_encoding,magic) values (%s,%s,%s,%s)",
|
||||||
(attach['filename'], attach['content_type'], attach['content_encoding'])
|
(attach['filename'],attach['content_type'],
|
||||||
|
attach['content_encoding'],attach['magic'])
|
||||||
)
|
)
|
||||||
return cursor.lastrowid
|
return cursor.lastrowid
|
||||||
except mariadb.Error as e:
|
except mariadb.Error as e:
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
import sys,re
|
import sys,re,urllib
|
||||||
|
from urllib.parse import urlparse
|
||||||
from smtplib import SMTP
|
from smtplib import SMTP
|
||||||
|
|
||||||
def whoami(obj):
|
def whoami(obj):
|
||||||
@ -6,7 +7,7 @@ def whoami(obj):
|
|||||||
|
|
||||||
def send_mail(args):
|
def send_mail(args):
|
||||||
try:
|
try:
|
||||||
# FIXME: SMTP tranaport security and authentication!
|
# FIXME: SMTP transport security and authentication!
|
||||||
# with SMTP(host=mailbox['smtp_server'],port=mailbox['smtp_port']) as smtp:
|
# with SMTP(host=mailbox['smtp_server'],port=mailbox['smtp_port']) as smtp:
|
||||||
# try:
|
# try:
|
||||||
# smtp.sendmail(
|
# smtp.sendmail(
|
||||||
@ -20,17 +21,22 @@ def send_mail(args):
|
|||||||
except TimeoutError as e:
|
except TimeoutError as e:
|
||||||
raise Exception('xyz') from e
|
raise Exception('xyz') from e
|
||||||
|
|
||||||
def extract_uris(string):
|
def extract_uris(input_text):
|
||||||
uris = {}
|
uris = {}
|
||||||
uri_pattern = r'(https?:\/\/[^\s<>"]+)'
|
uri_pattern = r'(https?:\/\/[^\s<>"]+)'
|
||||||
for m in re.finditer(uri_pattern, string):
|
for m in re.finditer(uri_pattern, input_text):
|
||||||
uris[m.group(0)] = {}
|
uri = urllib.parse.unquote(m.group(0))
|
||||||
|
uris[uri] = {}
|
||||||
|
# extract sub-URIs (google redirector: https://www.google.de/url?sa=t&url=...)
|
||||||
|
for m2 in re.finditer(uri_pattern, uri):
|
||||||
|
suburi = urllib.parse.unquote(m2.group(0))
|
||||||
|
uris[suburi] = {"suburi": True}
|
||||||
return uris
|
return uris
|
||||||
|
|
||||||
def extract_fqdn(uri):
|
def extract_fqdn(uri):
|
||||||
uri_pattern = r'(https?:\/\/[^\s<>"]+)'
|
puri = None
|
||||||
if(re.match(uri_pattern,uri)):
|
try:
|
||||||
m = re.match(r'https?:\/\/([^:\/]+)', uri)
|
puri = urlparse(uri)
|
||||||
return m.group(1)
|
return puri.hostname
|
||||||
else:
|
except ValueError as e:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@ -46,6 +46,7 @@ create table Attachments (
|
|||||||
filename varchar(256) not null,
|
filename varchar(256) not null,
|
||||||
content_type varchar(256) not null,
|
content_type varchar(256) not null,
|
||||||
content_encoding varchar(64),
|
content_encoding varchar(64),
|
||||||
|
magic varchar(128),
|
||||||
comment varchar(256)
|
comment varchar(256)
|
||||||
)ENGINE = InnoDB;
|
)ENGINE = InnoDB;
|
||||||
|
|
||||||
|
|||||||
@ -5,11 +5,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
|||||||
TZ=Europe/Berlin
|
TZ=Europe/Berlin
|
||||||
|
|
||||||
RUN set -ex ; \
|
RUN set -ex ; \
|
||||||
apt-get -qq update \
|
apt-get -qq update \
|
||||||
&& apt-get -qq --no-install-recommends install \
|
&& apt-get -qq --no-install-recommends install \
|
||||||
uwsgi-plugin-python3 python3-setuptools python3-flask \
|
uwsgi-plugin-python3 python3-setuptools python3-flask \
|
||||||
python3-flask-restful python3-mysql.connector \
|
python3-flask-restful python3-mysql.connector \
|
||||||
uwsgi uwsgi-plugin-python3 procps net-tools
|
uwsgi uwsgi-plugin-python3 procps net-tools \
|
||||||
|
python3-pip libmagic1 \
|
||||||
|
&& pip3 install python-magic
|
||||||
|
|
||||||
RUN /bin/mkdir /config /socket /app
|
RUN /bin/mkdir /config /socket /app
|
||||||
COPY app/*.py /app/
|
COPY app/*.py /app/
|
||||||
|
|||||||
@ -307,6 +307,9 @@ definitions:
|
|||||||
attach_count:
|
attach_count:
|
||||||
type: integer
|
type: integer
|
||||||
description: number of attachments
|
description: number of attachments
|
||||||
|
uri_count:
|
||||||
|
type: integer
|
||||||
|
description: number of uris
|
||||||
rfc822_message:
|
rfc822_message:
|
||||||
type: string
|
type: string
|
||||||
description: full RFC822 email message
|
description: full RFC822 email message
|
||||||
@ -317,6 +320,7 @@ definitions:
|
|||||||
- filename
|
- filename
|
||||||
- content_encoding
|
- content_encoding
|
||||||
- content_type
|
- content_type
|
||||||
|
- magic
|
||||||
- mailbox_id
|
- mailbox_id
|
||||||
- imap_uid
|
- imap_uid
|
||||||
- href
|
- href
|
||||||
@ -336,6 +340,9 @@ definitions:
|
|||||||
content_type:
|
content_type:
|
||||||
type: string
|
type: string
|
||||||
example: image/jpeg
|
example: image/jpeg
|
||||||
|
magic:
|
||||||
|
type: string
|
||||||
|
example: "PDF document, version 1.2"
|
||||||
href:
|
href:
|
||||||
type: string
|
type: string
|
||||||
description: hypermedia
|
description: hypermedia
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user