mirror of
https://github.com/chillout2k/gulag.git
synced 2025-12-13 16:00:18 +00:00
attachment handling with magic and sub-URI parsing
This commit is contained in:
parent
5723b18367
commit
acda4c8e11
@ -97,6 +97,7 @@ class QuarMail:
|
||||
msg_size = None
|
||||
href = None
|
||||
attach_count = None
|
||||
uri_count = None
|
||||
|
||||
def __init__(self,qm_ref):
|
||||
if 'id' not in qm_ref:
|
||||
@ -139,6 +140,8 @@ class QuarMail:
|
||||
self.href = qm_ref['href']
|
||||
if 'attach_count' in qm_ref:
|
||||
self.attach_count = qm_ref['attach_count']
|
||||
if 'uri_count' in qm_ref:
|
||||
self.uri_count = qm_ref['uri_count']
|
||||
|
||||
class AttachmentException(Exception):
|
||||
message = None
|
||||
@ -150,6 +153,7 @@ class Attachment:
|
||||
filename = None
|
||||
content_type = None
|
||||
content_encoding = None
|
||||
magic = None
|
||||
comment = None
|
||||
mailbox_id = None
|
||||
imap_uid = None
|
||||
@ -167,6 +171,9 @@ class Attachment:
|
||||
self.content_type = at_ref['content_type']
|
||||
if 'content_encoding' in at_ref:
|
||||
self.content_encoding = at_ref['content_encoding']
|
||||
if 'magic' not in at_ref:
|
||||
raise AttachmentException("'magic' is mandatory!")
|
||||
self.magic = at_ref['magic']
|
||||
if 'comment' in at_ref:
|
||||
self.comment = at_ref['comment']
|
||||
if 'mailbox_id' not in at_ref:
|
||||
|
||||
17
app/Gulag.py
17
app/Gulag.py
@ -1,4 +1,4 @@
|
||||
import json,sys,os,logging,re
|
||||
import json,sys,os,logging,re,magic
|
||||
import email,email.header,email.message
|
||||
from GulagDB import GulagDB,GulagDBException
|
||||
from GulagMailbox import IMAPmailbox,IMAPmailboxException
|
||||
@ -30,6 +30,7 @@ class Gulag:
|
||||
raise GulagException(whoami(self) + "Logging not configured!")
|
||||
if('filename' in self.config['logging'] and
|
||||
len(self.config['logging']['filename']) > 0):
|
||||
# TODO: Exception handling
|
||||
logging.basicConfig(
|
||||
filename=self.config['logging']['filename'],
|
||||
format='%(asctime)s %(levelname)s %(message)s',
|
||||
@ -148,10 +149,16 @@ class Gulag:
|
||||
else:
|
||||
# filename isn´t encoded
|
||||
filename = filename[0][0]
|
||||
attach_magic = None
|
||||
try:
|
||||
attach_magic = magic.from_buffer(part.get_payload(decode=True))
|
||||
except:
|
||||
logging.info(whoami(self) + ": " + str(sys.exc_info()))
|
||||
attach_id = self.db.add_attachment({
|
||||
'filename': filename,
|
||||
'content_type': part.get_content_type(),
|
||||
'content_encoding': part['Content-Transfer-Encoding']
|
||||
'content_encoding': part['Content-Transfer-Encoding'],
|
||||
'magic': attach_magic
|
||||
})
|
||||
attachments.append(attach_id)
|
||||
# End if part.get_filename()
|
||||
@ -168,6 +175,9 @@ class Gulag:
|
||||
for quarmail_id in quarmail_ids:
|
||||
for attachment_id in attachments:
|
||||
self.db.quarmail2attachment(str(quarmail_id), str(attachment_id))
|
||||
logging.info(whoami(self) +
|
||||
"Attachment("+str(attachment_id)+")@QuarMail("+str(quarmail_id)+") imported"
|
||||
)
|
||||
# link message with uris
|
||||
if(len(uris) > 0):
|
||||
for quarmail_id in quarmail_ids:
|
||||
@ -178,6 +188,9 @@ class Gulag:
|
||||
"fqdn": extract_fqdn(uri)
|
||||
})
|
||||
self.db.quarmail2uri(str(quarmail_id), str(uri_id))
|
||||
logging.info(whoami(self) +
|
||||
"URI("+str(uri_id)+")@QuarMail("+str(quarmail_id)+") imported"
|
||||
)
|
||||
except GulagDBException as e:
|
||||
logging.error(whoami(self) + e.message)
|
||||
# End for(unseen)
|
||||
|
||||
@ -185,7 +185,9 @@ class GulagDB:
|
||||
try:
|
||||
cursor = self.conn.cursor()
|
||||
query = "select *,(select count(*) from QuarMail2Attachment"
|
||||
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count"
|
||||
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count,"
|
||||
query += " (select count(*) from QuarMail2URI"
|
||||
query += " where QuarMails.id=QuarMail2URI.quarmail_id) as uri_count"
|
||||
query += " from QuarMails " + self.get_where_clause(args)
|
||||
query += " " + self.get_limit_clause(args) + " ;"
|
||||
cursor.execute(query)
|
||||
@ -213,10 +215,10 @@ class GulagDB:
|
||||
def get_quarmail(self,args):
|
||||
try:
|
||||
cursor = self.conn.cursor()
|
||||
# TODO: build SQL query by args
|
||||
#query = "select * from QuarMails where id='" + args['id'] + "';"
|
||||
query = "select *,(select count(*) from QuarMail2Attachment"
|
||||
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count"
|
||||
query += " where QuarMails.id=QuarMail2Attachment.quarmail_id) as attach_count,"
|
||||
query += " (select count(*) from QuarMail2URI"
|
||||
query += " where QuarMails.id=QuarMail2URI.quarmail_id) as uri_count"
|
||||
query += " from QuarMails where QuarMails.id="+ str(args['id']) +";"
|
||||
cursor.execute(query)
|
||||
data = cursor.fetchall()
|
||||
@ -266,8 +268,9 @@ class GulagDB:
|
||||
try:
|
||||
cursor = self.conn.cursor()
|
||||
cursor.execute("insert into Attachments " +
|
||||
"(filename, content_type, content_encoding) values (%s,%s,%s)",
|
||||
(attach['filename'], attach['content_type'], attach['content_encoding'])
|
||||
"(filename,content_type,content_encoding,magic) values (%s,%s,%s,%s)",
|
||||
(attach['filename'],attach['content_type'],
|
||||
attach['content_encoding'],attach['magic'])
|
||||
)
|
||||
return cursor.lastrowid
|
||||
except mariadb.Error as e:
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import sys,re
|
||||
import sys,re,urllib
|
||||
from urllib.parse import urlparse
|
||||
from smtplib import SMTP
|
||||
|
||||
def whoami(obj):
|
||||
@ -6,7 +7,7 @@ def whoami(obj):
|
||||
|
||||
def send_mail(args):
|
||||
try:
|
||||
# FIXME: SMTP tranaport security and authentication!
|
||||
# FIXME: SMTP transport security and authentication!
|
||||
# with SMTP(host=mailbox['smtp_server'],port=mailbox['smtp_port']) as smtp:
|
||||
# try:
|
||||
# smtp.sendmail(
|
||||
@ -20,17 +21,22 @@ def send_mail(args):
|
||||
except TimeoutError as e:
|
||||
raise Exception('xyz') from e
|
||||
|
||||
def extract_uris(input_text):
    """Collect all http(s) URIs contained in *input_text*.

    Every match is percent-decoded once and becomes a key of the returned
    dict, mapped to an empty metadata dict. URIs embedded inside another
    URI (e.g. the target of a redirector such as
    https://www.google.de/url?sa=t&url=...) are decoded once more and added
    with the metadata ``{"suburi": True}``.

    Args:
        input_text: text to scan; ``None`` or an empty string yields ``{}``.

    Returns:
        dict mapping each URI string to its metadata dict.
    """
    uris = {}
    if not input_text:
        return uris
    uri_pattern = r'(https?:\/\/[^\s<>"]+)'
    # The plain pattern is greedy and would swallow the whole outer URI in
    # one match starting at offset 0, so an embedded URI would never be
    # reported. A zero-width lookahead allows overlapping matches instead.
    suburi_pattern = r'(?=' + uri_pattern + r')'
    for m in re.finditer(uri_pattern, input_text):
        uri = urllib.parse.unquote(m.group(0))
        uris[uri] = {}
        # extract sub-URIs (google redirector: https://www.google.de/url?sa=t&url=...)
        for m2 in re.finditer(suburi_pattern, uri):
            if m2.start() == 0:
                # offset 0 is the outer URI itself, not a sub-URI
                continue
            suburi = urllib.parse.unquote(m2.group(1))
            # do not relabel a URI that was already recorded
            uris.setdefault(suburi, {"suburi": True})
    return uris
|
||||
|
||||
def extract_fqdn(uri):
    """Return the host name part of *uri*, or ``None`` if there is none.

    Parsing is delegated to ``urlparse`` so that userinfo
    (``user:pw@host``), ports and query strings directly following the host
    are not mistaken for part of the host name. ``urlparse`` reports the
    host name in lower case.

    Args:
        uri: URI string; ``None`` or an empty string yields ``None``.

    Returns:
        The host name as a string, or ``None`` when *uri* has no network
        location or cannot be parsed.
    """
    if not uri:
        return None
    try:
        return urlparse(uri).hostname
    except ValueError:
        # raised by urlparse for malformed input, e.g. an unbalanced
        # IPv6 bracket
        return None
|
||||
|
||||
@ -46,6 +46,7 @@ create table Attachments (
|
||||
filename varchar(256) not null,
|
||||
content_type varchar(256) not null,
|
||||
content_encoding varchar(64),
|
||||
magic varchar(128),
|
||||
comment varchar(256)
|
||||
)ENGINE = InnoDB;
|
||||
|
||||
|
||||
@ -5,11 +5,13 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
||||
TZ=Europe/Berlin
|
||||
|
||||
RUN set -ex ; \
|
||||
apt-get -qq update \
|
||||
&& apt-get -qq --no-install-recommends install \
|
||||
uwsgi-plugin-python3 python3-setuptools python3-flask \
|
||||
python3-flask-restful python3-mysql.connector \
|
||||
uwsgi uwsgi-plugin-python3 procps net-tools
|
||||
apt-get -qq update \
|
||||
&& apt-get -qq --no-install-recommends install \
|
||||
uwsgi-plugin-python3 python3-setuptools python3-flask \
|
||||
python3-flask-restful python3-mysql.connector \
|
||||
uwsgi uwsgi-plugin-python3 procps net-tools \
|
||||
python3-pip libmagic1 \
|
||||
&& pip3 install python-magic
|
||||
|
||||
RUN /bin/mkdir /config /socket /app
|
||||
COPY app/*.py /app/
|
||||
|
||||
@ -307,6 +307,9 @@ definitions:
|
||||
attach_count:
|
||||
type: integer
|
||||
description: number of attachments
|
||||
uri_count:
|
||||
type: integer
|
||||
description: number of uris
|
||||
rfc822_message:
|
||||
type: string
|
||||
description: full RFC822 email message
|
||||
@ -317,6 +320,7 @@ definitions:
|
||||
- filename
|
||||
- content_encoding
|
||||
- content_type
|
||||
- magic
|
||||
- mailbox_id
|
||||
- imap_uid
|
||||
- href
|
||||
@ -336,6 +340,9 @@ definitions:
|
||||
content_type:
|
||||
type: string
|
||||
example: image/jpeg
|
||||
magic:
|
||||
type: string
|
||||
example: "PDF document, version 1.2"
|
||||
href:
|
||||
type: string
|
||||
description: hypermedia
|
||||
|
||||
Loading…
Reference in New Issue
Block a user