mirror of
https://github.com/chillout2k/gulag.git
synced 2025-12-13 16:00:18 +00:00
sub uri extraction bug fixed
This commit is contained in:
parent
acda4c8e11
commit
eb3a1e9bf2
@ -166,8 +166,10 @@ class Gulag:
|
|||||||
ctype = part.get_content_type()
|
ctype = part.get_content_type()
|
||||||
if(ctype == 'text/plain' or ctype == 'text/html'):
|
if(ctype == 'text/plain' or ctype == 'text/html'):
|
||||||
curis = {}
|
curis = {}
|
||||||
curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
|
# curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
|
||||||
|
curis = extract_uris(part.get_payload(decode=True).decode("utf-8","replace"))
|
||||||
if(len(curis) > 0):
|
if(len(curis) > 0):
|
||||||
|
logging.info(whoami(self) + "CURIS: " + str(curis))
|
||||||
uris = {**uris, **curis}
|
uris = {**uris, **curis}
|
||||||
# End for msg.walk()
|
# End for msg.walk()
|
||||||
# link message with attachments
|
# link message with attachments
|
||||||
|
|||||||
@ -24,12 +24,13 @@ def send_mail(args):
|
|||||||
def extract_uris(input_text):
|
def extract_uris(input_text):
|
||||||
uris = {}
|
uris = {}
|
||||||
uri_pattern = r'(https?:\/\/[^\s<>"]+)'
|
uri_pattern = r'(https?:\/\/[^\s<>"]+)'
|
||||||
|
suburi_pattern = r'^.+(https?:\/\/[^\s<>"]+)'
|
||||||
for m in re.finditer(uri_pattern, input_text):
|
for m in re.finditer(uri_pattern, input_text):
|
||||||
uri = urllib.parse.unquote(m.group(0))
|
uri = urllib.parse.unquote(m.group(0))
|
||||||
uris[uri] = {}
|
uris[uri] = {}
|
||||||
# extract sub-URIs (google redirector: https://www.google.de/url?sa=t&url=...)
|
# extract sub-URIs (google redirector: https://www.google.de/url?sa=t&url=...)
|
||||||
for m2 in re.finditer(uri_pattern, uri):
|
for m2 in re.finditer(suburi_pattern, uri):
|
||||||
suburi = urllib.parse.unquote(m2.group(0))
|
suburi = urllib.parse.unquote(m2.group(1))
|
||||||
uris[suburi] = {"suburi": True}
|
uris[suburi] = {"suburi": True}
|
||||||
return uris
|
return uris
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user