mirror of
https://github.com/chillout2k/gulag.git
synced 2025-12-13 16:00:18 +00:00
sub uri extraction bug fixed
This commit is contained in:
parent
acda4c8e11
commit
eb3a1e9bf2
@ -166,8 +166,10 @@ class Gulag:
|
||||
ctype = part.get_content_type()
|
||||
if(ctype == 'text/plain' or ctype == 'text/html'):
|
||||
curis = {}
|
||||
curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
|
||||
# curis = extract_uris(part.get_payload(decode=True).decode("utf-8"))
|
||||
curis = extract_uris(part.get_payload(decode=True).decode("utf-8","replace"))
|
||||
if(len(curis) > 0):
|
||||
logging.info(whoami(self) + "CURIS: " + str(curis))
|
||||
uris = {**uris, **curis}
|
||||
# End for msg.walk()
|
||||
# link message with attachments
|
||||
|
||||
@ -24,12 +24,13 @@ def send_mail(args):
|
||||
def extract_uris(input_text):
    """Collect the http(s) URIs found in *input_text*.

    Every match of the URI pattern is percent-decoded with
    ``urllib.parse.unquote`` and stored as a key mapped to an empty dict.
    When a decoded URI embeds another http(s) URI after its first character
    (e.g. a redirector link such as
    ``https://www.google.de/url?sa=t&url=...``), the embedded URI is decoded
    once more and stored as a key mapped to ``{"suburi": True}``.

    Args:
        input_text: text to scan for URIs.

    Returns:
        A dict keyed by decoded URI; values are ``{}`` for top-level URIs and
        ``{"suburi": True}`` for embedded ones.
    """
    uris = {}
    uri_pattern = r'(https?:\/\/[^\s<>"]+)'
    # The leading ``^.+`` requires at least one character in front of the
    # nested scheme, so the outer URI can never match itself and be flagged
    # as its own sub-URI. Being greedy, it selects the last embedded URI.
    suburi_pattern = r'^.+(https?:\/\/[^\s<>"]+)'
    for m in re.finditer(uri_pattern, input_text):
        uri = urllib.parse.unquote(m.group(0))
        uris[uri] = {}
        # extract sub-URI (google redirector: https://www.google.de/url?sa=t&url=...)
        # The pattern is anchored at the start, so it matches at most once;
        # a single search is equivalent to iterating over finditer().
        m2 = re.search(suburi_pattern, uri)
        if m2 is not None:
            suburi = urllib.parse.unquote(m2.group(1))
            uris[suburi] = {"suburi": True}
    return uris
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user