2c1bf23c987f6bcf649741e7ff3ad5dcdc5e20d5
2 __author__
= 'Eric Van Cleve; Oleksandr Vlasiuk'
3 __license__
= 'GPL v.3'
5 __email__
= 'oleksandr.vlasiuk@gmail.com'
11 from base64
import urlsafe_b64decode
12 # Uncomment for Python 2
14 # if sys.version_info[0] < 3:
15 # from urllib import unquote
17 # htmlparser = HTMLParser.HTMLParser()
18 # unescape = htmlparser.unescape
19 # from string import maketrans
21 from urllib
.parse
import unquote
22 from html
import unescape
23 maketrans
= str.maketrans
# Compiled patterns and lookup tables shared by every URLDecoder instance.

# Matches any Proofpoint URL Defense link; group 1 is the scheme version
# tag ("v1", "v2", "v3", ...) that decode() dispatches on.
URLDecoder.ud_pattern = re.compile(r'http.+?urldefense(?:\.proofpoint)?\.com/(v[0-9])/')

# v1: the wrapped URL sits between "u=" and "&k=".
URLDecoder.v1_pattern = re.compile(r'u=(?P<url>.+?)&k=')

# v2: the whole wrapper link, with the wrapped URL captured from the "u=" field.
URLDecoder.v2_pattern = re.compile(r'http.+?urldefense\.proofpoint\.com/v2/url\?u=(?P<url>.+?)&[dc]=.*?&e=')

# v3: the wrapped URL sits between "__" markers; ``enc_bytes`` is the field
# that decode_v3() base64-decodes to recover the characters hidden by "*".
URLDecoder.v3_pattern = re.compile(r'http.+?urldefense\.com/v3/__(?P<url>.+?)__;(?P<enc_bytes>.*?)!.*?\$')

# A v3 placeholder token: a lone "*" or "**" followed by one run-length
# character.  Raw string: "\*" is not a valid escape in a normal string
# literal and triggers a warning on current Python versions.
URLDecoder.v3_token_pattern = re.compile(r"\*(\*.)?")

# Run-length character -> value, counting up from 'A' == 2 to '_' == 65.
URLDecoder.v3_run_mapping = {
    symbol: value
    for value, symbol in enumerate(
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz'
        '0123456789'
        '-_',
        start=2,
    )
}

# Outlook Safe Links wrapper; the wrapped URL is the "url" query field.
URLDecoder.safelinks = re.compile(r'http.+?\.safelinks\.protection\.outlook\.com/\?url=(?P<url>.+?)&(?:amp;)?data=.*?reserved=0')
def decode(self, line):
    """Return *line* with wrapped links replaced by the URLs they wrap.

    Proofpoint URL Defense links are decoded according to the version tag
    found by ``self.ud_pattern`` (v1, v2 or v3); Outlook Safe Links
    wrappers are then unwrapped as well.

    Args:
        line: Text that may contain wrapped links.

    Returns:
        The text with every recognised wrapper substituted.  Text without
        any recognised wrapper is returned unchanged.
    """
    match = self.ud_pattern.search(line)
    # search() returns None when the line has no URL Defense link at all;
    # only inspect the version tag when there is a match.
    if match is not None:
        version = match.group(1)
        if version == 'v1':
            line = self.v1_pattern.sub(self.unescquote, line)
        elif version == 'v2':
            line = self.v2_pattern.sub(self.decode_v2, line)
        elif version == 'v3':
            line = self.v3_pattern.sub(self.decode_v3, line)

    # sub() leaves the text untouched when nothing matches, so no separate
    # search for Safe Links is needed.
    line = self.safelinks.sub(self.unescquote, line)
    return line
def unescquote(self, match):
    """Return the ``url`` group of *match* with percent-escapes, then HTML entities, decoded."""
    wrapped = match.group('url')
    percent_decoded = unquote(wrapped)
    return unescape(percent_decoded)
def decode_v2(self, match):
    """Return the URL wrapped by a v2 link.

    In the ``url`` group every '-' is turned back into '%' and every '_'
    into '/'; the result is then percent-decoded and HTML-unescaped.
    """
    # Same character mapping as maketrans('-_', '%/').
    substitutions = {ord('-'): '%', ord('_'): '/'}
    percent_encoded = match.group('url').translate(substitutions)
    return unescape(unquote(percent_encoded))
def decode_v3(self, match):
    """Return the URL wrapped by a v3 link.

    The ``url`` group is percent-decoded, then every placeholder token
    found by ``self.v3_token_pattern`` is replaced with characters taken,
    in order, from the base64-decoded ``enc_bytes`` group:

    * ``*`` consumes one character;
    * ``**X`` consumes ``self.v3_run_mapping[X] // 2`` characters.

    ``self.dec_bytes`` and ``self.current_marker`` are overwritten on
    every call and hold the decoded characters and the read position.

    Args:
        match: A match object exposing ``url`` and ``enc_bytes`` groups.

    Returns:
        The reconstructed URL as a string.

    Raises:
        KeyError: a ``**X`` token uses a character missing from
            ``self.v3_run_mapping``.
        IndexError: a ``*`` token appears after the decoded characters
            have been used up.
    """
    encoded_url = unquote(match.group('url'))

    enc_bytes = match.group('enc_bytes')
    # urlsafe_b64decode rejects input whose length is not a multiple of
    # four, so restore any '=' padding that is missing from the field.
    enc_bytes += '=' * (-len(enc_bytes) % 4)
    self.dec_bytes = urlsafe_b64decode(enc_bytes).decode('utf-8')
    self.current_marker = 0

    def replace_token(token_match):
        """Return the decoded character(s) that one placeholder stands for."""
        token = token_match.group(0)
        begin = self.current_marker
        if token.startswith('**'):
            # NOTE(review): the table value is halved before use; confirm
            # this matches the run lengths the encoder actually emits.
            run_length = self.v3_run_mapping[token[-1]] // 2
            self.current_marker = begin + run_length
            return self.dec_bytes[begin:begin + run_length]
        # Lone '*': exactly one character (indexing, so running out of
        # decoded characters is reported rather than silently ignored).
        character = self.dec_bytes[begin]
        self.current_marker = begin + 1
        return character

    # One left-to-right pass; avoids one level of recursion per token.
    return self.v3_token_pattern.sub(replace_token, encoded_url)
# Filter driver: copy standard input to standard output one line at a time.
urldec = URLDecoder()
emit = sys.stdout.write
for line in sys.stdin:
    # NOTE(review): lines are currently echoed unchanged; the decoding call
    # below is disabled.  Confirm whether pass-through is intended.
    # sys.stdout.write(urldec.decode(line))
    emit(line)
113 if __name__
== '__main__':