many small updates
[dotfiles.git] / email / .local / bin / proofpoint_decoder
1 #!/usr/bin/env python
2 __author__ = 'Eric Van Cleve; Oleksandr Vlasiuk'
3 __license__ = 'GPL v.3'
4 __version__ = '4.0'
5 __email__ = 'oleksandr.vlasiuk@gmail.com'
6 __status__ = 'beta'
7
8
9 import sys
10 import re
11 from base64 import urlsafe_b64decode
12 # Uncomment for Python 2
13 # import string
14 # if sys.version_info[0] < 3:
15 # from urllib import unquote
16 # import HTMLParser
17 # htmlparser = HTMLParser.HTMLParser()
18 # unescape = htmlparser.unescape
19 # from string import maketrans
20 # else:
21 from urllib.parse import unquote
22 from html import unescape
23 maketrans = str.maketrans
24
25
class URLDecoder:
    """Rewrite Proofpoint URL Defense and Outlook Safe Links URLs in text.

    ``decode`` takes one line of text and substitutes the wrapped URLs it
    recognises with the target URL recovered from the wrapper.  All patterns
    are compiled once as class attributes and are shared by every instance.
    """

    # Finds a URL Defense wrapper and captures its version tag ("v1", "v2", ...).
    ud_pattern = re.compile(r'http.+?urldefense(?:\.proofpoint)?\.com/(v[0-9])/')
    # v1: only the "u=<url>&k=" fragment is matched (and therefore replaced).
    v1_pattern = re.compile(r'u=(?P<url>.+?)&k=')
    v2_pattern = re.compile(r'http.+?urldefense\.proofpoint\.com/v2/url\?u=(?P<url>.+?)&[dc]=.*?&e=')
    v3_pattern = re.compile(r'http.+?urldefense\.com/v3/__(?P<url>.+?)__;(?P<enc_bytes>.*?)!.*?\$')
    # A v3 placeholder: a single "*", or "**" followed by one run-length symbol.
    # Raw string: "\*" is not a valid escape sequence in a normal literal.
    v3_token_pattern = re.compile(r'\*(\*.)?')
    # Run-length symbols map, in this order, to the values 2..65.
    v3_run_mapping = {
        symbol: value
        for value, symbol in enumerate(
            'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_',
            start=2,
        )
    }
    safelinks = re.compile(r'http.+?\.safelinks\.protection\.outlook\.com/\?url=(?P<url>.+?)&(?:amp;)?data=.*?reserved=0')

    # v2 wrappers spell "%" as "-" and "/" as "_".
    _v2_translation = str.maketrans('-_', '%/')

    def decode(self, line):
        """Return ``line`` with recognised wrapped URLs replaced.

        The version tag of the first URL Defense match on the line selects
        which single URL Defense substitution (v1, v2 or v3) is applied to
        the whole line.  Safe Links substitution is applied afterwards, but
        only if the *original* line contained a Safe Links URL.

        Raises whatever ``decode_v3`` raises when a v3 wrapper is malformed.
        """
        ud_match = self.ud_pattern.search(line)
        # Deliberately tested against the untouched line, before any rewriting.
        has_safelinks = self.safelinks.search(line)
        if ud_match:
            version = ud_match.group(1)
            if version == 'v1':
                line = self.v1_pattern.sub(self.unescquote, line)
            elif version == 'v2':
                line = self.v2_pattern.sub(self.decode_v2, line)
            elif version == 'v3':
                line = self.v3_pattern.sub(self.decode_v3, line)
        if has_safelinks:
            line = self.safelinks.sub(self.unescquote, line)
        return line

    def unescquote(self, match):
        """Return the match's ``url`` group percent-decoded, then HTML-unescaped."""
        return unescape(unquote(match.group('url')))

    def decode_v2(self, match):
        """Return the target URL of a v2 wrapper match.

        The ``url`` group has "-" mapped to "%" and "_" mapped to "/" before
        being percent-decoded and HTML-unescaped.
        """
        url_encoded_url = match.group('url').translate(self._v2_translation)
        return unescape(unquote(url_encoded_url))

    def decode_v3(self, match):
        """Return the target URL of a v3 wrapper match.

        The ``url`` group is percent-decoded; its ``*`` / ``**<symbol>``
        placeholders are then filled, left to right, with characters taken
        from the base64url-decoded ``enc_bytes`` group.

        Raises:
            binascii.Error, UnicodeDecodeError: ``enc_bytes`` is not valid
                base64url-encoded UTF-8.
            IndexError: a single ``*`` placeholder has no character left.
            KeyError: the symbol after ``**`` is not in ``v3_run_mapping``.
        """
        encoded_url = unquote(match.group('url'))
        # The wrapper omits base64 padding; surplus "=" is ignored by the decoder.
        padded = match.group('enc_bytes') + '=='
        replacements = urlsafe_b64decode(padded).decode('utf-8')
        cursor = 0  # index of the next unused replacement character

        def replace_token(token_match):
            nonlocal cursor
            token = token_match.group(0)
            if token == '*':
                width = 1
                piece = replacements[cursor]
            else:
                # NOTE(review): the mapped value is halved before use; the
                # reason is not visible in this file -- confirm it is intended.
                width = self.v3_run_mapping[token[-1]] // 2
                piece = replacements[cursor:cursor + width]
            cursor += width
            return piece

        # One non-recursive pass, so the number of placeholders is not limited
        # by the interpreter's recursion depth.
        return self.v3_token_pattern.sub(replace_token, encoded_url)
105
106
107
def main():
    """Copy standard input to standard output, one line at a time.

    A URLDecoder is constructed, but the call that would decode each line is
    commented out, so every line is written back unchanged.
    """
    urldec = URLDecoder()
    emit = sys.stdout.write
    for raw_line in sys.stdin:
        # NOTE(review): decoding is disabled here; confirm whether that is intended.
        # emit(urldec.decode(raw_line))
        emit(raw_line)


if __name__ == '__main__':
    main()