Pasted as Python [Remove this paste ]
Description: how to parse mails
URL: http://rafb.ath.cx/pastes/cPqFXS19.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
                        # Extract the body of `_email_message` into `_text` as a str in which
                        # carriage returns are removed and newlines are replaced by "<br>".
                        #
                        # For a multipart message the text is taken from the first leaf part
                        # (first child at every nesting level) and that part's own
                        # Content-Transfer-Encoding decides how it is decoded.  For a
                        # non-multipart message the stdlib reverses the transfer encoding.
                        import base64
                        import quopri

                        def _bytes_to_text(raw):
                            """Decode bytes as UTF-8, falling back to Latin-1 (which cannot fail)."""
                            try:
                                return raw.decode("utf-8")
                            except UnicodeDecodeError:
                                return raw.decode("latin-1")

                        def _unquote(text):
                            """Best-effort quoted-printable decode of a str.

                            Quoted-printable data is pure ASCII; text holding other
                            characters cannot be encoded data and is returned unchanged.
                            """
                            try:
                                raw = text.encode("ascii")
                            except UnicodeEncodeError:
                                return text
                            return _bytes_to_text(quopri.decodestring(raw))

                        if _email_message.is_multipart():
                            html = _email_message.get_payload(0)

                            # Walk down through the first child of every container until a
                            # non-list payload is reached, remembering which part it came
                            # from so that its headers can be consulted below.
                            _leaf = html
                            _text = _leaf.get_payload()
                            while isinstance(_text, list) and _text:
                                _leaf = _text[0]
                                _text = _leaf.get_payload()

                            if isinstance(_text, bytes):
                                _text = _bytes_to_text(_text)
                            elif not isinstance(_text, str):
                                # Missing payload or empty container: no body text.
                                _text = ""

                            # Header values are case-insensitive and may carry whitespace.
                            _encoding = str(_leaf["content-transfer-encoding"] or "").strip().lower()

                            if _encoding == "quoted-printable":
                                _text = _unquote(_text)
                            elif _encoding == "base64":
                                # Base64 bodies are wrapped over several lines, so all
                                # whitespace must go before the data can be validated.
                                _compact = "".join(_text.split())
                                try:
                                    _text = _bytes_to_text(base64.b64decode(_compact, validate=True))
                                except ValueError:
                                    # Labelled base64 but not valid base64 (binascii.Error is
                                    # a ValueError): maybe quoted-printable instead.
                                    _text = _unquote(_text)
                            elif not _encoding:
                                # No encoding declared: keep the best-effort
                                # quoted-printable attempt.
                                _text = _unquote(_text)
                            # Any other declared encoding (7bit, 8bit, binary, ...) is left
                            # as is; decoding it again would corrupt literal "=XX" sequences.

                            _text = _text.replace("\r", "").replace("\n", "<br>")

                        else:
                            # decode=True already reverses a declared quoted-printable or
                            # base64 transfer encoding and returns bytes.
                            _raw = _email_message.get_payload(decode=True)

                            if isinstance(_raw, bytes):
                                _declared = str(_email_message["content-transfer-encoding"] or "").strip()
                                if not _declared:
                                    # Nothing declared: keep the best-effort
                                    # quoted-printable attempt.
                                    _raw = quopri.decodestring(_raw)
                                _text = _bytes_to_text(_raw)
                            elif _raw is None:
                                # No payload at all; avoid producing the literal "None".
                                _text = ""
                            else:
                                _text = str(_raw)

                            _text = _text.replace("\r", "").replace("\n", "<br>")