【问题描述】使用Python如何读取收件箱指定时期的邮件正文呢?
比如,如何读取下图收件箱中 2021/06/03(含当天)及以后收到的、不含广告和银行通知类邮件的正文内容呢?
【小千解答】使用 imapclient 库。广告和银行通知类邮件的正文部分,其 get_content_type() 通常为 text/html,因此示例代码遇到 text/html 部分时直接跳过,不做解析。
示例代码如下:
import datetime as dt
from email.parser import BytesParser
from email.policy import default


def mail_charset(msg):
    """Return the charset name declared for an email message/part.

    The charset attached to the message object is tried first; if there is
    none, the ``charset`` parameter of the Content-Type header is used.

    Args:
        msg: An ``email.message.Message`` (or subclass) instance.

    Returns:
        The charset name as a string (lower-cased when taken from the
        header), or ``None`` when no charset is declared.
    """
    # First try the charset stored on the message object itself.
    mycharset = msg.get_charset()
    if mycharset is not None:
        # get_charset() yields a Charset object; bytes.decode() needs a str.
        return str(mycharset)
    # Otherwise read it from the Content-Type header. get_content_charset()
    # parses the header parameters properly, so surrounding quotes and any
    # trailing parameters (e.g. "; format=flowed") are not included.
    return msg.get_content_charset()


def _decode_part(part):
    """Return the payload of a non-multipart part decoded to ``str``."""
    raw = part.get_payload(decode=True)
    if raw is None:
        return ''
    charset = mail_charset(part)
    if charset:
        try:
            return raw.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong declared charset: fall through to the
            # lenient fallback instead of printing raw bytes.
            pass
    return raw.decode('utf-8', errors='replace')


def _body_texts(msg):
    """Yield the decoded text of every body part that should be shown.

    Multipart containers, attachments (parts with a filename) and
    ``text/html`` parts are skipped.
    """
    for part in msg.walk():
        if part.is_multipart():
            continue
        if part.get_filename() is not None:
            continue
        if part.get_content_type() == 'text/html':
            # Usually an advertisement or a bank notification mail.
            continue
        yield _decode_part(part)


def main():
    """Print subject and body of INBOX mails received since 2021-06-03."""
    # Imported here so the helpers above stay importable without the
    # third-party dependency installed.
    import imapclient

    with imapclient.IMAPClient('imap.163.com') as xqmail:
        # NOTE(review): sample credentials from the article; do not keep
        # real credentials in source code.
        xqmail.login('shaoqian9527@163.com', 'xqoffice.cn')
        xqmail.id_({'name': 'IMAPClient', 'version': '2.2.0'})
        # Second argument is ``readonly``: open the folder read-only.
        xqmail.select_folder('INBOX', True)
        mids = xqmail.search([u'SINCE', dt.date(2021, 6, 3)])
        parser = BytesParser(policy=default)
        for idx, mid in enumerate(mids):
            mail = xqmail.fetch(mid, ['BODY[]'])
            raw_msg = mail[mid][b'BODY[]']
            msg = parser.parsebytes(raw_msg)
            # A message may have no Subject header at all.
            subject = str(msg.get('Subject', ''))
            for xqcontent in _body_texts(msg):
                print('\n' + str(idx + 1) + '#')
                print('主题:' + subject)
                print('正文:')
                print(xqcontent.strip())


if __name__ == '__main__':
    main()
运行结果:
【参考资料】