# -*- coding: utf-8 -*-
import email
import email.errors
import email.utils
import imaplib
import logging
import re
from email import header

from openerp import http
class Advertisefor(http.Controller):
    """Import messages from an IMAP mailbox into the ``emailinfos`` model.

    The mailbox credentials are read from the newest ``emailrecvconfig``
    record.  Every message whose Message-ID is not stored yet is parsed and
    saved, together with its attachments (as ``ir.attachment`` records).
    """

    _logger = logging.getLogger(__name__)

    # Month abbreviations as they appear in e-mail ``Date`` headers.
    _MONTHS = {
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    }

    @http.route('/advertisefor/getemaillist/', type='http', csrf=False, auth='user')
    def get_eamil_list_action(self, **kw):
        """Fetch all not-yet-imported messages from the configured mailbox.

        :param kw: request parameters (unused).
        :return: ``None`` on success; an ``http.Response`` with status 500
            and a short explanation when the configuration is missing or
            the IMAP server cannot be reached / refuses the credentials.
        """
        # Only the most recent configuration is used.
        recv_configs = http.request.env['emailrecvconfig'].search_read(
            [], order='create_date desc', limit=1)
        if not recv_configs:
            message = "imap4----no email receive configuration found"
            self._logger.error(message)
            return http.Response(message, status=500)
        recv_config = recv_configs[0]

        try:
            email_server = imaplib.IMAP4_SSL(
                host=recv_config['imap_server'],
                port=recv_config['server_port'] or imaplib.IMAP4_SSL_PORT)
        except (IOError, imaplib.IMAP4.error):
            # socket and SSL errors derive from IOError on Python 2.6+.
            message = "imap4----sorry the given email server address connect time out"
            self._logger.exception(message)
            return http.Response(message, status=500)
        self._logger.info("imap4----connect server success, now will check username")

        try:
            try:
                email_server.login(recv_config['address'], recv_config['pwd'])
            except imaplib.IMAP4.error:
                message = ("imap4----sorry the given email address or "
                           "password seem do not correct")
                self._logger.exception(message)
                return http.Response(message, status=500)
            self._logger.info("imap4----username exist, now will check password")
            self._import_new_messages(email_server)
        finally:
            # Always release the connection, even when the import fails.
            self._disconnect(email_server)

    def _import_new_messages(self, email_server):
        """Store every message of the default mailbox that is not known yet."""
        env = http.request.env
        model_email = env['emailinfos']
        model_attachment = env['ir.attachment']

        email_server.select()
        _typ, found = email_server.search(None, 'ALL')
        numbers = found[0].split() if found and found[0] else []
        # Newest messages (highest sequence numbers) first.
        for number in reversed(numbers):
            _typ, fetched = email_server.fetch(number, '(RFC822)')
            if not fetched or not isinstance(fetched[0], tuple):
                self._logger.warning("imap4----could not fetch message %r", number)
                continue
            message = email.message_from_string(fetched[0][1])
            msg_id = email.utils.parseaddr(message.get('message-id') or '')[1]
            if model_email.search([('email_msg_id', '=', msg_id)], limit=1):
                continue

            values = self.parseHeader(message)
            body = self.parseBody(message)
            # A message may carry no text part at all.
            values['email_body'] = body.get('email_body', '')
            values['email_msg_id'] = msg_id
            record = model_email.create(values)

            attachments = body.get('attachments')
            if attachments is None and 'attachment' in body:
                attachments = [body['attachment']]
            for attachment in attachments or []:
                created = model_attachment.create(dict(attachment, res_id=record.id))
                self._logger.debug("imap4----attachment created: %r", created)

    def _disconnect(self, email_server):
        """Close the selected mailbox and log out, ignoring protocol errors."""
        # close() is only legal once a mailbox is selected, so a failure here
        # must not prevent logout().
        for action in (email_server.close, email_server.logout):
            try:
                action()
            except (IOError, imaplib.IMAP4.error):
                self._logger.debug("imap4----error while disconnecting", exc_info=True)

    def _decode_bytes(self, data, charset=None):
        """Decode ``data`` using ``charset``, then utf-8, then gbk.

        Values that are not byte strings are returned unchanged.
        """
        if not isinstance(data, bytes):
            return data
        for candidate in (charset, 'utf-8', 'gbk'):
            if not candidate:
                continue
            try:
                return data.decode(str(candidate))
            except (LookupError, UnicodeDecodeError):
                continue
        return data.decode('utf-8', 'replace')

    def _decode_header_value(self, value):
        """Decode an RFC 2047 encoded header value, joining all its chunks."""
        if not isinstance(value, str):
            # Already decoded text (e.g. ``unicode`` on Python 2).
            return value
        try:
            chunks = header.decode_header(value)
        except email.errors.HeaderParseError:
            return value
        pieces = []
        previous_encoded = None
        for data, charset in chunks:
            text = self._decode_bytes(data, charset)
            encoded = bool(charset)
            # decode_header may strip the blank between an encoded word and
            # plain text; put it back unless one is already there.
            if (pieces and encoded != previous_encoded
                    and not pieces[-1][-1:].isspace()
                    and not text[:1].isspace()):
                pieces.append(u' ')
            pieces.append(text)
            previous_encoded = encoded
        return u''.join(pieces)

    def parseHeader(self, message):
        """Extract the header fields stored on an ``emailinfos`` record.

        :param message: parsed ``email.message.Message``.
        :return: dict with ``email_subject``, ``email_from``, ``email_to``
            (address part only), ``email_cc`` (raw header value, ``''`` when
            absent) and ``email_date`` (``'Y-M-D H:M:S'`` string, or
            ``False`` when the Date header is missing or unparsable).
        """
        raw_date = message.get('date')
        email_date = False
        if raw_date:
            try:
                email_date = self.transformDate(raw_date)
            except ValueError:
                self._logger.warning("imap4----unparsable date header: %r", raw_date)
        return {
            'email_subject': self.parseMailSubject(message),
            'email_from': email.utils.parseaddr(message.get('from') or '')[1],
            'email_to': email.utils.parseaddr(message.get('to') or '')[1],
            'email_cc': message.get('cc') or '',
            'email_date': email_date,
        }

    def parseBody(self, message):
        """Extract the text body and the attachments of a message.

        :param message: parsed ``email.message.Message``.
        :return: dict that may contain ``email_body`` (content of the last
            text/plain or text/html part), ``attachments`` (list of value
            dicts for ``ir.attachment``) and ``attachment`` (the last entry
            of that list, kept for backward compatibility).
        """
        result = {}
        attachments = []
        # Visit every MIME part; multipart containers only hold other parts.
        for part in message.walk():
            if part.is_multipart():
                continue
            # A file name (Content-Type "name" or Content-Disposition
            # "filename") marks the part as an attachment.
            name = part.get_param("name") or part.get_filename()
            if name:
                if isinstance(name, tuple):
                    # RFC 2231 encoded parameter: (charset, language, value).
                    name = email.utils.collapse_rfc2231_value(name)
                fname = self._decode_header_value(name)
                self._logger.debug("imap4----attachment name: %r", fname)
                # NOTE(review): the decoded payload is stored raw in
                # 'db_datas'; confirm this is what ir.attachment expects.
                attachment = {
                    'datas_fname': fname,
                    'db_datas': part.get_payload(decode=True),
                    'name': fname,
                    'res_model': 'emailinfos',
                }
                attachments.append(attachment)
                result['attachment'] = attachment
            elif part.get_content_type() in ('text/plain', 'text/html'):
                content = part.get_payload(decode=True)
                charset = self.guess_charset(part)
                if charset:
                    content = self._decode_bytes(content, charset)
                result['email_body'] = content
        if attachments:
            result['attachments'] = attachments
        return result

    def getYear(self, date):
        """Return the first four-digit year starting with 2 found in ``date``.

        :raises ValueError: when no such year is present.
        """
        found = re.search(r'\b2\d{3}\b', date)
        if found is None:
            raise ValueError('no year found in date: %r' % (date,))
        return int(found.group())

    def getMonth(self, date):
        """Return the month number of the first month abbreviation in ``date``.

        Returns ``None`` when ``date`` contains no known abbreviation.
        """
        for token in re.findall(r'\b\w{3}\b', date):
            if token in self._MONTHS:
                return self._MONTHS[token]
        return None

    def getDay(self, date):
        """Return the first one- or two-digit number in ``date`` as the day.

        :raises ValueError: when no such number is present.
        """
        found = re.search(r'\b\d{1,2}\b', date)
        if found is None:
            raise ValueError('no day found in date: %r' % (date,))
        return int(found.group())

    def getTime(self, date):
        """Return ``[hour, minute, second]`` parsed from ``date``.

        The seconds are optional in the input and default to 0.

        :raises ValueError: when no ``HH:MM[:SS]`` time is present.
        """
        found = re.search(r'\b(\d{2}):(\d{2})(?::(\d{2}))?\b', date)
        if found is None:
            raise ValueError('no time found in date: %r' % (date,))
        return [int(unit or 0) for unit in found.groups()]

    def transformDate(self, date):
        """Convert a mail date such as ``Thu, 18 Apr 2019 15:39:36 +0800``.

        :return: ``'2019-4-18 15:39:36'`` style string (no zero padding).
        :raises ValueError: when a date component cannot be found.
        """
        # NOTE(review): the UTC offset of the header is ignored - confirm
        # whether the stored value should be converted to UTC.
        year = self.getYear(date)
        month = self.getMonth(date)
        if month is None:
            raise ValueError('no month found in date: %r' % (date,))
        day = self.getDay(date)
        clock = ':'.join(str(unit) for unit in self.getTime(date))
        return '{}-{}-{} {}'.format(year, month, day, clock)

    def parseMailSubject(self, msg):
        """Return the decoded subject, or the placeholder when it is missing."""
        raw_subject = msg.get('subject')
        if raw_subject is None:
            return '无主题'
        return self._decode_header_value(raw_subject)

    def guess_charset(self, msg):
        """Return the charset name of ``msg`` as a string, or ``None``.

        Uses the charset set on the message object when there is one,
        otherwise the ``charset`` parameter of its Content-Type header.
        """
        charset = msg.get_charset()
        if charset is not None:
            return str(charset)
        # Strips quotes and ignores any other Content-Type parameters.
        return msg.get_content_charset()
解析邮件内容:
邮件时间格式 Thu, 18 Apr 2019 15:39:36 +0800 转为datetime类型。
年月日时间,分开解析
主题及内容 需要获取编码 解码 否则所有不是utf-8编码的都会出现乱码。
MIME实例对象的方法:
as_string() :返回字符串信息,相当于__str__(),str(msg)
as_bytes() :返回字节信息,相当于__bytes__(),bytes(msg)
is_multipart() :判断是否为有效载荷的列表message对象,是返回True,否则返回False
set_unixfrom(unixfrom) :将消息的信封头设置为unixfrom为字符串
get_unixfrom() :返回消息的信封头。默认为None
attach(payload) :将给定的有效负载添加到当前有效负载
get_payload(i=None, decode=False) :返回当前的有效载荷;当is_multipart()为True时是Message对象的列表,否则是字符串
set_payload(payload, charset=None) :将整个消息对象的有效载荷设置为有效载荷
set_charset(charset) :将有效负载的字符集设置为charset
get_charset() :返回Charset与消息有效负载相关的实例
__len__() :返回标题的总数,包括重复项
__contains__(name) :如果消息对象具有名为name的字段,则返回true
__getitem__(name) :返回指定标题字段的值
__setitem__(name, val) :将字段添加到带有字段名称和值val的消息中
__delitem__(name) :从消息的标题中删除所有出现的具有名称name的字段
keys() :返回所有消息标题字段名称的列表
values() :返回所有消息字段值的列表
items() :返回包含所有消息的字段标题和值
add_header(_name, _value, **_params) :扩展标题设置,_name为要添加的标题字段,_value为标题的值。