Python 3.5 打开 CSV 文件中文乱码问题

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u014119694/article/details/76594411

写入 CSV 文件时,将 open() 的编码格式(encoding 参数)改成 gb18030 就可以了。


# -*- coding: utf-8 -*-
"""
Created on Wed Aug  2 16:56:32 2017

@author: lengxia
"""

import urllib.request
import re
import csv
import codecs
import sys
from bs4 import BeautifulSoup 
import importlib
# Scrape the text-editor comparison table from Chinese Wikipedia and dump
# its six-column rows into url2table.csv.
wiki = 'https://zh.wikipedia.org/wiki/%E6%96%87%E4%BB%B6%E7%BC%96%E8%BE%91%E5%99%A8%E6%AF%94%E8%BE%83'
header = {'User-Agent': 'Mozilla/5.0'}

# Build an explicit Request so the User-Agent header is actually sent
# (passing the bare URL to urlopen would ignore `header`), and close the
# HTTP response as soon as the body has been read.
request = urllib.request.Request(wiki, headers=header)
with urllib.request.urlopen(request) as response:
    page = response.read().decode("utf8")
soup = BeautifulSoup(page, "lxml")

table = soup.find("table", {"class" : "sortable wikitable"})
if table is None:
    # Fail with a clear message instead of an AttributeError on None, and
    # before an empty output file gets created.
    raise RuntimeError(
        "no table with class 'sortable wikitable' found at " + wiki)

# Matches both <td> and <th> so header rows are exported as well.
td_th = re.compile('t[dh]')

# gb18030 is the encoding this post recommends to avoid garbled Chinese text
# when the CSV is opened. newline='' is required by the csv module so that
# it controls line endings itself (otherwise extra blank rows appear on
# Windows). The with-block closes the file even if a row fails to write.
with open('url2table.csv', 'w', encoding="gb18030", newline='') as f:
    csv_writer = csv.writer(f)

    for row in table.find_all("tr"):
        cells = row.find_all(td_th)
        # Only rows with exactly six cells carry the expected columns.
        if len(cells) != 6:
            continue

        # First text node of each cell, in column order:
        #   name    - editor name
        #   creater - owner / creator
        #   first   - date of first public release
        #   latest  - latest stable version
        #   cost    - price
        #   licence - licence terms
        # A cell without any text yields None, which csv writes as an
        # empty field.
        name, creater, first, latest, cost, licence = (
            cell.find(string=True) for cell in cells
        )
        if not name:
            continue

        record = [name, creater, first, latest, cost, licence]
        print(record)
        csv_writer.writerow(record)

猜你喜欢

转载自blog.csdn.net/u014119694/article/details/76594411