#-*-coding:utf-8-*-
import pandas as pd
import numpy as np

# Column names assigned to the data when it is loaded.
# NOTE(review): 'normol nucleoli' looks like a typo for 'normal nucleoli';
# it is kept as-is because it is a column key other code may rely on.
column_names = [
    'sample code number',
    'clump thickness',
    'uniformity of cell size',
    'uniformity of cell shape',
    'marginal adhesion',
    'single epithelial cell size',
    'bare nuclei',
    'bland chromatin',
    'normol nucleoli',
    'mitoses',
    'class',
]
# Read the data set directly from the internet with pandas.read_csv.
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data', names=column_names)
# Replace the '?' placeholder with the standard missing-value marker.
data = data.replace(to_replace='?', value=np.nan)
# Drop every row that has a missing value in at least one column.
data = data.dropna(how='any')
# Print the number of rows and columns. A bare `data.shape` expression
# displays nothing when the file is run as a script, so print explicitly.
print(data.shape)
在访问对应的网站时,terminal出现异常;
异常如下:
'''
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1318, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1026, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 964, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1400, in connect
server_hostname=server_hostname)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 407, in wrap_socket
_context=self, _session=session)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 814, in __init__
self.do_handshake()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 1068, in do_handshake
self._sslobj.do_handshake()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 689, in do_handshake
self._sslobj.do_handshake()
ssl.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:833)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "code-11.py", line 8, in <module>
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data',names = column_names)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/io/parsers.py", line 709, in parser_f
return _read(filepath_or_buffer, kwds)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/io/parsers.py", line 433, in _read
filepath_or_buffer, encoding, compression)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/io/common.py", line 190, in get_filepath_or_buffer
req = _urlopen(filepath_or_buffer)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 526, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 544, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1361, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1320, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:833)>
'''
于是检索一下百度,发现有人遇到同样的问题:keyword = certifi
于是在 Mac 上执行 python3 -m pip install certifi
安装成功;
但是
问题并没有解决;
看了一下,果然还需要用到 ssl 和 urllib 这两个模块
于是引入 urllib.request 和 ssl
#!/usr/bin/python3
#-*-coding:utf-8-*-
import io
import urllib.request
import ssl
import pandas as pd
import numpy as np

# Location of the data file to download.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data'

# Column names assigned to the data when it is loaded.
# NOTE(review): 'normol nucleoli' looks like a typo for 'normal nucleoli';
# it is kept as-is because it is a column key other code may rely on.
column_names = [
    'sample code number',
    'clump thickness',
    'uniformity of cell size',
    'uniformity of cell shape',
    'marginal adhesion',
    'single epithelial cell size',
    'bare nuclei',
    'bland chromatin',
    'normol nucleoli',
    'mitoses',
    'class',
]

# SECURITY: certificate verification is switched off to work around the
# CERTIFICATE_VERIFY_FAILED error. The unverified context is passed to this
# single request only, instead of monkeypatching the private
# ssl._create_default_https_context hook, so other HTTPS traffic in the
# process keeps verifying certificates. The real fix is to install the
# system CA certificates so verification can stay enabled.
insecure_context = ssl.create_default_context()
insecure_context.check_hostname = False  # must be disabled before CERT_NONE
insecure_context.verify_mode = ssl.CERT_NONE

# Download the file once and close the connection as soon as it is read.
with urllib.request.urlopen(DATA_URL, context=insecure_context) as response:
    raw_bytes = response.read()

# Parse the downloaded bytes with pandas.read_csv (no second download).
data = pd.read_csv(io.BytesIO(raw_bytes), names=column_names)
# Replace the '?' placeholder with the standard missing-value marker.
data = data.replace(to_replace='?', value=np.nan)
# Drop every row that has a missing value in at least one column.
data = data.dropna(how='any')
# Print the number of rows and columns.
print(data.shape)
完美运行成功,终于完成了数据预处理功能;UP UP UP
数据分析 DAY0