关注微信号:小程在线
关注CSDN博客:程志伟的博客
import numpy as np
import pandas as pd
#### 1.1读取csv文件 ####
eu12=pd.read_csv('H:/0date/Eueo2012.csv')
eu12.head()
Out[2]:
Team Goals Shots on target ... Subs on Subs off Players Used
0 Croatia 4 13 ... 9 9 16
1 Czech Republic 4 13 ... 11 11 19
2 Denmark 4 10 ... 7 7 15
3 England 5 11 ... 11 11 16
4 France 3 22 ... 11 11 19
[5 rows x 35 columns]
#把第一列作为索引
eu13=pd.read_csv('H:/0date/Eueo2012.csv',index_col=0)
eu13.head()
Out[3]:
Goals Shots on target ... Subs off Players Used
Team ...
Croatia 4 13 ... 9 16
Czech Republic 4 13 ... 11 19
Denmark 4 10 ... 7 15
England 5 11 ... 11 16
France 3 22 ... 11 19
[5 rows x 34 columns]
#读取没有列名的数据
eu14=pd.read_csv('H:/0date/Eueo2012_no_header.csv',index_col=0)
eu14.head()
Out[4]:
4 13 12 51.90% 16.00% 32 ... 2.1 9 0.4 9.1 9.2 16
Croatia ...
Czech Republic 4 13 18 41.90% 12.90% 39 ... 8 7 0 11 11 19
Denmark 4 10 10 50.00% 20.00% 27 ... 8 4 0 7 7 15
England 5 11 18 50.00% 17.20% 40 ... 6 5 0 11 11 16
France 3 22 24 37.90% 6.50% 65 ... 5 6 0 11 11 19
Germany 10 32 32 47.80% 15.60% 80 ... 12 4 0 15 15 17
[5 rows x 34 columns]
#把列名设置为空,以数字作为列名
eu15=pd.read_csv('H:/0date/Eueo2012_no_header.csv',index_col=0,header=None)
eu15.head()
Out[5]:
1 2 3 4 5 6 7 ... 28 29 30 31 32 33 34
0 ...
Croatia 4 13 12 51.90% 16.00% 32 0 ... 62 2 9 0 9 9 16
Czech Republic 4 13 18 41.90% 12.90% 39 0 ... 73 8 7 0 11 11 19
Denmark 4 10 10 50.00% 20.00% 27 1 ... 38 8 4 0 7 7 15
England 5 11 18 50.00% 17.20% 40 0 ... 45 6 5 0 11 11 16
France 3 22 24 37.90% 6.50% 65 1 ... 51 5 6 0 11 11 19
[5 rows x 34 columns]
#给列赋值新的名字
columns=eu12.columns
eu16=pd.read_csv('H:/0date/Eueo2012_no_header.csv',index_col=0,names=columns)
eu16.head()
Out[6]:
Goals Shots on target ... Subs off Players Used
Team ...
Croatia 4 13 ... 9 16
Czech Republic 4 13 ... 11 19
Denmark 4 10 ... 7 15
England 5 11 ... 11 16
France 3 22 ... 11 19
[5 rows x 34 columns]
#改变数据的类型
eu16=pd.read_csv('H:/0date/Eueo2012_no_header.csv',names=columns,dtype={'Goals':np.float64})
eu16.dtypes
Out[7]:
Team object
Goals float64
Shots on target int64
Shots off target int64
Shooting Accuracy object
% Goals-to-shots object
Total shots (inc. Blocked) int64
Hit Woodwork int64
Penalty goals int64
Penalties not scored int64
Headed goals int64
Passes int64
Passes completed int64
Passing Accuracy object
Touches int64
Crosses int64
Dribbles int64
Corners Taken int64
Tackles int64
Clearances int64
Interceptions int64
Clearances off line float64
Clean Sheets int64
Blocks int64
Goals conceded int64
Saves made int64
Saves-to-shots ratio object
Fouls Won int64
Fouls Conceded int64
Offsides int64
Yellow Cards int64
Red Cards int64
Subs on int64
Subs off int64
Players Used int64
dtype: object
#usecols参数只读取指定的几列
eu17=pd.read_csv('H:/0date/Eueo2012.csv',index_col=0,usecols=['Team','Goals','Shots on target'])
eu17.head()
Out[8]:
Goals Shots on target
Team
Croatia 4 13
Czech Republic 4 13
Denmark 4 10
England 5 11
France 3 22
#to_csv保存文件到本地
eu12.to_csv('H:/0date/Eueo2012_save.csv')
#### 1.2读取不规则数据 ####
#发现数据以?作为分隔符
eu20=pd.read_table('H:/0date/Eueo2012_del.txt',index_col=0)
eu20.head()
Out[9]:
Empty DataFrame
Columns: []
Index: [Croatia?4?13, Czech Republic?4?13, Denmark?4?10, England?5?11, France?3?22]
#sep参数指定使用?分隔数据
eu21=pd.read_csv('H:/0date/Eueo2012_del.txt',sep='?',index_col=0) #csv也可以
eu21.head()
Out[10]:
Goals Shots on target
Team
Croatia 4 13
Czech Republic 4 13
Denmark 4 10
England 5 11
France 3 22
#skiprows参数跳过指定的几行
eu22=pd.read_csv('H:/0date/Eueo2012_skip.txt',sep='?',skiprows=[0,1],index_col=0)
eu22
Out[11]:
Goals Shots on target
Team
Croatia 4.0 13.0
Czech Republic 4.0 13.0
Denmark 4.0 10.0
England 5.0 11.0
France 3.0 22.0
Germany 10.0 32.0
Greece 5.0 8.0
Italy 6.0 34.0
Netherlands 2.0 12.0
Poland 2.0 15.0
Portugal 6.0 22.0
Republic of Ireland 1.0 7.0
Russia 5.0 9.0
Spain 12.0 42.0
Sweden 5.0 17.0
Ukraine 2.0 7.0
This is the end. NaN NaN
#skipfooter参数跳过最后一行
eu23=pd.read_csv('H:/0date/Eueo2012_skip.txt',sep='?',skiprows=[0,1],skipfooter=1,index_col=0,engine='python')
eu23
Out[12]:
Goals Shots on target
Team
Croatia 4 13
Czech Republic 4 13
Denmark 4 10
England 5 11
France 3 22
Germany 10 32
Greece 5 8
Italy 6 34
Netherlands 2 12
Poland 2 15
Portugal 6 22
Republic of Ireland 1 7
Russia 5 9
Spain 12 42
Sweden 5 17
Ukraine 2 7
#nrows参数只读取5行数据
eu24=pd.read_table('H:/0date/Eueo2012_del.txt',sep='?',nrows=5,index_col=0)
eu24
Out[13]:
Goals Shots on target
Team
Croatia 4 13
Czech Republic 4 13
Denmark 4 10
England 5 11
France 3 22
#跳过5行,读取5行,以?分隔
eu25=pd.read_csv('H:/0date/Eueo2012_del.txt',header=0,skiprows=5,nrows=5,sep='?',index_col=0,names=['Goals','Shote on target'])
eu25
Out[14]:
Goals Shote on target
Germany 10 32
Greece 5 8
Italy 6 34
Netherlands 2 12
Poland 2 15
#### 1.3读取excel ####
#sheet_name读取sheet
eu31=pd.read_excel('H:/0date/Eueo2012_excel.xlsx',sheet_name='Eueo2011')
eu31.head()
Out[15]:
Team Goals Shots on target ... Subs on Subs off Players Used
0 Croatia 8 13 ... 9 9 16
1 Czech Republic 4 13 ... 11 11 19
2 Denmark 4 10 ... 7 7 15
3 England 5 11 ... 11 11 16
4 France 3 22 ... 11 11 19
[5 rows x 35 columns]
#写入数据
eu12.to_excel('H:/0date/test.xlsx')
#写入数据,并将sheet命名为eu2012
eu12.to_excel('H:/0date/test.xlsx',sheet_name='eu2012')
#将两个sheet写入一个excel
from pandas import ExcelWriter
# Write the same DataFrame to two sheets of a single workbook.
# Both to_excel calls are indented inside the with-block so they run while the
# writer is open.
# The target uses the .xlsx extension: pandas 2.0 removed the xlwt-based
# writer that .xls output relied on.
with ExcelWriter("H:/0date/test3.xlsx") as writer:
    eu12.to_excel(writer, sheet_name='eu12')
    eu12.to_excel(writer, sheet_name='eu11')
#### 1.4读取json数据 ####
eu12_json = pd.read_json("H:/0date/eueo2012.json")
eu12_json.head(5)
Out[16]:
Team Goals Shots on target ... Subs on Subs off Players Used
0 Croatia 4 13 ... 9 9 16
1 Czech Republic 4 13 ... 11 11 19
2 Denmark 4 10 ... 7 7 15
3 England 5 11 ... 11 11 16
4 France 3 22 ... 11 11 19
[5 rows x 35 columns]
#### 1.5连接数据库 ####
1创建数据库连接connect和关闭数据库连接close
创建数据库连接的三种方式:
方法一:用户名、密码和监听写在同一个连接字符串中
import cx_Oracle
# Connect by passing one string that bundles the username, password and the
# 'host/orcl' target, then close the connection again.
db=cx_Oracle.connect('username/password@host/orcl')
db.close()
方法二:用户名、密码和监听作为三个参数分开写
import cx_Oracle
# Connect by passing the username, password and the 'host/orcl' target as
# three separate arguments, then close the connection again.
db=cx_Oracle.connect('username','password','host/orcl')
db.close()
方法三:配置监听并连接
import cx_Oracle
# Build a DSN with makedsn from the host, port 1521 and 'orcl', then pass it
# as the third argument to connect; close the connection afterwards.
tns=cx_Oracle.makedsn('host',1521,'orcl')
db=cx_Oracle.connect('username','password',tns)
db.close()
2建立cursor并执行SQL语句:查询、更新、插入、删除
创建数据库连接,创建游标cursor,然后执行sql语句,执行完成后,关闭游标,关闭数据库连接
# Query walkthrough: open a cursor on the existing connection `db`, run the
# same SELECT with and without bind parameters, then release the resources.
cr = db.cursor() # create the cursor
sql = 'select * from table'
# Fetch the entire result set in one call (fetchall).
cr.execute(sql) # execute the SQL statement
rs = cr.fetchall()
print(rs)
# Passing bind parameters.
# Variant 1: bind values supplied as a dictionary keyed by placeholder name.
pa = {'id':'1'}
cr.execute('select * from table where id=:id',pa)
rs = cr.fetchall()
print(rs)
# Variant 2: bind values supplied as keyword arguments. The SQL text must
# contain the matching ":c_id" placeholder; without the colon the statement
# has no bind variable for the keyword argument to fill.
cr.execute('select * from table where id=:c_id',c_id='1')
rs = cr.fetchall()
print(rs)
# Release resources: close the cursor, then the connection.
cr.close()
db.close()
注意:插入、更新、删除操作后需要提交commit
# DML walkthrough: execute a data-modifying statement and commit it.
cr = db.cursor() # create the cursor
# NOTE(review): the three assignments below look like alternative examples.
# Each one overwrites `sql`, so as written only the final DELETE statement is
# passed to cr.execute(); pick the one you need.
# insert
sql = 'insert into table(id) values(1)'
# update
sql = 'update table set id = 2'
# delete
sql = 'delete from table where id = 2'
cr.execute(sql) # execute the statement currently held in `sql`
cr.close()
db.commit() # commit the transaction on the connection