# 01. Creating a file
import pandas as pd

# Build a small DataFrame from a dict of equal-length columns.
df = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Tim', '', 'Nick']})
df  # notebook display only; has no effect when run as a script

# Promote the 'ID' column to the row index. Otherwise to_excel() would also
# write the default 0..n-1 integer index as an extra leading column.
df = df.set_index('ID')
df  # notebook display only

# Write the frame to an Excel workbook.
df.to_excel(r"C:\Users\Administrator\Desktop\output.xlsx")
# 02. Reading a file
import pandas as pd

# Read the workbook; by default the first row of the sheet is the header.
people = pd.read_excel(r"C:\Users\Administrator\Desktop\people.xlsx")
print(people.shape)    # (number of rows, number of columns)
print(people.columns)  # column labels
print(people.head())   # first 5 rows
print(people.tail(3))  # last 3 rows

# header=1: take the column names from the second row of the sheet
# (row positions are 0-based); rows above it are skipped.
people = pd.read_excel(r"C:\Users\Administrator\Desktop\people.xlsx", header=1)

# header=None: the sheet has no header row, so pandas labels the columns
# with the integers 0, 1, 2, ...
people = pd.read_excel(r"C:\Users\Administrator\Desktop\people.xlsx", header=None)

# Assign explicit column names.
# NOTE(review): assumes the sheet has exactly five columns -- confirm.
people.columns = ['ID', 'Type', 'FirstName', 'MiddleName', 'LastName']

# set_index returns a new DataFrame, so rebind the name.
people = people.set_index('ID')
# Equivalent in-place form, which modifies the existing DataFrame instead of
# creating a new one. Use ONE of the two forms: running both raises KeyError
# because 'ID' is no longer a column after the first call.
#     people.set_index('ID', inplace=True)
people  # notebook display only

people.to_excel(r"C:\Users\Administrator\Desktop\output.xlsx")

# index_col='ID' makes the 'ID' column the index at read time.
df = pd.read_excel(r"C:\Users\Administrator\Desktop\output.xlsx", index_col='ID')
# 03. Rows, columns and cells
import pandas as pd

# An empty Series. The explicit dtype avoids the "default dtype for empty
# Series" warning emitted by pandas 1.x.
s1 = pd.Series(dtype=object)
s1.index
# Recorded output:
#   Index([], dtype='object')
# NOTE: newer pandas releases may show an empty RangeIndex here instead.

# A plain dict, for comparison with a Series.
d = {'x': 100, 'y': 200, 'z': 300}
print(d.keys())
print(d.values())
print(d['x'])
# Output:
#   dict_keys(['x', 'y', 'z'])
#   dict_values([100, 200, 300])
#   100

# Series from a dict: keys become the index, values become the data.
s1 = pd.Series(d)
print(s1.index)
# Series.data was removed in pandas 1.0; .values exposes the underlying array.
print(s1.values)
s1  # notebook display only
# Output:
#   Index(['x', 'y', 'z'], dtype='object')
#   [100 200 300]
#   x    100
#   y    200
#   z    300
#   dtype: int64

# Series from two parallel lists: values and index labels.
L1 = [100, 200, 300]
L2 = ['x', 'y', 'z']
s1 = pd.Series(L1, index=L2)
s1.index
# Output:
#   Index(['x', 'y', 'z'], dtype='object')

# Same construction with inline literals.
s1 = pd.Series([100, 200, 300], index=['x', 'y', 'z'])
s1.index
# Output:
#   Index(['x', 'y', 'z'], dtype='object')

# DataFrame from a dict of Series: each Series becomes a COLUMN labelled by
# its dict key (here the Series' own name); rows follow the shared index.
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[1, 2, 3], name='C')
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
df  # notebook display only
# DataFrame from a LIST of Series: each Series becomes a ROW labelled by the
# Series' name, and the Series' index labels become the column labels.
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[1, 2, 3], name='C')
df = pd.DataFrame([s1, s2, s3])
df  # notebook display only; has no effect when run as a script
# DataFrame from a dict of Series whose indexes differ: pandas aligns the
# columns on the union of the index labels (1, 2, 3, 4) and fills positions
# a Series has no value for with NaN. s3 is indexed 2..4, so column 'C' is
# NaN at label 1 and columns 'A' and 'B' are NaN at label 4.
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[2, 3, 4], name='C')
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
df  # notebook display only; has no effect when run as a script