pandas vs excel

01.创建文件

import pandas as pd

# Build a small two-column table: numeric IDs and (possibly empty) names.
df = pd.DataFrame(data={'ID': [1, 2, 3], 'Name': ['Tim', '', 'Nick']})

df  # bare expression: displays the frame in a notebook/REPL, no effect in a script

# Promote the 'ID' column to the row index; set_index returns a new frame,
# so the result is bound back to df.
df = df.set_index(keys='ID')

df

# Save the frame to an Excel workbook.
df.to_excel(excel_writer=r"C:\Users\Administrator\Desktop\output.xlsx")
02.读取文件

import pandas as pd

# Load the workbook with the default settings.
people = pd.read_excel(r"C:\Users\Administrator\Desktop\people.xlsx")

print(people.shape)    # (number of rows, number of columns)
print(people.columns)  # column labels
print(people.head())   # first 5 rows
print(people.tail(3))  # last 3 rows

# header=1: take the column names from the second row of the sheet
# (row positions are 0-based).
people = pd.read_excel(r"C:\Users\Administrator\Desktop\people.xlsx", header=1)

# header=None: the sheet has no header row, so every row is data and pandas
# labels the columns with integers starting at 0 (0, 1, 2, ...).
people = pd.read_excel(r"C:\Users\Administrator\Desktop\people.xlsx", header=None)

people.columns = ['ID','Type','FirstName','MiddleName','LastName']  # assign column names

# Two ways to make 'ID' the index. They must not both be applied to the same
# frame in sequence: after the first, 'ID' is no longer a column and a second
# set_index('ID') raises KeyError.
indexed_copy = people.set_index('ID')   # returns a new DataFrame; people is untouched
people.set_index('ID', inplace=True)    # modifies people itself and returns None

people  # bare expression: displays the frame in a notebook/REPL

people.to_excel(r"C:\Users\Administrator\Desktop\output.xlsx")

# index_col='ID' tells read_excel to use the 'ID' column as the index while loading.
df = pd.read_excel(r"C:\Users\Administrator\Desktop\output.xlsx",index_col='ID')
03.行、列、单元格

import pandas as pd

# An empty Series. The dtype is given explicitly: pandas 1.x warns when it is
# omitted, and the default changed from float64 to object across versions.
s1 = pd.Series(dtype=object)
s1.index
# The output recorded below is from pandas < 2.0; pandas >= 2.0 shows
# RangeIndex(start=0, stop=0, step=1) for an empty Series instead.
'''
Index([], dtype='object')
'''

# A plain dict maps labels to values.
d = {'x':100,'y':200,'z':300}

print(d.keys())
print(d.values())
print(d['x'])
'''
dict_keys(['x', 'y', 'z'])
dict_values([100, 200, 300])
100
'''

# Building a Series from the dict uses the keys as the index and the
# values as the data.
s1 = pd.Series(d)

print(s1.index)
# Series.data was removed in pandas 1.0 and raises AttributeError there;
# to_numpy() returns the underlying values as a NumPy array.
print(s1.to_numpy())
s1
'''
Index(['x', 'y', 'z'], dtype='object')
[100 200 300]

x    100
y    200
z    300
dtype: int64
'''

# Values and their labels held in two parallel lists.
L1 = [100, 200, 300]
L2 = list('xyz')

# Pair each value with the label at the same position.
s1 = pd.Series(data=L1, index=L2)
s1.index
'''
Index(['x', 'y', 'z'], dtype='object')
'''

# Same construction as a single call, with the data and labels written inline.
s1 = pd.Series(
    data=[100, 200, 300],
    index=['x', 'y', 'z'],
)
s1.index
'''
Index(['x', 'y', 'z'], dtype='object')
'''

# Three named Series that share the same index labels.
s1 = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series(data=[10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series(data=[100, 200, 300], index=[1, 2, 3], name='C')

# A dict of Series becomes a frame with one COLUMN per Series,
# keyed here by each Series' own name.
df = pd.DataFrame({series.name: series for series in (s1, s2, s3)})
df

# The same three named Series as inputs.
s1 = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series(data=[10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series(data=[100, 200, 300], index=[1, 2, 3], name='C')

# A list of Series becomes a frame with one ROW per Series: the Series
# names label the rows and the Series index labels become the columns.
df = pd.DataFrame(data=[s1, s2, s3])
df

# s3 is indexed 2..4 while s1 and s2 are indexed 1..3, so the labels only
# partly overlap.
s1 = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series(data=[10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series(data=[100, 200, 300], index=[2, 3, 4], name='C')

# Columns are aligned on index labels: the frame covers labels 1..4 and any
# label a Series lacks is filled with NaN.
df = pd.DataFrame({series.name: series for series in (s1, s2, s3)})
df
 

猜你喜欢

转载自www.cnblogs.com/LXL616/p/11842118.html
今日推荐