Python数据分析（2）pandas库的使用：Series对象，DataFrame对象，Index对象

#coding=gbk
#pandas 库的使用
#pandas 库使用索引机制，有Series 和  DataFrame 2种数据结构
import pandas as pd
import numpy as np
print("--------------")
# A Series built from a plain list gets a default integer index (0, 1, 2, ...).
s = pd.Series([1, 2, 3])
print(s)
# 0    1    <- left column is the index, right column holds the values
# 1    2
# 2    3
# dtype: int64

# Custom index labels can be supplied through `index`.
s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
print(s1)
# a    1
# b    2
# c    3
# dtype: int64
print(s1.index)   # e.g. Index(['a', 'b', 'c'], dtype='object')
print(s1.values)  # [1 2 3]

# Selecting elements.
# Positional access goes through .iloc: a bare integer key such as s1[1] on a
# label-indexed Series is deprecated in pandas 2.x and is treated as a label
# (KeyError here) in newer releases.
print(s1.iloc[1])       # 2
print(s1[['a', 'b']])   # select several elements by a list of labels
print(s1.iloc[0:2])     # positional slice: the first two elements
# a    1
# b    2
# dtype: int64

# Assigning to an element by position.
s1.iloc[1] = 9
print(s1)
# a    1
# b    9
# c    3
# dtype: int64
# Building a Series from a NumPy array.
s2 = np.array([1, 3, 5])
# copy=False states explicitly that the Series should wrap the array's memory
# instead of copying it. Older pandas did this implicitly; with Copy-on-Write
# the constructor copies NumPy input unless copy=False is passed.
s3 = pd.Series(s2, copy=False)
print(s3)
# 0    1
# 1    3
# 2    5
# dtype: int64    (int32 where NumPy's default integer is 32-bit)

# Because the memory is shared, writing to the array shows up in the Series.
s2[2] = 99
print(s3)
# 0     1
# 1     3
# 2    99
# dtype: int64

# Series arithmetic is element-wise.
print(s3 + 12)

# Assigning to labels that do not exist yet appends new elements.
s3.loc[3] = 1
s3.loc[4] = 3
print(s3)
# 0     1
# 1     3
# 2    99
# 3     1
# 4     3
# dtype: int64

# Distinct values, in order of first appearance.
print(s3.unique())  # [ 1  3 99]

# How many times each distinct value occurs.
print(s3.value_counts())
# 1     2    -> 1 occurs twice
# 3     2    -> 3 occurs twice
# 99    1
# (the order among equal counts and the exact footer may differ by version)


# NaN ("Not a Number") marks a missing value.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
a = pd.Series([1, 2, np.nan, 4])
print(a)
# 0    1.0
# 1    2.0
# 2    NaN
# 3    4.0
# dtype: float64
print(a.isnull())
# 0    False    <- True where the value is missing
# 1    False
# 2     True
# 3    False
# dtype: bool


# A Series can be built from a dict: keys become the index labels and the
# dict values become the Series values.
mydict = dict(red=1, blue=2, black=3)
d = pd.Series(data=mydict)
print(d)
# red      1
# blue     2
# black    3
# dtype: int64
# (recent pandas keeps the dict's insertion order; old releases sorted the keys)

DataFrame对象简述：


# DataFrame objects
# A dict of equal-length lists becomes one column per key.
data = {'color': ['blue', 'green', 'black', 'yellow'],
        'object': ['book', 'pencil', 'notebbook', 'pen'],
        'price': [1.2, 4.5, 6, 9]}
frame = pd.DataFrame(data)
print(frame)
#     color     object  price    <- the unnamed first column is the index
# 0    blue       book    1.2
# 1   green     pencil    4.5
# 2   black  notebbook    6.0
# 3  yellow        pen    9.0

# `index` replaces the default integer row labels.
frame1 = pd.DataFrame(data, index=['one', 'two', 'three', 'four'])
print(frame1)

# `columns` selects (and orders) the columns taken from the dict.
print(pd.DataFrame(data, columns=['color', 'price']))
#     color  price
# 0    blue    1.2
# 1   green    4.5
# 2   black    6.0
# 3  yellow    9.0

# A DataFrame can also be built from a 2-D array plus row and column labels.
frame2 = pd.DataFrame(np.arange(16).reshape(4, 4),
                      index=['blue', 'green', 'black', 'yellow'],
                      columns=['color', 'price', 'object', 'real'])
print(frame2)
#         color  price  object  real
# blue        0      1       2     3
# green       4      5       6     7
# black       8      9      10    11
# yellow     12     13      14    15


# Reading elements
print(frame2.price)     # one column, accessed as an attribute
# blue       1
# green      5
# black      9
# yellow    13
print(frame2.values)    # all values as a 2-D NumPy array
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]
#  [12 13 14 15]]

# Positional access uses .iloc; the old .ix indexer was removed in pandas 1.0.
print(frame2.iloc[2])           # third row, returned as a Series
print(frame2.iloc[[0, 2]])      # list of positions (double brackets) -> rows
#        color  price  object  real
# blue       0      1       2     3
# black      8      9      10    11
print(frame2.iloc[0, 2])        # row 0, column 2 -> the scalar 2
print(frame2.iloc[1:3])         # second and third rows

# Assignment
frame2['new'] = [4, 6, 8, 12]   # assigning to a new name adds a column
print(frame2)
#         color  price  object  real  new
# blue        0      1       2     3    4
# green       4      5       6     7    6
# black       8      9      10    11    8
# yellow     12     13      14    15   12
del frame2['new']               # delete the column again
print(frame2)

# Filtering: cells that fail the condition become NaN, so ints turn into floats.
print(frame2[frame2 < 12])
#         color  price  object  real
# blue      0.0    1.0     2.0   3.0
# green     4.0    5.0     6.0   7.0
# black     8.0    9.0    10.0  11.0
# yellow    NaN    NaN     NaN   NaN

# Transpose: rows and columns swap places.
print(frame2.T)
#         blue  green  black  yellow
# color      0      4      8      12
# price      1      5      9      13
# object     2      6     10      14
# real       3      7     11      15


# Index objects
print(frame2.index)     # e.g. Index(['blue', 'green', 'black', 'yellow'], dtype='object')
print(frame2.idxmax())  # per column, the row label of the maximum ('yellow')
print(frame2.idxmin())  # per column, the row label of the minimum ('blue')

Python数据分析（2）pandas库的使用：Series对象，DataFrame对象，Index对象

DataFrame对象简述：

猜你喜欢