参考资料:清华计算机博士带你学-Python金融量化分析
目录
demo1-Series使用特性
#Pandas-数据分析核心包
import pandas as pd
import numpy as np
# demo1 - Series basics
# A Series combines array-style access (by position) with dict-style access
# (by label).
print("*****Series支持数组的特性:索引*****")
sr = pd.Series([2, 3, 4, 5], index=["a", "b", "c", "d"])  # built from an array-like
print(sr)
# Positional access goes through .iloc: a bare sr[0] on a string-labelled
# Series only works through a deprecated integer-position fallback.
print(sr.iloc[0])  # by position
print(sr["a"])  # by label
print(sr + 2)  # arithmetic with a scalar
print(sr + 2 * sr)  # arithmetic between two Series
print(sr.iloc[:2])  # positional slice
print(sr.iloc[[1, 3]])  # fancy indexing by position
print(np.sqrt(sr))  # NumPy universal function
print(sr[sr > 3])  # boolean mask
print("*****Series支持字典的特性:标签*****")
sr = pd.Series({"a": 1, "b": 2})  # built from a dict; keys become labels
print(sr)
print(sr["a"], sr["b"])
print(sr[["a", "b"]])  # fancy indexing by label
print("a" in sr, "c" in sr)  # membership tests the labels
for value in sr:
    print(value)  # unlike a built-in dict, iteration yields the values
print(sr.index, sr.index[0])
print(sr.values, sr.values[1])
*****Series支持数组的特性:下标***** a 2 b 3 c 4 d 5 dtype: int64 2 2 a 4 b 5 c 6 d 7 dtype: int64 a 6 b 9 c 12 d 15 dtype: int64 a 2 b 3 dtype: int64 b 3 d 5 dtype: int64 a 1.414214 b 1.732051 c 2.000000 d 2.236068 dtype: float64 c 4 d 5 dtype: int64 *****Series支持字典的特性:标签***** a 1 b 2 dtype: int64 1 2 a 1 b 2 dtype: int64 True False 1 2 Index(['a', 'b'], dtype='object') a [1 2] 2
demo2-Series整数索引问题
# demo2 - pitfalls of an integer index
# Use .loc for labels and .iloc for positions to avoid ambiguity.
base_values = np.arange(20)
sr = pd.Series(base_values)
srNew = sr[10:].copy()  # keeps the original labels 10..19
value_at_label_10 = sr[10]  # with an integer index, [] looks up the label
print(srNew, value_at_label_10)
print(srNew.iloc[9])  # by position: the tenth element
print(srNew.loc[10])  # by label
10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 dtype: int32 10 19 10
demo3-Series数据对齐
# demo3 - Series alignment: arithmetic pairs values by label, not by position.
print("*****sr1与sr2等长*****")
sr1 = pd.Series({"c": 12, "a": 23, "d": 34})
sr2 = pd.Series({"d": 11, "c": 20, "a": 10})
same_length_sum = sr1 + sr2
print(same_length_sum)

print("*****sr1与sr2不等长*****")
# sr2 gains a label "b" that sr1 does not have, so the sum is NaN there.
sr2 = pd.Series({"d": 11, "c": 20, "a": 10, "b": 21})
unequal_sum = sr1 + sr2
print(unequal_sum)

print("*****sr1与sr2不等长:fill_value*****")
# fill_value=0 stands in for the missing side; look at the value under "b".
filled_sum = sr1.add(sr2, fill_value=0)
print(filled_sum)
*****sr1与sr2等长***** a 33 c 32 d 45 dtype: int64 *****sr1与sr2不等长***** a 33.0 b NaN c 32.0 d 45.0 dtype: float64 *****sr1与sr2不等长:fill_value***** a 33.0 b 21.0 c 32.0 d 45.0 dtype: float64
demo4-Series缺失值的处理
# demo4 - handling missing values in a Series
sr1 = pd.Series([12, 23, 34], index=list("cad"))
sr2 = pd.Series([11, 20, 10, 21], index=list("dcab"))
sr3 = sr1 + sr2  # label "b" exists only in sr2, so it becomes NaN

print("*****删除缺失值的3种方法*****")
print(sr3)
is_missing = sr3.isnull()
is_present = sr3.notnull()
print(is_missing)
print(is_present)
print(sr3[~is_missing])  # method 1: invert the isnull mask
print(sr3[is_present])  # method 2: use the notnull mask
print(sr3.dropna())  # method 3: dropna
print(sr3)  # none of the above modified sr3 itself

print("*****填充缺失值*****")
print(sr3.fillna(0))  # fill with 0
mean_value = sr3.mean()
print(sr3.fillna(mean_value))  # fill with the mean of the present values
*****删除缺失值的3种方法***** a 33.0 b NaN c 32.0 d 45.0 dtype: float64 a False b True c False d False dtype: bool a True b False c True d True dtype: bool a 33.0 c 32.0 d 45.0 dtype: float64 a 33.0 c 32.0 d 45.0 dtype: float64 a 33.0 c 32.0 d 45.0 dtype: float64 a 33.0 b NaN c 32.0 d 45.0 dtype: float64 *****填充缺失值***** a 33.0 b 0.0 c 32.0 d 45.0 dtype: float64 a 33.000000 b 36.666667 c 32.000000 d 45.000000 dtype: float64
#Series小结
#字典与数组的结合体:下标索引 + 标签访问
#整数索引loc与iloc
#数据对齐/ sr1.add(sr2,fill_value=0)
#缺失数据处理:①dropna ②fillna(0)
demo5-DataFrame的创建
# demo5 - creating a DataFrame
# A DataFrame is a table: think of it as a dict of Series sharing one index.
shared_index_frame = pd.DataFrame(
    {"one": [1, 2, 3], "two": [4, 5, 6]},
    index=["a", "b", "c"],  # one index for every column
)
print(shared_index_frame)

# Each column holds a single dtype. Here each Series brings its own index;
# "one" has no value for "d", so it gets NaN and becomes a float column.
column_one = pd.Series([1, 2, 3], index=["a", "b", "c"])
column_two = pd.Series([2, 1, 3, 4], index=["a", "b", "c", "d"])
print(pd.DataFrame({"one": column_one, "two": column_two}))
# pd.read_csv() reads a file; df.to_csv() writes one.
csv_path = "600519.csv"
data = pd.read_csv(csv_path, index_col="trade_date")  # "trade_date" column becomes the index
# Bare expression: shows the frame in a notebook, has no visible effect in a script.
data
one two a 1 4 b 2 5 c 3 6 one two a 1.0 2 b 2.0 1 c 3.0 3 d NaN 4
demo6-DataFrame常见属性
# demo6 - common DataFrame attributes
#   index      row labels
#   T          transpose
#   columns    column names
#   values     the underlying data as an array
#   describe() quick summary statistics
df = pd.DataFrame({"one": [1, 2, 3], "two": [4, 5, 6]}, index=list("abc"))
print(df)
print(df.index)  # row labels
print(df.T)  # transpose
print(df.columns)  # column names / column index
raw_values = df.values  # 2-D NumPy array
print(raw_values, type(raw_values), raw_values.shape)
summary = df.describe()
print(summary)
one two a 1 4 b 2 5 c 3 6 Index(['a', 'b', 'c'], dtype='object') a b c one 1 2 3 two 4 5 6 Index(['one', 'two'], dtype='object') [[1 4] [2 5] [3 6]] <class 'numpy.ndarray'> (3, 2) one two count 3.0 3.0 mean 2.0 5.0 std 1.0 1.0 min 1.0 4.0 25% 1.5 4.5 50% 2.0 5.0 75% 2.5 5.5 max 3.0 6.0
demo7-DataFrame索引与切片
# demo7 - DataFrame indexing and slicing
# Use .loc or .iloc with the form [row, column].
column_one = pd.Series([1, 2, 3], index=["a", "b", "c"])
column_two = pd.Series([2, 1, 3, 4], index=["a", "b", "c", "d"])
df = pd.DataFrame({"one": column_one, "two": column_two})
print(df)
# Column first, then row. Chaining two [] lookups like this is discouraged.
print(df["one"]["a"])

print("*****取某一个元素*****")
print(df.loc["a", "one"])  # a single cell

print("*****取某一列元素*****")
one_column = df["one"]
print(one_column, type(one_column))  # one column is a Series

print("*****取某一行元素*****")
print(df.iloc[0])  # one row, by position
print(df.loc["a", :])  # one row, ":" selects every column
print(df.loc["a",])  # omitting the column part also selects every column
print(df.loc["a"])  # shorthand for the same row

print("*****根据需求任意取元素*****")
# Plain labels, slices, boolean masks and label lists can be mixed freely.
print(df.loc[["a", "d"], :])
one two a 1.0 2 b 2.0 1 c 3.0 3 d NaN 4 1.0 *****取某一个元素***** 1.0 *****取某一列元素***** a 1.0 b 2.0 c 3.0 d NaN Name: one, dtype: float64 <class 'pandas.core.series.Series'> *****取某一行元素***** one 1.0 two 2.0 Name: a, dtype: float64 one 1.0 two 2.0 Name: a, dtype: float64 one 1.0 two 2.0 Name: a, dtype: float64 one 1.0 two 2.0 Name: a, dtype: float64 *****根据需求任意取元素***** one two a 1.0 2 d NaN 4
demo8-DataFrame数据对齐与缺失数据处理
# demo8 - DataFrame alignment and missing-data handling
df1 = pd.DataFrame({"two": [1, 2, 3, 4], "one": [4, 5, 6, 7]}, index=list("cdba"))
df2 = pd.DataFrame(
    {
        "one": pd.Series([1, 2, 3], index=["a", "b", "c"]),
        "two": pd.Series([2, 1, 3, 4], index=["a", "b", "c", "d"]),
    }
)
print("*****DataFrame数据对齐*****")
print(df1)
print(df2)
print(df1 + df2)  # aligned on both the row index and the column index

# Related API:
#   dropna(axis=0, how='any')
#   fillna(value)
#   isnull()
#   notnull()
# "two" is an integer column and cannot hold NaN. Cast it to float before
# assigning; relying on silent upcasting during assignment is deprecated.
df2["two"] = df2["two"].astype("float64")
df2.loc["d", "two"] = np.nan
df2.loc["c", "two"] = np.nan
print(df2)
print("*****fillna()*****")
print(df2.fillna(999))  # replace every NaN with a value
print("*****dropna:默认参数axis=0为行,how='any'即任意有nan即删除*****")
print(df2.dropna())  # defaults: axis=0 (rows), how="any" (drop a row with any NaN)
print(df2.dropna(how="any", axis=0))  # the same thing, spelled out
print(df2.dropna(how="all"))  # drop a row only when every value is NaN
print(df2.dropna(axis=1))  # axis=1 drops columns instead of rows
*****DataFrame数据对齐***** two one c 1 4 d 2 5 b 3 6 a 4 7 one two a 1.0 2 b 2.0 1 c 3.0 3 d NaN 4 one two a 8.0 6 b 8.0 4 c 7.0 4 d NaN 6 one two a 1.0 2.0 b 2.0 1.0 c 3.0 NaN d NaN NaN *****fillna()***** one two a 1.0 2.0 b 2.0 1.0 c 3.0 999.0 d 999.0 999.0 *****dropna***** one two a 1.0 2.0 b 2.0 1.0 one two a 1.0 2.0 b 2.0 1.0 one two a 1.0 2.0 b 2.0 1.0 c 3.0 NaN Empty DataFrame Columns: [] Index: [a, b, c, d]
demo9-Pandas常用函数
# demo9 - commonly used pandas functions
#   mean(axis=0, skipna=True)
#   sum(axis=1)
#   sort_index(axis, ..., ascending)
#   sort_values(by, axis, ascending)
# NumPy functions work on pandas objects as well.
# axis=0 aggregates across rows (one result per column); axis=1 aggregates
# across columns (one result per row). dropna uses the same convention.
# Further reading: https://zhuanlan.zhihu.com/p/110105054
# NOTE: `df` is the frame left over from the earlier indexing demo.
print(df)

print("*****mean*****")
column_means = df.mean()  # per column, e.g. (1 + 2 + 3) / 3 = 2
print(column_means)
print(df.mean(axis=0))  # axis=0 is the default: still per column
print(df.mean(skipna=True))
row_means = df.mean(axis=1)  # per row
print(row_means)

print("*****sum*****")
print(df.sum())  # per column by default
print(df.sum(axis=1))

# In the output below NaN sorts last in both ascending and descending order.
print("*****sort_values*****")
print(df)
ascending_by_two = df.sort_values(by="two")  # by: the column to sort on
print(ascending_by_two)
descending_by_two = df.sort_values(by="two", ascending=False)  # descending order
print(descending_by_two)
print(df.sort_values(axis=1, by="b"))  # reorder the columns by the values in row "b"

print("*****sort_index*****")
dfNew = pd.DataFrame(
    {"two": [1, 2, 3, 4], "one": [4, 5, 6, 7]},
    index=["c", "d", "b", "a"],
)
print(dfNew)
print(dfNew.sort_index())  # rows by label, ascending
print(dfNew.sort_index(ascending=False))  # rows by label, descending
print(dfNew.sort_index(axis=1))  # columns by name
one two a 1.0 2 b 2.0 1 c 3.0 3 d NaN 4 *****mean***** one 2.0 two 2.5 dtype: float64 one 2.0 two 2.5 dtype: float64 one 2.0 two 2.5 dtype: float64 a 1.5 b 1.5 c 3.0 d 4.0 dtype: float64 *****sum***** one 6.0 two 10.0 dtype: float64 a 3.0 b 3.0 c 6.0 d 4.0 dtype: float64 *****sort_values***** one two a 1.0 2 b 2.0 1 c 3.0 3 d NaN 4 one two b 2.0 1 a 1.0 2 c 3.0 3 d NaN 4 one two d NaN 4 c 3.0 3 a 1.0 2 b 2.0 1 two one a 2 1.0 b 1 2.0 c 3 3.0 d 4 NaN *****sort_index***** two one c 1 4 d 2 5 b 3 6 a 4 7 two one a 4 7 b 3 6 c 1 4 d 2 5 two one d 2 5 c 1 4 b 3 6 a 4 7 one two c 4 1 d 5 2 b 6 3 a 7 4
demo10-时间对象
# demo10 - time objects
print("*****pd.to_datetime()*****")
raw_dates = ["2021-7-20", "1996.10.04", "1997-MAY-20"]
# The three strings use three different layouts. Parse each one on its own,
# so no single inferred format is applied to the whole list, then combine
# the resulting Timestamps into one DatetimeIndex.
parsed_dates = pd.to_datetime([pd.to_datetime(text) for text in raw_dates])
print(parsed_dates)
print(type(parsed_dates))  # DatetimeIndex, typically used as an index
print(type(parsed_dates[0]))  # each element is a Timestamp

print("*****生成一定时间范围的时间对象*****")
print(pd.date_range("2010.1.1", "2010.5.1"))  # explicit start and end
print(pd.date_range("2010.1.1", periods=30))  # periods = length; freq defaults to "D" (daily)
print(pd.date_range("2010.1.1", periods=30, freq="h"))  # hourly
print(pd.date_range("2010.1.1", periods=30, freq="1h30min"))  # every 1 hour 30 minutes
business_days = pd.date_range("2022.9.1", periods=30, freq="B")  # "B" = business days
print(business_days)
print(type(business_days))
print(type(business_days[0]))  # each element is a Timestamp
*****pd.to_datetime()***** DatetimeIndex(['2021-07-20', '1996-10-04', '1997-05-20'], dtype='datetime64[ns]', freq=None) <class 'pandas.core.indexes.datetimes.DatetimeIndex'> <class 'pandas._libs.tslibs.timestamps.Timestamp'> *****生成一定时间范围的时间对象***** DatetimeIndex(['2010-01-01', '2010-01-02', '2010-01-03', '2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07', '2010-01-08', '2010-01-09', '2010-01-10', ... '2010-04-22', '2010-04-23', '2010-04-24', '2010-04-25', '2010-04-26', '2010-04-27', '2010-04-28', '2010-04-29', '2010-04-30', '2010-05-01'], dtype='datetime64[ns]', length=121, freq='D') DatetimeIndex(['2010-01-01', '2010-01-02', '2010-01-03', '2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07', '2010-01-08', '2010-01-09', '2010-01-10', '2010-01-11', '2010-01-12', '2010-01-13', '2010-01-14', '2010-01-15', '2010-01-16', '2010-01-17', '2010-01-18', '2010-01-19', '2010-01-20', '2010-01-21', '2010-01-22', '2010-01-23', '2010-01-24', '2010-01-25', '2010-01-26', '2010-01-27', '2010-01-28', '2010-01-29', '2010-01-30'], dtype='datetime64[ns]', freq='D') DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 01:00:00', '2010-01-01 02:00:00', '2010-01-01 03:00:00', '2010-01-01 04:00:00', '2010-01-01 05:00:00', '2010-01-01 06:00:00', '2010-01-01 07:00:00', '2010-01-01 08:00:00', '2010-01-01 09:00:00', '2010-01-01 10:00:00', '2010-01-01 11:00:00', '2010-01-01 12:00:00', '2010-01-01 13:00:00', '2010-01-01 14:00:00', '2010-01-01 15:00:00', '2010-01-01 16:00:00', '2010-01-01 17:00:00', '2010-01-01 18:00:00', '2010-01-01 19:00:00', '2010-01-01 20:00:00', '2010-01-01 21:00:00', '2010-01-01 22:00:00', '2010-01-01 23:00:00', '2010-01-02 00:00:00', '2010-01-02 01:00:00', '2010-01-02 02:00:00', '2010-01-02 03:00:00', '2010-01-02 04:00:00', '2010-01-02 05:00:00'], dtype='datetime64[ns]', freq='H') DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 01:30:00', '2010-01-01 03:00:00', '2010-01-01 04:30:00', '2010-01-01 06:00:00', '2010-01-01 07:30:00', '2010-01-01 09:00:00', '2010-01-01 
10:30:00', '2010-01-01 12:00:00', '2010-01-01 13:30:00', '2010-01-01 15:00:00', '2010-01-01 16:30:00', '2010-01-01 18:00:00', '2010-01-01 19:30:00', '2010-01-01 21:00:00', '2010-01-01 22:30:00', '2010-01-02 00:00:00', '2010-01-02 01:30:00', '2010-01-02 03:00:00', '2010-01-02 04:30:00', '2010-01-02 06:00:00', '2010-01-02 07:30:00', '2010-01-02 09:00:00', '2010-01-02 10:30:00', '2010-01-02 12:00:00', '2010-01-02 13:30:00', '2010-01-02 15:00:00', '2010-01-02 16:30:00', '2010-01-02 18:00:00', '2010-01-02 19:30:00'], dtype='datetime64[ns]', freq='90T') DatetimeIndex(['2022-09-01', '2022-09-02', '2022-09-05', '2022-09-06', '2022-09-07', '2022-09-08', '2022-09-09', '2022-09-12', '2022-09-13', '2022-09-14', '2022-09-15', '2022-09-16', '2022-09-19', '2022-09-20', '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-26', '2022-09-27', '2022-09-28', '2022-09-29', '2022-09-30', '2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-10', '2022-10-11', '2022-10-12'], dtype='datetime64[ns]', freq='B') <class 'pandas.core.indexes.datetimes.DatetimeIndex'> <class 'pandas._libs.tslibs.timestamps.Timestamp'>
demo11-时间序列
# demo11 - time series
print("*****时间对象索引*****")
dateIndex = pd.date_range("2022.9.1", periods=66)
sr = pd.Series(np.arange(66), index=dateIndex)
print(sr.index)  # the index is made of datetime objects
print(sr.loc["2022-9-25":])  # slice starting at 2022-09-25
print(sr.loc["2022-10":])  # slice starting at October 2022
print(sr.loc["2022-10"])  # only October 2022

print("*****resample重采样*****")
# Use the canonical upper-case weekly aliases and an explicit MonthEnd offset;
# the lower-case "w" / "w-mon" / "m" spellings are deprecated aliases.
weekly_sum = sr.resample("W").sum()  # weeks ending on Sunday
print(weekly_sum)
weekly_mon_sum = sr.resample("W-MON").sum()  # weeks ending on Monday
print(weekly_mon_sum)
monthly_mean = sr.resample(pd.offsets.MonthEnd()).mean()  # calendar months, labelled by month end
print(monthly_mean)
*****时间对象索引***** DatetimeIndex(['2022-09-01', '2022-09-02', '2022-09-03', '2022-09-04', '2022-09-05', '2022-09-06', '2022-09-07', '2022-09-08', '2022-09-09', '2022-09-10', '2022-09-11', '2022-09-12', '2022-09-13', '2022-09-14', '2022-09-15', '2022-09-16', '2022-09-17', '2022-09-18', '2022-09-19', '2022-09-20', '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-24', '2022-09-25', '2022-09-26', '2022-09-27', '2022-09-28', '2022-09-29', '2022-09-30', '2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08', '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12', '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16', '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20', '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24', '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28', '2022-10-29', '2022-10-30', '2022-10-31', '2022-11-01', '2022-11-02', '2022-11-03', '2022-11-04', '2022-11-05'], dtype='datetime64[ns]', freq='D') 2022-09-25 24 2022-09-26 25 2022-09-27 26 2022-09-28 27 2022-09-29 28 2022-09-30 29 2022-10-01 30 2022-10-02 31 2022-10-03 32 2022-10-04 33 2022-10-05 34 2022-10-06 35 2022-10-07 36 2022-10-08 37 2022-10-09 38 2022-10-10 39 2022-10-11 40 2022-10-12 41 2022-10-13 42 2022-10-14 43 2022-10-15 44 2022-10-16 45 2022-10-17 46 2022-10-18 47 2022-10-19 48 2022-10-20 49 2022-10-21 50 2022-10-22 51 2022-10-23 52 2022-10-24 53 2022-10-25 54 2022-10-26 55 2022-10-27 56 2022-10-28 57 2022-10-29 58 2022-10-30 59 2022-10-31 60 2022-11-01 61 2022-11-02 62 2022-11-03 63 2022-11-04 64 2022-11-05 65 Freq: D, dtype: int32 2022-10-01 30 2022-10-02 31 2022-10-03 32 2022-10-04 33 2022-10-05 34 2022-10-06 35 2022-10-07 36 2022-10-08 37 2022-10-09 38 2022-10-10 39 2022-10-11 40 2022-10-12 41 2022-10-13 42 2022-10-14 43 2022-10-15 44 2022-10-16 45 2022-10-17 46 2022-10-18 47 2022-10-19 48 2022-10-20 49 2022-10-21 50 2022-10-22 51 2022-10-23 52 2022-10-24 53 2022-10-25 54 2022-10-26 55 2022-10-27 56 2022-10-28 57 
2022-10-29 58 2022-10-30 59 2022-10-31 60 2022-11-01 61 2022-11-02 62 2022-11-03 63 2022-11-04 64 2022-11-05 65 Freq: D, dtype: int32 2022-10-01 30 2022-10-02 31 2022-10-03 32 2022-10-04 33 2022-10-05 34 2022-10-06 35 2022-10-07 36 2022-10-08 37 2022-10-09 38 2022-10-10 39 2022-10-11 40 2022-10-12 41 2022-10-13 42 2022-10-14 43 2022-10-15 44 2022-10-16 45 2022-10-17 46 2022-10-18 47 2022-10-19 48 2022-10-20 49 2022-10-21 50 2022-10-22 51 2022-10-23 52 2022-10-24 53 2022-10-25 54 2022-10-26 55 2022-10-27 56 2022-10-28 57 2022-10-29 58 2022-10-30 59 2022-10-31 60 Freq: D, dtype: int32 *****resample重采样***** 2022-09-04 6 2022-09-11 49 2022-09-18 98 2022-09-25 147 2022-10-02 196 2022-10-09 245 2022-10-16 294 2022-10-23 343 2022-10-30 392 2022-11-06 375 Freq: W-SUN, dtype: int32 2022-09-05 10 2022-09-12 56 2022-09-19 105 2022-09-26 154 2022-10-03 203 2022-10-10 252 2022-10-17 301 2022-10-24 350 2022-10-31 399 2022-11-07 315 Freq: W-MON, dtype: int32 2022-09-30 14.5 2022-10-31 45.0 2022-11-30 63.0 Freq: M, dtype: float64
demo12-Pandas文件操作
# demo12 - pandas file I/O
# CSV files are comma-separated. Tab-separated text needs sep="\t".
# NOTE: .xlsx is an Excel workbook, not delimited text; read it with pd.read_excel.
#
# read_csv parameters:
#   index_col    column to use as the index, by position or by name,
#                e.g. index_col=0 or index_col="date"
#   parse_dates  True tries to parse the index as dates; a list parses the
#                listed columns as dates
#   header       read_csv treats the first row as column names by default;
#                pass header=None when the file has no header row
#   names        column names to use when header=None
#   skiprows     rows to skip (rarely needed)
#   na_values    extra strings to treat as NaN, so that differently spelled
#                missing-value markers end up as one NaN
# Assigning to data.columns renames the columns.
#
# to_csv parameters:
#   sep      field separator, "," by default
#   na_rep   text written for missing values, empty string by default
#   header   pass False to leave out the column names
#   index    pass False to leave out the row index
#   columns  list of columns to write
# Other writers include to_json, to_excel, to_pickle, ...
csv_path = "601318.csv"

data = pd.read_csv(csv_path, index_col="date")
print(type(data.index))  # not a datetime index yet

data = pd.read_csv(csv_path, index_col="date", parse_dates=True)
print(type(data.index))

data = pd.read_csv(csv_path, index_col="date", parse_dates=["date"])
print(type(data.index))

data = pd.read_csv(csv_path, index_col="date")
data.columns = ["a", "b", "c", "d", "e", "f", "g"]  # rename all seven columns
print(data)

data = pd.read_csv(csv_path, index_col="date", parse_dates=["date"])
print(data)
# Important: with a datetime index, a partial date string selects a whole month.
print(data.loc["2017-5", "close"])
data.to_csv(
    "test.csv",
    columns=["close", "open"],
    header=False,
    index=False,
)
<class 'pandas.core.indexes.base.Index'> <class 'pandas.core.indexes.datetimes.DatetimeIndex'> <class 'pandas.core.indexes.datetimes.DatetimeIndex'> a b c d e f g date 2007/3/1 0 21.878 20.473 22.302 20.040 1977633.51 601318 2007/3/2 1 20.565 20.307 20.758 20.075 425048.32 601318 2007/3/5 2 20.119 19.419 20.202 19.047 419196.74 601318 2007/3/6 3 19.253 19.800 20.128 19.143 297727.88 601318 2007/3/7 4 19.817 20.338 20.522 19.651 287463.78 601318 ... ... ... ... ... ... ... ... 2017/12/11 2558 71.200 73.250 73.310 70.820 1139927.00 601318 2017/12/12 2559 73.250 71.210 73.560 71.170 777900.00 601318 2017/12/13 2560 71.210 72.120 72.620 70.200 865117.00 601318 2017/12/14 2561 72.120 71.010 72.160 70.600 676186.00 601318 2017/12/15 2562 70.690 70.380 71.440 70.050 735547.00 601318 [2563 rows x 7 columns] Unnamed: 0 open close high low volume code date 2007-03-01 0 21.878 20.473 22.302 20.040 1977633.51 601318 2007-03-02 1 20.565 20.307 20.758 20.075 425048.32 601318 2007-03-05 2 20.119 19.419 20.202 19.047 419196.74 601318 2007-03-06 3 19.253 19.800 20.128 19.143 297727.88 601318 2007-03-07 4 19.817 20.338 20.522 19.651 287463.78 601318 ... ... ... ... ... ... ... ... 2017-12-11 2558 71.200 73.250 73.310 70.820 1139927.00 601318 2017-12-12 2559 73.250 71.210 73.560 71.170 777900.00 601318 2017-12-13 2560 71.210 72.120 72.620 70.200 865117.00 601318 2017-12-14 2561 72.120 71.010 72.160 70.600 676186.00 601318 2017-12-15 2562 70.690 70.380 71.440 70.050 735547.00 601318 [2563 rows x 7 columns] date 2017-05-02 37.167 2017-05-03 37.255 2017-05-04 37.079 2017-05-05 36.530 2017-05-08 37.049 2017-05-09 37.245 2017-05-10 39.059 2017-05-11 38.990 2017-05-12 40.147 2017-05-15 40.098 2017-05-16 40.285 2017-05-17 39.628 2017-05-18 39.824 2017-05-19 40.206 2017-05-22 42.000 2017-05-23 42.324 2017-05-24 42.186 2017-05-25 44.598 2017-05-26 44.294 2017-05-31 44.187 Name: close, dtype: float64