为了更方便地重塑(reshaping)数据,先为索引的各个层级重新命名

import pandas as pd
# Read the college dataset, group it by STABBR and RELAFFIL, and compute
# count/min/max statistics for the UGDS and SATMTMID columns (undergraduate
# SAT math score information); keep only the first six rows.
college = pd.read_csv('data/college.csv')
# Select the columns with a list (double brackets): indexing a GroupBy with a
# bare tuple of keys was deprecated in pandas 1.0 and raises in pandas 2.0+.
cg = (
    college.groupby(['STABBR', 'RELAFFIL'])[['UGDS', 'SATMTMID']]
    .agg(['count', 'min', 'max'])
    .head(6)
)
cg
    UGDS SATMTMID
    count min max count min max
STABBR RELAFFIL            
AK 0 7 109.0 12865.0 0 NaN NaN
1 3 27.0 275.0 1 503.0 503.0
AL 0 71 12.0 29851.0 13 420.0 590.0
1 18 13.0 3033.0 8 400.0 560.0
AR 0 68 18.0 21405.0 9 427.0 565.0
1 14 20.0 4485.0 7 495.0 600.0
# Both row-index levels already carry names, but the column-index levels do
# not; use rename_axis to name the two column levels.
column_level_names = ['AGG_COLS', 'AGG_FUNCS']
cg = cg.rename_axis(column_level_names, axis='columns')
cg
  AGG_COLS UGDS SATMTMID
  AGG_FUNCS count min max count min max
STABBR RELAFFIL            
AK 0 7 109.0 12865.0 0 NaN NaN
1 3 27.0 275.0 1 503.0 503.0
AL 0 71 12.0 29851.0 13 420.0 590.0
1 18 13.0 3033.0 8 400.0 560.0
AR 0 68 18.0 21405.0 9 427.0 565.0
1 14 20.0 4485.0 7 495.0 600.0
# Move the AGG_FUNCS column level into the row index and preview the result.
stacked_by_func = cg.stack('AGG_FUNCS')
stacked_by_func.head()
    AGG_COLS UGDS SATMTMID
STABBR RELAFFIL AGG_FUNCS    
AK 0 count 7.0 0.0
min 109.0 NaN
max 12865.0 NaN
1 count 3.0 1.0
min 27.0 503.0
# By default stack puts the moved column level at the innermost position of
# the row index; swaplevel can be used to change the level order.
(
    cg.stack('AGG_FUNCS')
    .swaplevel('AGG_FUNCS', 'STABBR', axis='index')
    .head()
)
    AGG_COLS UGDS SATMTMID
AGG_FUNCS RELAFFIL STABBR    
count 0 AK 7.0 0.0
min 0 AK 109.0 NaN
max 0 AK 12865.0 NaN
count 1 AK 3.0 1.0
min 1 AK 27.0 503.0
# Building on the previous step, additionally apply sort_index to both axes
# (swaplevel rearranges the level order of the hierarchical index).
(
    cg.stack('AGG_FUNCS')
    .swaplevel('AGG_FUNCS', 'STABBR', axis='index')
    .sort_index(level='RELAFFIL', axis='index')
    .sort_index(level='AGG_COLS', axis='columns')
    .head(6)
)
    AGG_COLS SATMTMID UGDS
AGG_FUNCS RELAFFIL STABBR    
count 0 AK 0.0 7.0
AL 13.0 71.0
AR 9.0 68.0
max 0 AK NaN 12865.0
AL 590.0 29851.0
AR 565.0 21405.0
# Stack one level (AGG_FUNCS) into the row index while unstacking other
# levels (RELAFFIL, STABBR) into the columns.
levels_to_unstack = ['RELAFFIL', 'STABBR']
cg.stack('AGG_FUNCS').unstack(levels_to_unstack)
AGG_COLS UGDS SATMTMID
RELAFFIL 0 1 0 1 0 1 0 1 0 1 0 1
STABBR AK AK AL AL AR AR AK AK AL AL AR AR
AGG_FUNCS                        
count 7.0 3.0 71.0 18.0 68.0 14.0 0.0 1.0 13.0 8.0 9.0 7.0
min 109.0 27.0 12.0 13.0 18.0 20.0 NaN 503.0 420.0 400.0 427.0 495.0
max 12865.0 275.0 29851.0 3033.0 21405.0 4485.0 NaN 503.0 590.0 560.0 565.0 600.0
# Stacking every column level returns a Series.
all_column_levels = ['AGG_FUNCS', 'AGG_COLS']
cg.stack(all_column_levels).head(12)
'''
STABBR  RELAFFIL  AGG_FUNCS  AGG_COLS
AK      0         count      UGDS            7.0
                             SATMTMID        0.0
                  min        UGDS          109.0
                  max        UGDS        12865.0
        1         count      UGDS            3.0
                             SATMTMID        1.0
                  min        UGDS           27.0
                             SATMTMID      503.0
                  max        UGDS          275.0
                             SATMTMID      503.0
AL      0         count      UGDS           71.0
                             SATMTMID       13.0
dtype: float64

'''
# Remove the names from every level of both the row and column indexes.
no_names = [None, None]
(
    cg.rename_axis(no_names, axis='index')
    .rename_axis(no_names, axis='columns')
)
    UGDS SATMTMID
    count min max count min max
AK 0 7 109.0 12865.0 0 NaN NaN
1 3 27.0 275.0 1 503.0 503.0
AL 0 71 12.0 29851.0 13 420.0 590.0
1 18 13.0 3033.0 8 400.0 560.0
AR 0 68 18.0 21405.0 9 427.0 565.0
1 14 20.0 4485.0 7 495.0 600.0

猜你喜欢

转载自blog.csdn.net/weixin_48135624/article/details/114157189