# Load the employee dataset, then compute the mean base salary for each race
# and cast the result to integers.
employee = pd.read_csv('data/employee.csv')
(
    employee
    .groupby(by='RACE')['BASE_SALARY']
    .mean()
    .astype(int)
)
RACE
American Indian or Alaskan Native    60272
Asian/Pacific Islander               61660
Black or African American            50137
Hispanic/Latino                      52345
Others                               51278
White                                64419
Name: BASE_SALARY, dtype: int32
# Group by both race and gender, then compute the mean base salary
# (cast to integers) for every (RACE, GENDER) pair.
agg = (
    employee
    .groupby(by=['RACE', 'GENDER'])['BASE_SALARY']
    .mean()
    .astype(int)
)
agg
'''
RACE                               GENDER
American Indian or Alaskan Native  Female    60238
                                   Male      60305
Asian/Pacific Islander             Female    63226
                                   Male      61033
Black or African American          Female    48915
                                   Male      51082
Hispanic/Latino                    Female    46503
                                   Male      54782
Others                             Female    63785
                                   Male      38771
White                              Female    66793
                                   Male      63940
Name: BASE_SALARY, dtype: int32
'''
# stack: reshape the data from a "table" layout into a nested ("curly-brace")
# layout, i.e. move the column index into the row index.
# unstack: reshape the data from the nested layout back into a "table" layout,
# i.e. move a row-index level into the column index.
agg.unstack(level='GENDER')
GENDER                            | Female | Male  |
RACE                              |        |       |
American Indian or Alaskan Native | 60238  | 60305 |
Asian/Pacific Islander            | 63226  | 61033 |
Black or African American         | 48915  | 51082 |
Hispanic/Latino                   | 46503  | 54782 |
Others                            | 63785  | 38771 |
White                             | 66793  | 63940 |
# Unstack on the RACE index level instead, so each race becomes a column.
agg.unstack(level='RACE')
RACE   | American Indian or Alaskan Native | Asian/Pacific Islander | Black or African American | Hispanic/Latino | Others | White |
GENDER |                                   |                        |                           |                 |        |       |
Female | 60238                             | 63226                  | 48915                     | 46503           | 63785  | 66793 |
Male   | 60305                             | 61033                  | 51082                     | 54782           | 38771  | 63940 |
# Using unstack with a MultiIndex.
# Group by RACE and GENDER, then compute the mean, maximum and minimum base
# salary for each group (cast to integers).
agg2 = (
    employee
    .groupby(by=['RACE', 'GENDER'])['BASE_SALARY']
    .agg(['mean', 'max', 'min'])
    .astype(int)
)
agg2
                                  |        | mean  | max    | min   |
RACE                              | GENDER |       |        |       |
American Indian or Alaskan Native | Female | 60238 | 98536  | 26125 |
                                  | Male   | 60305 | 81239  | 26125 |
Asian/Pacific Islander            | Female | 63226 | 130416 | 26125 |
                                  | Male   | 61033 | 163228 | 27914 |
Black or African American         | Female | 48915 | 150416 | 24960 |
                                  | Male   | 51082 | 275000 | 26125 |
Hispanic/Latino                   | Female | 46503 | 126115 | 26125 |
                                  | Male   | 54782 | 165216 | 26104 |
Others                            | Female | 63785 | 63785  | 63785 |
                                  | Male   | 38771 | 38771  | 38771 |
White                             | Female | 66793 | 178331 | 27955 |
                                  | Male   | 63940 | 210588 | 26125 |
# Here unstack('GENDER') produces multi-level column labels; stack and unstack
# can be combined to rearrange the layout further.
agg2.unstack(level='GENDER')
                                  | mean   |       | max    |        | min    |       |
GENDER                            | Female | Male  | Female | Male   | Female | Male  |
RACE                              |        |       |        |        |        |       |
American Indian or Alaskan Native | 60238  | 60305 | 98536  | 81239  | 26125  | 26125 |
Asian/Pacific Islander            | 63226  | 61033 | 130416 | 163228 | 26125  | 27914 |
Black or African American         | 48915  | 51082 | 150416 | 275000 | 24960  | 26125 |
Hispanic/Latino                   | 46503  | 54782 | 126115 | 165216 | 26125  | 26104 |
Others                            | 63785  | 38771 | 63785  | 38771  | 63785  | 38771 |
White                             | 66793  | 63940 | 178331 | 210588 | 27955  | 26125 |