unstack()可以把stack结果还原回来

import pandas as pd
# 读取college数据集,学校名作为行索引,只选取本科生的列
usecol_func = lambda x: 'UGDS_' in x or x == 'INSTNM'
college = pd.read_csv('data/college.csv', index_col='INSTNM', usecols=usecol_func)
#read_csv()的usecols参数用于只读取特定的列(这里传入一个按列名筛选的函数),index_col参数指定用作行索引的列
college.head()
  UGDS_WHITE UGDS_BLACK UGDS_HISP UGDS_ASIAN UGDS_AIAN UGDS_NHPI UGDS_2MOR UGDS_NRA UGDS_UNKN
INSTNM                  
Alabama A & M University 0.0333 0.9353 0.0055 0.0019 0.0024 0.0019 0.0000 0.0059 0.0138
University of Alabama at Birmingham 0.5922 0.2600 0.0283 0.0518 0.0022 0.0007 0.0368 0.0179 0.0100
Amridge University 0.2990 0.4192 0.0069 0.0034 0.0000 0.0000 0.0000 0.0000 0.2715
University of Alabama in Huntsville 0.6988 0.1255 0.0382 0.0376 0.0143 0.0002 0.0172 0.0332 0.0350
Alabama State University 0.0158 0.9208 0.0121 0.0019 0.0010 0.0006 0.0098 0.0243 0.0137
# 用stack方法,将所有水平列名,转化为垂直的行索引
college_stacked = college.stack()
college_stacked.head(18)
# stack 的作用就是把列索引的层压入行索引

INSTNM                                         
Alabama A & M University             UGDS_WHITE    0.0333
                                     UGDS_BLACK    0.9353
                                     UGDS_HISP     0.0055
                                     UGDS_ASIAN    0.0019
                                     UGDS_AIAN     0.0024
                                     UGDS_NHPI     0.0019
                                     UGDS_2MOR     0.0000
                                     UGDS_NRA      0.0059
                                     UGDS_UNKN     0.0138
University of Alabama at Birmingham  UGDS_WHITE    0.5922
                                     UGDS_BLACK    0.2600
                                     UGDS_HISP     0.0283
                                     UGDS_ASIAN    0.0518
                                     UGDS_AIAN     0.0022
                                     UGDS_NHPI     0.0007
                                     UGDS_2MOR     0.0368
                                     UGDS_NRA      0.0179
                                     UGDS_UNKN     0.0100
dtype: float64
# unstack方法可以将其还原;unstack 的作用是把行索引(默认是最内层)转为列索引
college_stacked.unstack().head()
  UGDS_WHITE UGDS_BLACK UGDS_HISP UGDS_ASIAN UGDS_AIAN UGDS_NHPI UGDS_2MOR UGDS_NRA UGDS_UNKN
INSTNM                  
Alabama A & M University 0.0333 0.9353 0.0055 0.0019 0.0024 0.0019 0.0000 0.0059 0.0138
University of Alabama at Birmingham 0.5922 0.2600 0.0283 0.0518 0.0022 0.0007 0.0368 0.0179 0.0100
Amridge University 0.2990 0.4192 0.0069 0.0034 0.0000 0.0000 0.0000 0.0000 0.2715
University of Alabama in Huntsville 0.6988 0.1255 0.0382 0.0376 0.0143 0.0002 0.0172 0.0332 0.0350
Alabama State University 0.0158 0.9208 0.0121 0.0019 0.0010 0.0006 0.0098 0.0243 0.0137
# 另一种方式是先用melt,再用pivot。先加载数据,不指定行索引名
#可以使用Pandas melt()和pivot()/pivot_table()完成多列到多行的转换(下文用的是pivot())
#melt 是溶解/分解的意思, 即拆分数据。
#df.pivot() 将长数据集转换成宽数据集,df.melt() 则是将宽数据集变成长数据集
college2 = pd.read_csv('data/college.csv', usecols=usecol_func)
college2.head()
  INSTNM UGDS_WHITE UGDS_BLACK UGDS_HISP UGDS_ASIAN UGDS_AIAN UGDS_NHPI UGDS_2MOR UGDS_NRA UGDS_UNKN
0 Alabama A & M University 0.0333 0.9353 0.0055 0.0019 0.0024 0.0019 0.0000 0.0059 0.0138
1 University of Alabama at Birmingham 0.5922 0.2600 0.0283 0.0518 0.0022 0.0007 0.0368 0.0179 0.0100
2 Amridge University 0.2990 0.4192 0.0069 0.0034 0.0000 0.0000 0.0000 0.0000 0.2715
3 University of Alabama in Huntsville 0.6988 0.1255 0.0382 0.0376 0.0143 0.0002 0.0172 0.0332 0.0350
4 Alabama State University 0.0158 0.9208 0.0121 0.0019 0.0010 0.0006 0.0098 0.0243 0.0137
# 使用melt,将所有race列变为一列
college_melted = college2.melt(id_vars='INSTNM', var_name='Race',value_name='Percentage')
college_melted.head()
  INSTNM Race Percentage
0 Alabama A & M University UGDS_WHITE 0.0333
1 University of Alabama at Birmingham UGDS_WHITE 0.5922
2 Amridge University UGDS_WHITE 0.2990
3 University of Alabama in Huntsville UGDS_WHITE 0.6988
4 Alabama State University UGDS_WHITE 0.0158
# 用pivot还原(注意:结果的行索引和列名会按字母顺序排序,与原始顺序不同)
melted_inv = college_melted.pivot(index='INSTNM',columns='Race',values='Percentage')
melted_inv.head()
Race UGDS_2MOR UGDS_AIAN UGDS_ASIAN UGDS_BLACK UGDS_HISP UGDS_NHPI UGDS_NRA UGDS_UNKN UGDS_WHITE
INSTNM                  
A & W Healthcare Educators 0.0000 0.0 0.0000 0.9750 0.0250 0.0 0.0000 0.0000 0.0000
A T Still University of Health Sciences NaN NaN NaN NaN NaN NaN NaN NaN NaN
ABC Beauty Academy 0.0000 0.0 0.9333 0.0333 0.0333 0.0 0.0000 0.0000 0.0000
ABC Beauty College Inc 0.0000 0.0 0.0000 0.6579 0.0526 0.0 0.0000 0.0000 0.2895
AI Miami International University of Art and Design 0.0018 0.0 0.0018 0.0198 0.4773 0.0 0.0025 0.4644 0.0324

数据转置

# 使用最外层的行索引做unstack
college.stack().unstack(0)
INSTNM Alabama A & M University University of Alabama at Birmingham Amridge University University of Alabama in Huntsville Alabama State University The University of Alabama Central Alabama Community College Athens State University Auburn University at Montgomery Auburn University ... MCI Institute of Technology-Boca Raton West Coast University-Miami National American University-Houston Aparicio-Levy Technical College Fred D. Learey Technical College Hollywood Institute of Beauty Careers-West Palm Beach Hollywood Institute of Beauty Careers-Casselberry Coachella Valley Beauty College-Beaumont Dewey University-Mayaguez Coastal Pines Technical College
UGDS_WHITE 0.0333 0.5922 0.2990 0.6988 0.0158 0.7825 0.7255 0.7823 0.5328 0.8507 ... 0.0199 0.1522 0.1858 0.2431 0.3731 0.2182 0.1200 0.3284 0.0 0.6762
UGDS_BLACK 0.9353 0.2600 0.4192 0.1255 0.9208 0.1119 0.2613 0.1200 0.3376 0.0704 ... 0.2815 0.1739 0.6443 0.1215 0.1388 0.4182 0.3333 0.1045 0.0 0.2508
UGDS_HISP 0.0055 0.0283 0.0069 0.0382 0.0121 0.0348 0.0044 0.0191 0.0074 0.0248 ... 0.6854 0.6087 0.0672 0.6243 0.3080 0.2364 0.4400 0.4925 1.0 0.0359
UGDS_ASIAN 0.0019 0.0518 0.0034 0.0376 0.0019 0.0106 0.0025 0.0053 0.0221 0.0227 ... 0.0132 0.0217 0.0079 0.0055 0.0000 0.0182 0.0000 0.0149 0.0 0.0045
UGDS_AIAN 0.0024 0.0022 0.0000 0.0143 0.0010 0.0038 0.0044 0.0157 0.0044 0.0074 ... 0.0000 0.0000 0.0079 0.0055 0.0000 0.0000 0.0000 0.0299 0.0 0.0034
UGDS_NHPI 0.0019 0.0007 0.0000 0.0002 0.0006 0.0009 0.0000 0.0010 0.0016 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0149 0.0 0.0017
UGDS_2MOR 0.0000 0.0368 0.0000 0.0172 0.0098 0.0261 0.0000 0.0174 0.0297 0.0000 ... 0.0000 0.0435 0.0751 0.0000 0.0022 0.0000 0.0400 0.0149 0.0 0.0191
UGDS_NRA 0.0059 0.0179 0.0000 0.0332 0.0243 0.0268 0.0000 0.0057 0.0397 0.0100 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0182 0.0000 0.0000 0.0 0.0028
UGDS_UNKN 0.0138 0.0100 0.2715 0.0350 0.0137 0.0026 0.0019 0.0334 0.0246 0.0140 ... 0.0000 0.0000 0.0119 0.0000 0.1779 0.0909 0.0667 0.0000 0.0 0.0056

9 rows × 6874 columns

# 转置DataFrame更简单的方法是transpose()或T
#transpose()简单来说,就相当于数学中的转置,在矩阵中,转置就是把行与列相互调换位置。
#注意:stack()在这里会丢弃缺失值(NaN),所以上面stack().unstack(0)的结果只有6874列;直接转置则保留全部7535所学校(包括整行都是NaN的学校)
college.T
INSTNM Alabama A & M University University of Alabama at Birmingham Amridge University University of Alabama in Huntsville Alabama State University The University of Alabama Central Alabama Community College Athens State University Auburn University at Montgomery Auburn University ... Strayer University-North Dallas Strayer University-San Antonio Strayer University-Stafford WestMed College - Merced Vantage College SAE Institute of Technology San Francisco Rasmussen College - Overland Park National Personal Training Institute of Cleveland Bay Area Medical Academy - San Jose Satellite Location Excel Learning Center-San Antonio South
UGDS_WHITE 0.0333 0.5922 0.2990 0.6988 0.0158 0.7825 0.7255 0.7823 0.5328 0.8507 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_BLACK 0.9353 0.2600 0.4192 0.1255 0.9208 0.1119 0.2613 0.1200 0.3376 0.0704 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_HISP 0.0055 0.0283 0.0069 0.0382 0.0121 0.0348 0.0044 0.0191 0.0074 0.0248 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_ASIAN 0.0019 0.0518 0.0034 0.0376 0.0019 0.0106 0.0025 0.0053 0.0221 0.0227 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_AIAN 0.0024 0.0022 0.0000 0.0143 0.0010 0.0038 0.0044 0.0157 0.0044 0.0074 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_NHPI 0.0019 0.0007 0.0000 0.0002 0.0006 0.0009 0.0000 0.0010 0.0016 0.0000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_2MOR 0.0000 0.0368 0.0000 0.0172 0.0098 0.0261 0.0000 0.0174 0.0297 0.0000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_NRA 0.0059 0.0179 0.0000 0.0332 0.0243 0.0268 0.0000 0.0057 0.0397 0.0100 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
UGDS_UNKN 0.0138 0.0100 0.2715 0.0350 0.0137 0.0026 0.0019 0.0334 0.0246 0.0140 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

9 rows × 7535 columns


wide_to_long同时stack多列

详见:https://blog.csdn.net/weixin_48135624/article/details/114156665?spm=1001.2014.3001.5501

猜你喜欢

转载自blog.csdn.net/weixin_48135624/article/details/114156773