Python数据分析与机器学习-Pandas_1

import pandas
# Load the nutrition table from the CSV file into a DataFrame.
food_info = pandas.read_csv("food_info.csv")
# Show the type of the loaded object, then the dtype of each column.
print(type(food_info))
print(food_info.dtypes)

<class 'pandas.core.frame.DataFrame'>
NDB_No               int64
Shrt_Desc           object
Water_(g)          float64
Energ_Kcal           int64
Protein_(g)        float64
Lipid_Tot_(g)      float64
Ash_(g)            float64
Carbohydrt_(g)     float64
Fiber_TD_(g)       float64
Sugar_Tot_(g)      float64
Calcium_(mg)       float64
Iron_(mg)          float64
Magnesium_(mg)     float64
Phosphorus_(mg)    float64
Potassium_(mg)     float64
Sodium_(mg)        float64
Zinc_(mg)          float64
Copper_(mg)        float64
Manganese_(mg)     float64
Selenium_(mcg)     float64
Vit_C_(mg)         float64
Thiamin_(mg)       float64
Riboflavin_(mg)    float64
Niacin_(mg)        float64
Vit_B6_(mg)        float64
Vit_B12_(mcg)      float64
Vit_A_IU           float64
Vit_A_RAE          float64
Vit_E_(mg)         float64
Vit_D_mcg          float64
Vit_D_IU           float64
Vit_K_(mcg)        float64
FA_Sat_(g)         float64
FA_Mono_(g)        float64
FA_Poly_(g)        float64
Cholestrl_(mg)     float64
dtype: object

# Alternative kept for reference: store the result of head() and print it.
# first_rows = food_info.head()
# print(first_rows)
# NOTE: the next two calls are bare expressions, so the rows they return
# (the first 3 and the last 4) are not printed; the output that follows
# shows only the column labels and the shape. Wrap them in print(...) to
# see the rows.
food_info.head(3)
food_info.tail(4)
# Column labels of the DataFrame, then its shape as (rows, columns).
print(food_info.columns)
print(food_info.shape)

Index(['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)',
       'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)',
       'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)',
       'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)',
       'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)',
       'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)',
       'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg',
       'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)',
       'Cholestrl_(mg)'],
      dtype='object')
(8618, 36)

# Label-based slice: .loc includes both endpoints, so this prints the rows
# labelled 3, 4, 5 and 6.
print(food_info.loc[3:6])
print("--------------------------------")
# Passing a list of labels prints exactly those rows: 2, 5 and 10.
print(food_info.loc[[2,5,10]])

   NDB_No         Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  \
3    1004       CHEESE BLUE      42.41         353        21.40   
4    1005      CHEESE BRICK      41.11         371        23.24   
5    1006       CHEESE BRIE      48.42         334        20.75   
6    1007  CHEESE CAMEMBERT      51.80         300        19.80   

   Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  \
3          28.74     5.11            2.34           0.0           0.50  ...   
4          29.68     3.18            2.79           0.0           0.51  ...   
5          27.68     2.70            0.45           0.0           0.45  ...   
6          24.26     3.68            0.46           0.0           0.46  ...   

   Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  \
3     721.0      198.0        0.25        0.5      21.0          2.4   
4    1080.0      292.0        0.26        0.5      22.0          2.5   
5     592.0      174.0        0.24        0.5      20.0          2.3   
6     820.0      241.0        0.21        0.4      18.0          2.0   

   FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
3      18.669        7.778        0.800            75.0  
4      18.764        8.598        0.784            94.0  
5      17.410        8.013        0.826           100.0  
6      15.259        7.023        0.724            72.0  

[4 rows x 36 columns]
--------------------------------
    NDB_No             Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  \
2     1003  BUTTER OIL ANHYDROUS       0.24         876         0.28   
5     1006           CHEESE BRIE      48.42         334        20.75   
10    1011          CHEESE COLBY      38.20         394        23.76   

    Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  \
2           99.48     0.00            0.00           0.0           0.00  ...   
5           27.68     2.70            0.45           0.0           0.45  ...   
10          32.11     3.36            2.57           0.0           0.52  ...   

    Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  \
2     3069.0      840.0        2.80        1.8      73.0          8.6   
5      592.0      174.0        0.24        0.5      20.0          2.3   
10     994.0      264.0        0.28        0.6      24.0          2.7   

    FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
2       61.924       28.732        3.694           256.0  
5       17.410        8.013        0.826           100.0  
10      20.218        9.280        0.953            95.0  

[3 rows x 36 columns]

# Indexing the DataFrame with a single column name gives a Series holding
# the "NDB_No" column.
ndb_col = food_info["NDB_No"]
# (Printing is deferred to the end of the cell so the column is shown once.)
# The column name can equally be supplied through a string variable; this
# reassigns ndb_col to the same column.
col_name = "NDB_No"
ndb_col = food_info[col_name]
print(ndb_col)

0        1001
1        1002
2        1003
3        1004
4        1005
5        1006
6        1007
7        1008
8        1009
9        1010
10       1011
11       1012
12       1013
13       1014
14       1015
15       1016
16       1017
17       1018
18       1019
19       1020
20       1021
21       1022
22       1023
23       1024
24       1025
25       1026
26       1027
27       1028
28       1029
29       1030
        ...  
8588    43544
8589    43546
8590    43550
8591    43566
8592    43570
8593    43572
8594    43585
8595    43589
8596    43595
8597    43597
8598    43598
8599    44005
8600    44018
8601    44048
8602    44055
8603    44061
8604    44074
8605    44110
8606    44158
8607    44203
8608    44258
8609    44259
8610    44260
8611    48052
8612    80200
8613    83110
8614    90240
8615    90480
8616    90560
8617    93600
Name: NDB_No, Length: 8618, dtype: int64

# Indexing with a list of column names yields a DataFrame that contains
# only those columns, in the order given.
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]
print(zinc_copper)

      Zinc_(mg)  Copper_(mg)
0          0.09        0.000
1          0.05        0.016
2          0.01        0.001
3          2.66        0.040
4          2.60        0.024
5          2.38        0.019
6          2.38        0.021
7          2.94        0.024
8          3.43        0.056
9          2.79        0.042
10         3.07        0.042
11         0.40        0.029
12         0.33        0.040
13         0.47        0.030
14         0.51        0.033
15         0.38        0.028
16         0.51        0.019
17         3.75        0.036
18         2.88        0.032
19         3.50        0.025
20         1.14        0.080
21         3.90        0.036
22         3.90        0.032
23         2.10        0.021
24         3.00        0.032
25         2.92        0.011
26         2.46        0.022
27         2.76        0.025
28         3.61        0.034
29         2.81        0.031
...         ...          ...
8588       3.30        0.377
8589       0.05        0.040
8590       0.05        0.030
8591       1.15        0.116
8592       5.03        0.200
8593       3.83        0.545
8594       0.08        0.035
8595       3.90        0.027
8596       4.10        0.100
8597       3.13        0.027
8598       0.13        0.000
8599       0.02        0.000
8600       0.09        0.037
8601       0.21        0.026
8602       2.77        0.571
8603       0.41        0.838
8604       0.05        0.028
8605       0.03        0.023
8606       0.10        0.112
8607       0.02        0.020
8608       1.49        0.854
8609       0.19        0.040
8610       0.10        0.038
8611       0.85        0.182
8612       1.00        0.250
8613       1.10        0.100
8614       1.55        0.033
8615       0.19        0.020
8616       1.00        0.400
8617       1.00        0.250

[8618 rows x 2 columns]

# Show every column label and the first two rows for reference.
print(food_info.columns)
print(food_info.head(2))

# Keep only the columns measured in grams: their names end with "(g)".
# Names ending in "(mg)" or "(mcg)" do not match, because the suffix test
# includes the opening parenthesis.
col_names = food_info.columns.tolist()
gram_columns = [name for name in col_names if name.endswith("(g)")]

# Indexing with the list of names yields a DataFrame of just those columns.
gram_df = food_info[gram_columns]
print(gram_df.head(3))

Index(['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)',
       'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)',
       'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)',
       'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)',
       'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)',
       'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)',
       'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg',
       'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)',
       'Cholestrl_(mg)'],
      dtype='object')
   NDB_No                 Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  \
0    1001          BUTTER WITH SALT      15.87         717         0.85   
1    1002  BUTTER WHIPPED WITH SALT      15.87         717         0.85   

   Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  \
0          81.11     2.11            0.06           0.0           0.06  ...   
1          81.11     2.11            0.06           0.0           0.06  ...   

   Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  \
0    2499.0      684.0        2.32        1.5      60.0          7.0   
1    2499.0      684.0        2.32        1.5      60.0          7.0   

   FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
0      51.368       21.021        3.043           215.0  
1      50.489       23.426        3.012           219.0  

[2 rows x 36 columns]
   Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0      15.87         0.85          81.11     2.11            0.06   
1      15.87         0.85          81.11     2.11            0.06   
2       0.24         0.28          99.48     0.00            0.00   

   Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
0           0.0           0.06      51.368       21.021        3.043  
1           0.0           0.06      50.489       23.426        3.012  
2           0.0           0.00      61.924       28.732        3.694

Python数据分析与机器学习-Pandas_1

猜你喜欢