批量进行 One-hot 编码

封装的代码如下，文件名为 my_one_hot_encoder.py：

import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

class One_hot_encoder:
    """One-hot encode several columns of an Excel sheet in one call.

    Attributes are stored as given and only used by
    ``multi_column_encoder``:

    * ``file_name`` -- passed unchanged to ``pandas.read_excel``.
    * ``column_name_list`` -- names of the columns to replace with
      one-hot indicator columns.
    """

    def __init__(self, file_name, column_name_list):
        self.file_name = file_name
        self.column_name_list = column_name_list

    def multi_column_encoder(self):
        """Read the sheet and replace each listed column with dummy columns.

        Every column named in ``column_name_list`` is label-encoded, then
        one-hot encoded. The resulting indicator columns are named
        ``<column>_<category>`` and appended after the remaining original
        columns, in the order the columns were listed. The listed source
        columns are removed from the result.

        Returns:
            pandas.DataFrame: the sheet with the listed columns replaced.
            An empty ``column_name_list`` yields the sheet unchanged.

        Raises:
            KeyError: if a listed column is not present in the sheet.
        """
        df = pd.read_excel(self.file_name)
        column_names = list(self.column_name_list)
        label_encoder = LabelEncoder()
        onehot_encoder = OneHotEncoder()

        dummy_frames = []
        for column_name in column_names:
            # Integer codes are kept in a local array rather than a scratch
            # column, so an existing column in the sheet can never be
            # overwritten or dropped by accident.
            codes = label_encoder.fit_transform(df[column_name])
            encoded = onehot_encoder.fit_transform(codes.reshape(-1, 1)).toarray()
            # format() instead of "+" so non-string categories (e.g. ints)
            # and non-string column headers do not raise TypeError.
            dummy_names = [
                "{}_{}".format(column_name, label)
                for label in label_encoder.classes_
            ]
            # Reuse df's index so the final concat lines rows up positionally.
            dummy_frames.append(
                pd.DataFrame(encoded, columns=dummy_names, index=df.index)
            )

        df = df.drop(columns=column_names)
        # Single concat at the end instead of growing df on every iteration.
        return pd.concat([df] + dummy_frames, axis=1)

猜你喜欢

转载自my.oschina.net/kyo4321/blog/1823498