one-hot编码方式的python实现

one-hot编码方式的实现

什么是One-Hot编码?

One-Hot编码,又称为一位有效编码,主要是采用N位状态寄存器来对N个状态进行编码,每个状态都由他独立的寄存器位,并且在任意时候只有一位有效。

One-Hot编码是分类变量作为二进制向量的表示。这首先要求将分类值映射到整数值。然后,每个整数值被表示为二进制向量,除了整数的索引之外,它都是零值,它被标记为1。

我们可以使用以下三种方式来实现one-hot:

1python代码编写

// An highlighted block
from numpy import argmax
# define input string
data = 'hello world'
print(data)
# define universe of possible input values
alphabet = 'abcdefghijklmnopqrstuvwxyz '
# define a mapping of chars to integers
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))
# integer encode input data
integer_encoded = [char_to_int[char] for char in data]
print(integer_encoded)
# one hot encode
onehot_encoded = list()
for value in integer_encoded:
       letter = [0 for _ in range(len(alphabet))]
       letter[value] = 1
       onehot_encoded.append(letter)
print(onehot_encoded)
# invert encoding
inverted = int_to_char[argmax(onehot_encoded[0])]
print(inverted)

2基于scikit-learn

// An highlighted block
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# define example
data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']
values = array(data)
print(values)
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)
# binary encode
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
print(onehot_encoded)
# invert first example
inverted = label_encoder.inverse_transform([argmax(onehot_encoded[0, :])])
print(inverted)

3基于keras

// An highlighted block
from numpy import array
from numpy import argmax
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
# define example
##对字符串
data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']
values = array(data)
print(values)
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)
 
##对数值
#data=[1, 3, 2, 0, 3, 2, 2, 1, 0, 1]
#data=array(data)
#print(data)
 
# one hot encode
encoded = to_categorical(integer_encoded)
print(encoded)
# invert encoding
inverted = argmax(encoded[0])
print(inverted)

将整个字符串转为one-hot

// An highlighted block
from numpy import array
from numpy import argmax
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
##对字符串
dataString="一乙二十丁厂七卜人入八九几儿了力乃刀又三於干亏士工土才寸下大丈与万上小口巾山千乞川亿个勺久凡及"
data=list(dataString)
values = array(data)
print(values)
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)
##对数值
# one hot encode
encoded = to_categorical(integer_encoded)
print(encoded)
# invert encoding
inverted = argmax(encoded[0])
print(inverted)

猜你喜欢

转载自blog.csdn.net/qq_28023365/article/details/86031314