# Dataset: link
import warnings
import numpy as np
import pandas as pd
# Suppress every warning process-wide (this also hides warnings raised by the
# libraries imported later) — presumably to keep the printed metrics readable.
warnings.filterwarnings("ignore")
# Glob patterns of the class folders; the position in this tuple is the class label.
_TROJAN_PATTERNS = (
    "./trojan0/*",
    "./trojan1/*",
    "./trojan2/*",
    "./trojan3/*",
    "./trojan4/*",
)


def _load_spectrogram_dataset(patterns=_TROJAN_PATTERNS, files_per_class=10,
                              window=1500, channels=3):
    """Build a flat spectrogram feature matrix from randomly sampled CSV files.

    For every glob pattern (one per class) the matching paths are shuffled with
    ``np.random.shuffle`` and the first ``files_per_class`` are read.  Each file
    is cut into consecutive, non-overlapping windows of ``window`` rows; the
    trailing partial window and the last CSV column are discarded.  Every
    channel of every window is turned into a spectrogram with torchaudio's
    default ``Spectrogram`` settings, and the per-channel spectrograms are
    concatenated and flattened into one feature row per window.

    Args:
        patterns: Glob patterns, one per class; the index is the class label.
        files_per_class: Maximum number of files sampled per class.
        window: Number of rows per window.
        channels: Number of signal columns expected after the last column is
            dropped.

    Returns:
        A ``(features, labels)`` pair: ``features`` is a 2-D NumPy array with
        one row per window, ``labels`` a list of integer class labels of the
        same length.

    Raises:
        ValueError: If no file contributed a complete window.
    """
    from glob import glob

    import torch
    import torchaudio.transforms as transforms

    # A single transform instance is reused: it is configured identically for
    # every call, so rebuilding it per channel and per file is wasted work.
    spectrogram = transforms.Spectrogram()

    features = []
    labels = []
    for label, pattern in enumerate(patterns):
        paths = glob(pattern)
        np.random.shuffle(paths)  # random sample of files; not seeded here
        for path in paths[:files_per_class]:
            frame = pd.read_csv(path)
            usable_rows = (frame.shape[0] // window) * window
            if usable_rows == 0:
                # Shorter than one window: it contributes no samples, so skip
                # it rather than pushing an empty batch through the transform.
                print(f"Skipping {path}: fewer than {window} rows")
                continue
            # Drop the last column and group the rest into (n_windows, window,
            # channels).  NOTE(review): this assumes the CSV has exactly
            # `channels` signal columns followed by one extra column — confirm.
            windows = frame.values[:usable_rows, :-1].reshape([-1, window, channels])
            # Call the module itself (not .forward) as PyTorch recommends.
            spectra = [
                spectrogram(torch.Tensor(windows[:, :, channel]))
                for channel in range(channels)
            ]
            # NOTE: the spectrogram values are used as-is, without dB scaling.
            flat = torch.concat(spectra, 1).numpy().reshape([len(windows), -1])
            features.append(flat)
            labels.extend([label] * len(flat))

    if not features:
        raise ValueError(f"No usable samples found for patterns {list(patterns)}")
    # Keep features as one array instead of nested Python lists; the
    # estimators convert to arrays anyway.
    return np.concatenate(features), labels


def scan_read_data():
    """Train a random forest on the spectrogram features and print its metrics.

    Loads the dataset, holds out 20% of the windows as a test set, fits a
    100-tree ``RandomForestClassifier`` and prints the accuracy, the confusion
    matrix and the classification report for the held-out windows.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split

    data_set, data_set_label = _load_spectrogram_dataset()
    feature_train, feature_test, target_train, target_test = train_test_split(
        data_set, data_set_label, test_size=0.2, random_state=0)

    clf = RandomForestClassifier(n_estimators=100, random_state=33, n_jobs=-1)
    clf.fit(feature_train, target_train)
    predict_results = clf.predict(feature_test)

    # (y_true, y_pred) order, consistent with the other metric calls.
    print(accuracy_score(target_test, predict_results))
    print(confusion_matrix(target_test, predict_results))
    print(classification_report(target_test, predict_results))


def scan_read_data1():
    """Train an XGBoost classifier on the spectrogram features and print its metrics.

    Loads the dataset, holds out 20% of the windows as a test set, fits an
    ``XGBClassifier`` and prints the accuracy (two decimals) followed by the
    classification report for the held-out windows.
    """
    import xgboost as xgb
    from sklearn.metrics import accuracy_score, classification_report
    from sklearn.model_selection import train_test_split

    data_set, data_set_label = _load_spectrogram_dataset()

    # Split the dataset into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        data_set, data_set_label, test_size=0.2, random_state=42)

    # Initialise the XGBoost classifier; one class per folder pattern.
    xgb_clf = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss',
                                num_class=len(_TROJAN_PATTERNS))
    # Train the model.
    xgb_clf.fit(X_train, y_train)
    # Predict on the test set.
    y_pred = xgb_clf.predict(X_test)

    # Evaluate the model.  The f-string is kept on one line: a replacement
    # field split across lines is a syntax error before Python 3.12.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    # Print the more detailed classification report.
    print(classification_report(y_test, y_pred))
if __name__ == '__main__':
    # Run both experiments back to back, in the order they are defined.
    for experiment in (scan_read_data, scan_read_data1):
        experiment()