# 预处理5

###王博士（2）
import matplotlib.pyplot as plt
import numpy.linalg as LA
import scipy.io as scio
import numpy as np
import xlrd
import os,time,sys
import warnings
warnings.filterwarnings('ignore')
from feature_extraction import get_xlsxlist,print_progress,sample_num,feature_extr
def get_xlsxlist(xlsx_path, mode):
    """Return the paths of entries in *xlsx_path* whose names end with *mode*.

    Args:
        xlsx_path: Directory to list.
        mode: Suffix that an entry name must end with (e.g. ``'xlsx'``).

    Returns:
        A list of ``os.path.join(xlsx_path, entry)`` strings, in the order
        produced by ``os.listdir``.
    """
    matches = []
    for entry in os.listdir(xlsx_path):
        if not entry.endswith(mode):
            continue
        matches.append(os.path.join(xlsx_path, entry))
    return matches
# Collect the paths of every entry in the data directory whose name ends in 'xlsx'.
# NOTE(review): the original inline comment said "gill net", but the directory
# name reads as "trawl" -- confirm which gear type this data set holds.
xlls = get_xlsxlist('./拖网/', 'xlsx') # gill net
# Alternative data location, left commented out:
#file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
# NAME is a shallow copy of the path list; Name is a second name bound to that
# same copy (not another copy).
NAME = xlls[:]
Name=NAME
# L collects, per workbook, the number of values in column index 1 of the first
# sheet after dropping row 0 (presumably a header row -- TODO confirm).
L = []
for name in Name:
    excel_trawl = xlrd.open_workbook(name)
    sheet = excel_trawl.sheet_by_index(0)
    x = sheet.col_values(1)[1:]
    L.append(len(x))    
# The per-workbook row counts as a float32 array.
S = np.array(L, dtype=np.float32) 
# Presumably the histogram bin count and the upper speed limit used for the
# speed features; the units of max_speed are not shown here.
num_bin = 100
max_speed = 25
# Bare expression: has no effect when run as a script (looks like a leftover
# notebook cell that displayed the number of workbooks).
len(Name)
# Wall-clock timing around the feature extraction over all workbooks.
# In a top-to-bottom run, feature_extr here is the function imported from
# feature_extraction: the def with the same name further down in this file has
# not been executed yet when this line runs.
start_time = time.time()
gill_net_sample, speed_feature_g = feature_extr(Name)
end_time = time.time()
def _stack_rows(rows, width):
    """Stack feature rows into a 2-D array with ``width`` columns."""
    # The zero seed row pins the column count and a float dtype, and makes an
    # empty ``rows`` produce a (0, width) array; it is sliced off again.
    return np.vstack([np.zeros(width)] + list(rows))[1:]


def feature_extr(NAME_list, feature_dim=1000, feature_speed=100, max_speed=25):
    """Extract trajectory and speed-histogram features from workbooks.

    For each path in ``NAME_list`` the first sheet of the workbook is read and
    row 0 of every column is skipped (presumably a header -- TODO confirm).

    * Columns 1 and 2 (presumably the position coordinates) are projected onto
      column 0 of the eigenvector matrix of their 2x2 covariance matrix, and
      the projection is resampled with ``sample_num`` to ``feature_dim`` values.
    * Column 3 values that are ``<= max_speed`` are binned into a
      density-normalised histogram with ``feature_speed`` bins. The same
      histogram is also drawn on a new matplotlib figure per workbook.

    Args:
        NAME_list: Iterable-with-length of workbook paths to process.
        feature_dim: Number of samples requested from ``sample_num`` per
            workbook.
        feature_speed: Number of histogram bins for the speed feature.
        max_speed: Column 3 values above this limit are discarded.

    Returns:
        Tuple ``(gill_net_sample, speed_feature_g)``: one row per workbook.
        ``gill_net_sample`` has ``feature_dim`` columns (assuming ``sample_num``
        returns that many values) and ``speed_feature_g`` has ``feature_speed``
        columns, divided by the largest histogram value over all workbooks.
        For an empty ``NAME_list`` both arrays have zero rows.
    """
    print('Start to extract the features:')
    total = len(NAME_list)
    track_rows = []  # one resampled projection per workbook
    speed_rows = []  # one speed histogram per workbook
    for num, name in enumerate(NAME_list):
        print_progress(num + 1, total)
        # NOTE(review): xlrd 2.x no longer reads .xlsx files; this needs
        # xlrd < 2.0 (or a different reader) if the inputs are .xlsx.
        sheet = xlrd.open_workbook(name).sheet_by_index(0)
        x = sheet.col_values(1)[1:]
        y = sheet.col_values(2)[1:]
        positions = np.vstack([x, y])  # shape (2, n_points)
        _, eigvecs = LA.eig(np.cov(positions))
        # NOTE(review): eig() does not order its eigenvalues, so column 0 is
        # not guaranteed to be the principal axis, and the data is not centred
        # before projecting. Kept as in the original; confirm the intent.
        projected = np.dot(eigvecs[:, 0], positions)
        track_rows.append(sample_num(projected, feature_dim))

        speeds = [v for v in sheet.col_values(3)[1:] if v <= max_speed]
        # NOTE(review): one figure per workbook is created and left open, as
        # in the original; close them in the caller if memory becomes an issue.
        _, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
        # ``density=True`` replaces the removed ``normed=True`` keyword.
        ax.hist(speeds, feature_speed, density=True, rwidth=0.8)
        density, _ = np.histogram(speeds, feature_speed, density=True)
        speed_rows.append(density)

    # Trajectory features, one row per workbook.
    gill_net_sample = _stack_rows(track_rows, feature_dim)
    # Speed histograms scaled by the global maximum.
    speed_feature = _stack_rows(speed_rows, feature_speed)
    if speed_feature.size:
        speed_feature_g = speed_feature / speed_feature.max()
    else:
        # Nothing to normalise; avoid max() on an empty array.
        speed_feature_g = speed_feature
    print('Feature extraction completed.')
    return (gill_net_sample, speed_feature_g)
# 金戈_旭日东升
# 发布了8 篇原创文章 · 获赞 0 · 访问量 93
# 私信关注
# 猜你喜欢