Splitting a dataset in PyTorch, and the cause of TypeError: take(): argument 'index' (position 1) must be Tensor, not numpy.ndarray

Method: When working with the PyTorch framework, you almost always need to split a dataset into a training set and a validation set. The usual tool is the train_test_split function from sklearn.model_selection.

The method is used as follows:

from sklearn.model_selection import train_test_split
import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

traindata = np.load(train_path)          # image_num * W * H
trainlabel = np.load(train_label_path)
train_data = traindata[:, np.newaxis, ...]          # add a channel axis: image_num * 1 * W * H
train_label_data = trainlabel[:, np.newaxis, ...]

# Split while the data are still numpy arrays (9:1 training/validation)
x_tra, x_val, y_tra, y_val = train_test_split(train_data, train_label_data, test_size=0.1, random_state=0)

# Convert to tensors only after the split
x_tra = Variable(torch.from_numpy(x_tra))
x_tra = x_tra.float()
y_tra = Variable(torch.from_numpy(y_tra))
y_tra = y_tra.float()
x_val = Variable(torch.from_numpy(x_val))
x_val = x_val.float()
y_val = Variable(torch.from_numpy(y_val))
y_val = y_val.float()

# DataLoader for the training set (opt.threads and opt.batchSize come from the script's argparse options)
traindataset = torch.utils.data.TensorDataset(x_tra, y_tra)
trainloader = DataLoader(dataset=traindataset, num_workers=opt.threads, batch_size=8, shuffle=True)

# DataLoader for the validation set
validataset = torch.utils.data.TensorDataset(x_val, y_val)
valiloader = DataLoader(dataset=validataset, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True)
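With the loaders built, training consumes mini-batches in the usual way. A minimal sketch of the consuming loop, assuming a model, criterion, and optimizer have been defined elsewhere (all three names, and num_epochs, are placeholders rather than part of the original script):

model.train()
for epoch in range(num_epochs):
    for x_batch, y_batch in trainloader:
        optimizer.zero_grad()                    # clear gradients from the previous step
        output = model(x_batch)
        batch_loss = criterion(output, y_batch)
        batch_loss.backward()                    # backpropagate
        optimizer.step()                         # update weights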

Note: if it is used as follows instead, it raises TypeError: take(): argument 'index' (position 1) must be Tensor, not numpy.ndarray:

from sklearn.model_selection import train_test_split
import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

traindata = np.load(train_path)          # image_num * W * H
trainlabel = np.load(train_label_path)
train_data = traindata[:, np.newaxis, ...]
train_label_data = trainlabel[:, np.newaxis, ...]

# Converting to tensors BEFORE the split is what triggers the error
x_train = Variable(torch.from_numpy(train_data))
x_train = x_train.float()
y_train = Variable(torch.from_numpy(train_label_data))
y_train = y_train.float()

# Split the original training data into a training set and a validation set
# (having a validation set also enables early stopping later)
x_tra, x_val, y_tra, y_val = train_test_split(x_train, y_train, test_size=0.1)          # 9:1 split

Reason for the error: train_test_split expects x_train and y_train to be numpy.ndarray objects, not torch.Tensor. Internally the split indices are generated as numpy arrays, and applying them to a torch.Tensor ends up calling Tensor.take() with a numpy index, which fails. So do the split first and convert to tensors afterwards.
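If the tensors already exist, a workaround is to move the conversion back across the split. A minimal sketch, assuming x_train and y_train are the CPU tensors created above (with requires_grad unset, so .numpy() is allowed):

x_tra, x_val, y_tra, y_val = train_test_split(
    x_train.numpy(), y_train.numpy(), test_size=0.1)    # split plain numpy arrays
x_tra, y_tra = torch.from_numpy(x_tra), torch.from_numpy(y_tra)    # re-wrap as tensors
x_val, y_val = torch.from_numpy(x_val), torch.from_numpy(y_val)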

Method: with a sufficiently new scikit-learn, train_test_split can also be applied to tensors directly, as the following script does.

import os, cv2, torch
import numpy as np
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import seaborn as sbn

# Darknet-19 configuration: integers are conv output channels, 'M' is a 2x2 max pool
cfg = [32, 'M', 64, 'M', 128, 64, 128, 'M', 256, 128, 256, 'M', 512, 256, 512, 256, 512, 'M',
       1024, 512, 1024, 512, 1024]

PACH = r'cat&dog'

# Read every image under pach; the class label is the index of its subfolder
def readdata(pach):
    img_lsit = []
    label_lsit = []
    for i, j in enumerate(os.listdir(pach)):
        label_pach = os.path.join(pach, j)
        for img_name in os.listdir(label_pach):
            img_path = os.path.join(label_pach, img_name)
            img = cv2.imread(img_path) / 255        # normalise pixel values to [0, 1]
            img = cv2.resize(img, (128, 128))
            img_lsit.append(img)
            label_lsit.append(i)
    return np.array(img_lsit), np.array(label_lsit)

def make_layer(cfg, in_channels=3, batch_norm=True):
    layers = []
    flag = True        # alternates: True -> 3x3 conv with padding 1, False -> 1x1 conv with padding 0
    for i in cfg:
        if i == 'M':
            layers.append(nn.MaxPool2d((2, 2), 2))
        else:
            layers.append(nn.Conv2d(in_channels=in_channels,
                                    out_channels=i,
                                    kernel_size=(1, 3)[flag],        # indexing a tuple with a bool
                                    stride=1,
                                    padding=(0, 1)[flag],
                                    bias=False))
            if batch_norm:
                layers.append(nn.BatchNorm2d(i))
            layers.append(nn.LeakyReLU(0.1, inplace=True))
            in_channels = i
            flag = not flag
    return nn.Sequential(*layers)

class DrakNet19(nn.Module):
    def __init__(self, num_classes=1000, in_channels=3, batch_norm=True):
        super(DrakNet19, self).__init__()
        self.featurs = make_layer(cfg, in_channels, batch_norm)
        self.classifier = nn.Sequential(
            nn.Conv2d(1024, num_classes, 1),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Softmax(dim=0)
        )
        self.load_weight()

    def forward(self, inputs):
        x = self.featurs(inputs)
        x = self.classifier(x)
        x = torch.squeeze(x)
        return x

    def load_weight(self):
        # Copy the pretrained values onto this model's parameter names, position by position
        weight_flie = 'darknet19-deepBakSu-e1b3ec1e.pth'
        dic = {}
        for now_keys, values in zip(self.state_dict().keys(), torch.load(weight_flie).values()):
            dic[now_keys] = values
        self.load_state_dict(dic)

class CNN(nn.Module):
    def __init__(self, num_class=1000):
        super(CNN, self).__init__()
        # out_channels of the last conv layer in the Darknet-19 feature extractor (1024)
        draknet_out_channel = DrakNet19().featurs[56].out_channels
        self.conv = nn.Conv2d(draknet_out_channel, num_class, 1)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, inputs):
        x = self.conv(inputs)
        x = self.avgpool(x)
        x = torch.squeeze(x)
        return x

if __name__ == '__main__':
    img_lsit, label_list = readdata(PACH)
    img_list = Variable(torch.Tensor(img_lsit))
    img_list = torch.transpose(img_list, 1, 3)        # NHWC -> NCHW
    label_list = Variable(torch.LongTensor(label_list))
    # train_test_split called directly on tensors: works on a recent scikit-learn
    trainx, testx, trainy, testy = train_test_split(img_list, label_list, train_size=0.8)
    valx, testx, valy, testy = train_test_split(testx, testy, train_size=0.5)
    net = DrakNet19().featurs        # note: the attribute is spelled 'featurs' in the class above
    # Precompute Darknet-19 features for all three splits at once
    trainx = Variable(net(trainx))
    valx = Variable(net(valx))
    testx = Variable(net(testx))
    model = CNN(num_class=2)
    print(model)
    loss = nn.CrossEntropyLoss()
    optime = torch.optim.Adam(model.parameters(), lr=0.001)

However, under an old scikit-learn the very same kind of call fails:

x_tra, x_val, y_tra, y_val = train_test_split(x_train, y_train, test_size=0.2)        # 8:2 training/validation split on tensor inputs

Reason for the error: under Python 3.7, train_test_split can raise the same take() error even here, because the installed scikit-learn is too old to index tensors correctly. Uninstall it and install a newer release:

pip install "scikit-learn>=1.0.2" -i https://pypi.tuna.tsinghua.edu.cn/simple

(Quoting the requirement keeps the shell from treating >= as a redirection.)
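After reinstalling, it is worth confirming which version Python 3.7 actually picks up; a quick check, with nothing project-specific assumed:

import sklearn
print(sklearn.__version__)    # should print 1.0.2 or later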


Origin blog.csdn.net/qq_44988877/article/details/129439847