1 导入库
import torch
import torchvision.transforms as transforms
import os
import numpy as np
from skimage import io, transform
from torch.utils.data import Dataset,DataLoader
2 数据增强【调用numpy的函数,数据类型是np.ndarray】
1)水平随机翻转
class RandomHorizontalFlip():
    """Mirror an image left-to-right with a configurable probability.

    Args:
        p (float): probability that a call flips the image. Defaults to 0.5.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, image):
        """Randomly flip ``image`` horizontally.

        Args:
            image (array): image to be flipped; handed to ``np.fliplr``.

        Returns:
            array: ``np.fliplr(image)`` when the random draw falls below
            ``self.p``, otherwise the very same ``image`` object.
        """
        # A single uniform sample from torch's global RNG decides the outcome.
        should_flip = torch.rand(1) < self.p
        return np.fliplr(image) if should_flip else image
2)垂直随机翻转
class RandomVerticleFlip():
    """Flip an image upside-down with a configurable probability.

    The class name keeps its original spelling so existing callers still work.

    Args:
        p (float): probability that a call flips the image. Defaults to 0.5.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, image):
        """Randomly flip ``image`` vertically.

        Args:
            image (array): image to be flipped; handed to ``np.flipud``.

        Returns:
            array: ``np.flipud(image)`` when the random draw falls below
            ``self.p``, otherwise the input ``image`` unchanged.
        """
        if torch.rand(1) < self.p:
            # NumPy's vertical flip is ``flipud`` ("up/down"); ``np.flipup``
            # does not exist and raised AttributeError whenever this branch ran.
            return np.flipud(image)
        return image
3)改变尺寸,缩放
class Rescale(object):
    """Resize an image array; use this class before ``ToTensor``.

    Exactly how the image is resized depends on which argument is supplied;
    ``size`` takes precedence when both are given.

    Args:
        size: a tuple ``(a, b)`` forwarded to ``transform.resize`` as the
            output shape.
        factor: an int or float forwarded to ``transform.rescale``;
            values < 1 shrink the image, values > 1 enlarge it.

    The original source carried a note about normalising the short side to
    600; nothing in this class implements that.

    NOTE(review): in the ``factor`` path the image is passed to
    ``transform.rescale`` without any channel argument, so an (h, w, 3) input
    may have its channel axis rescaled too on some scikit-image versions --
    confirm against the installed version.
    """

    def __init__(self, size=None, factor=None):
        # At least one of the two modes must be configured.
        assert (size or factor)
        self.size, self.factor = size, factor

    def __call__(self, image):
        """Return the resized copy of ``image`` (expected: (h, w, 3) ndarray)."""
        assert isinstance(image, np.ndarray)
        if not self.size:
            # No explicit target shape: scale by the configured factor.
            assert isinstance(self.factor, (int, float))
            return transform.rescale(image, self.factor)
        assert len(self.size) == 2
        return transform.resize(image, (self.size))
4)随机裁减
class Randomcrop(object):
    """Cut a randomly positioned window of fixed size out of an image.

    Args:
        output_size: length-2 sequence ``(h, w)`` giving the crop size.
    """

    def __init__(self, output_size):
        assert len(output_size) == 2
        self.output_size = output_size

    def __call__(self, image):
        """Return a random ``(h, w)`` crop of ``image``.

        Args:
            image (array): array whose first two axes are height and width,
                e.g. ``(h, w, 3)``.

        Returns:
            array: the slice ``image[top:top + h, left:left + w]``.

        Raises:
            ValueError: if the image is smaller than the requested crop.
        """
        height, width = image.shape[:2]
        crop_h, crop_w = self.output_size
        if crop_h > height or crop_w > width:
            raise ValueError(
                "crop size {0} is larger than image size {1}".format(
                    (crop_h, crop_w), (height, width)))
        # randint's upper bound is exclusive, so add 1: this makes the last
        # valid offset reachable and lets an image that is exactly the crop
        # size pass through (offset 0) instead of raising.
        top = np.random.randint(0, height - crop_h + 1)
        left = np.random.randint(0, width - crop_w + 1)
        return image[top:top + crop_h, left:left + crop_w]
5)转为tensor
class ToTensor(object):
    """Convert an (H, W, C) image array into a (C, H, W) float32 tensor.

    Pixel values are divided by 255, so 0-255 input ends up in [0, 1].
    """

    def __call__(self, image):
        """Return ``image`` as a channel-first ``torch.float32`` tensor.

        Args:
            image (np.ndarray): array with three axes ordered (H, W, C).

        Returns:
            torch.Tensor: float32 tensor of shape (C, H, W), scaled by 1/255.
        """
        chw = image.transpose([2, 0, 1])
        # ``torch.from_numpy`` rejects arrays with negative strides (such as
        # the views produced by np.fliplr / np.flipud), so materialise a copy.
        chw = chw.copy()
        # Cast explicitly: the input may be float64 or uint8, and the result
        # should be float32 regardless of the incoming dtype.
        return torch.from_numpy(chw).to(torch.float32) / 255.
此处可能出现问题:
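ndarray 经过 np.fliplr / np.flipud 翻转后得到的是带负步长(negative stride)的视图,torch.from_numpy 不支持负步长,因此不能直接转为 tensor,需要先用 copy() 拷贝一份(单独的 transpose 并不会触发下面这个错误)。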
#ValueError: At least one stride in the given numpy array is negative, and tensors with negative strides are not currently supported.
# (You can probably work around this by making a copy of your array with array.copy().)
6)Normalize:转为tensor后,再调用torchvision.transforms
#经过修改的源码,其接收tensor
class Normalize(object):
    """Normalize a tensor image with mean and standard deviation.

    Given mean ``(mean[1], ..., mean[n])`` and std ``(std[1], ..., std[n])``
    for ``n`` channels, each channel of the input tensor is transformed as
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``.

    Unless ``inplace=True`` the input tensor is cloned first and is therefore
    left unmodified.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool, optional): Normalize the input tensor in place.
    """

    def __init__(self, mean, std, inplace=False):
        self.mean = mean
        self.std = std
        self.inplace = inplace

    def __call__(self, sample):
        """Normalize a tensor, or the ``'image'`` entry of a sample dict.

        Args:
            sample (dict or Tensor): either a float tensor image of size
                (C, H, W), or a dict whose ``'image'`` value is such a tensor.

        Returns:
            dict or Tensor: for a dict, a shallow copy with ``'image'``
            replaced by the normalized tensor and every other entry carried
            over untouched; otherwise the normalized tensor.

        Raises:
            TypeError: if the image is not a floating-point tensor.
            ValueError: if any std value is zero.
        """
        if isinstance(sample, dict):
            # Copy so the caller's dict is not mutated; all non-image keys
            # (label, image_name, bound_box, ...) pass through unchanged.
            result = dict(sample)
            result['image'] = self._normalize(sample['image'])
            return result
        return self._normalize(sample)

    def _normalize(self, tensor):
        """Apply ``(tensor - mean) / std`` channel-wise using plain torch ops."""
        # The original body called ``F.normalize`` although no ``F`` is
        # imported in this file; the arithmetic is done directly here.
        if not torch.is_tensor(tensor):
            raise TypeError(
                "tensor should be a torch tensor. Got {0}.".format(type(tensor)))
        if not tensor.is_floating_point():
            raise TypeError(
                "Input tensor should be a float tensor. Got {0}.".format(tensor.dtype))
        if not self.inplace:
            tensor = tensor.clone()
        mean = torch.as_tensor(self.mean, dtype=tensor.dtype, device=tensor.device)
        std = torch.as_tensor(self.std, dtype=tensor.dtype, device=tensor.device)
        if (std == 0).any():
            raise ValueError("std contains zero, which would lead to division by zero.")
        # Reshape per-channel statistics to (C, 1, 1) so they broadcast
        # over the spatial axes.
        if mean.ndim == 1:
            mean = mean.view(-1, 1, 1)
        if std.ndim == 1:
            std = std.view(-1, 1, 1)
        return tensor.sub_(mean).div_(std)

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
3 载入数据
此处也需要注意:
- image:使用skimage.io.imread读入普通 8 位图像时通常得到 uint8 数组(as_gray=True 时才是 float64),而pytorch中需要的数据是float32的,要进行转换。
- 标签:曾经设置为int会报错,需要转为long(长整型)。
class MyDataset(Dataset):
    """Dataset yielding ``{'image', 'bound_box'}`` samples read from disk.

    Args:
        transforms: callable applied to each loaded image array; skipped
            when falsy.

    NOTE(review): ``get_bound_boxes`` and ``get_image_path`` are called in
    ``__init__`` but are not defined in the visible source -- they must be
    provided elsewhere (e.g. by a subclass).
    """

    def __init__(self, transforms):
        super().__init__()
        self.bounding_box_list = self.get_bound_boxes()
        self.image_path_list = self.get_image_path()
        self.transforms = transforms

    def __getitem__(self, id):
        """Load the image at index ``id`` and pair it with its bounding box."""
        raw = io.imread(self.image_path_list[id], as_gray=False)
        # Convert to float32 up front, whatever dtype imread produced.
        image = raw.astype(np.float32)
        bound_box = self.bounding_box_list[id]
        if self.transforms:
            # NOTE(review): only the image goes through the transforms; the
            # bounding box is returned as stored -- confirm this is intended
            # when geometric augmentations are in the pipeline.
            image = self.transforms(image)
        sample = {'image': image, 'bound_box': bound_box}
        return sample

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.image_path_list)
4 整合数据
def get_trainLoader(batch_size=5, shuffle=True, num_workers=0):
    """Build the training ``DataLoader`` with the augmentation pipeline.

    Args:
        batch_size (int): samples per batch. Defaults to 5.
        shuffle (bool): whether the loader shuffles. Defaults to True.
        num_workers (int): loader worker count. Defaults to 0.

    Returns:
        DataLoader: loader over a ``MyDataset`` configured with the pipeline
        below.
    """
    # Array-level steps come first; ToTensor switches to tensors, after which
    # torchvision's Normalize is applied.
    pipeline = transforms.Compose([
        Rescale(size=(500, 500)),
        Randomcrop((448, 448)),
        RandomHorizontalFlip(),
        ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainset = MyDataset(transforms=pipeline)
    return DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )