CS231n Convolutional Networks (卷积网络): Naive forward pass

def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each filter
    spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields.
        Either a single number (used for both directions) or a pair
        (stride_h, stride_w) for the vertical and horizontal directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.
        Either a single number (used for both axes) or a pair
        (pad_h, pad_w) for the height and width axes.

    During padding, 'pad' zeros are placed symmetrically (i.e. equally on both
    sides) along the height and width axes of the input. The original input x
    is not modified; padding is applied to a copy.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad_h - HH) // stride_h
      W' = 1 + (W + 2 * pad_w - WW) // stride_w
    - cache: (x, w, b, conv_param)

    Raises:
    - AssertionError: if the channel count of x differs from that of w.
    """
    # Imported locally so the function does not depend on a module-level
    # import of `numbers`.
    import numbers

    stride = conv_param['stride']
    padding = conv_param['pad']

    # Normalise scalar settings to per-axis (height, width) form.
    if isinstance(stride, numbers.Number):
        stride = (stride, stride)
    if isinstance(padding, numbers.Number):
        padding = [(padding, padding), (padding, padding)]
    else:
        padding = [(p, p) for p in padding]
    stride_h, stride_w = stride[0], stride[1]

    # Pad only the two spatial axes; the batch and channel axes get no padding.
    # np.pad returns a new array, so x itself is left untouched.
    pad_width = [(0, 0), (0, 0)] + padding
    x_pad = np.pad(x, pad_width=pad_width, mode='constant', constant_values=0)

    n, c, padded_h, padded_w = x_pad.shape
    f, w_c, hh, ww = w.shape
    assert c == w_c, 'input channels must equal to filter channels'

    out_h = (padded_h - hh) // stride_h + 1
    out_w = (padded_w - ww) // stride_w + 1
    out = np.zeros(shape=(n, f, out_h, out_w))

    for i in range(n):  # each sample
        for j in range(f):  # each filter
            for _h in range(out_h):  # vertical direction
                # The vertical offset must use the vertical stride so that it
                # agrees with how out_h was derived above.
                top = _h * stride_h
                for _w in range(out_w):  # horizontal direction
                    left = _w * stride_w
                    window = x_pad[i, :, top:top + hh, left:left + ww]
                    out[i, j, _h, _w] = np.sum(window * w[j]) + b[j]

    cache = (x, w, b, conv_param)
    return out, cache


# As usual, a bit of setup
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

#%matplotlib inline
# Plot defaults applied in one call: figure size, un-smoothed pixels, grayscale.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),  # set default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
#%load_ext autoreload
#%autoreload 2

def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
    keeps the denominator non-zero where both inputs are zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)


# Load the dataset and print the shape of every entry it contains.
data = get_CIFAR10_data()
for k, v in data.items():
  print('%s: ' % k, v.shape)

# Build small deterministic inputs: evenly spaced values reshaped to the
# input shape (2, 3, 4, 4) and the filter shape (3, 3, 4, 4), plus 3 biases.
x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

# Run the forward pass with stride 2 and one pixel of zero padding; the
# returned cache is not needed here.
conv_param = {'stride': 2, 'pad': 1}
out, _ = conv_forward_naive(x, w, b, conv_param)
# Reference output for the inputs above, shape (2, 3, 2, 2).
correct_out = np.array([[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]])

# Compare your output to ours; difference should be around e-8
print('Testing conv_forward_naive')
print('difference: ', rel_error(out, correct_out))

https://github.com/duanzhihua

CS231n Convolutional Networks (卷积网络): Naive forward pass

猜你喜欢