This post implements cat classification with a 4-layer neural network. The training set contains 209 images of shape 64*64*3 = 12288, so the input layer has 12288 nodes; the hidden layers have 20, 7, and 5 nodes respectively, and the output layer has 1 node.
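For reference, the computation each layer performs (standard notation; this summary is mine, matching the code below) is

    Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}, \qquad A^{[l]} = g^{[l]}\big(Z^{[l]}\big), \qquad A^{[0]} = X,

where g^{[l]} is ReLU for the hidden layers and sigmoid for the output layer, and W^{[l]} has shape (n^{[l]}, n^{[l-1]}) with n = (12288, 20, 7, 5, 1).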
First, look at the file layout. dnn_utils_v2.py contains the activation functions and their derivatives, the dataset loader, and the code for plotting mislabeled images:
import numpy as np
import matplotlib.pyplot as plt
import h5py


def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy.

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(Z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache


def relu(Z):
    """
    Implements the RELU function.

    Arguments:
    Z -- output of the linear layer, of any shape

    Returns:
    A -- post-activation parameter, of the same shape as Z
    cache -- Z, stored for computing the backward pass efficiently
    """
    A = np.maximum(0, Z)
    assert (A.shape == Z.shape)
    cache = Z
    return A, cache


def relu_backward(dA, cache):
    """
    Implements the backward propagation for a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z', stored for computing backward propagation efficiently

    Returns:
    dZ -- gradient of the cost with respect to Z
    """
    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dA to a correct object
    dZ[Z <= 0] = 0  # when Z <= 0, set dZ to 0 as well
    assert (dZ.shape == Z.shape)
    return dZ


def sigmoid_backward(dA, cache):
    """
    Implements the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z', stored for computing backward propagation efficiently

    Returns:
    dZ -- gradient of the cost with respect to Z
    """
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    assert (dZ.shape == Z.shape)
    return dZ


def load_data():
    """Loads the cat/non-cat dataset."""
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels

    classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def print_mislabeled_images(classes, X, y, p):
    """
    Plots images where predictions and truth were different.
    X -- dataset
    y -- true labels
    p -- predictions
    """
    a = p + y
    mislabeled_indices = np.asarray(np.where(a == 1))
    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # set default size of plots
    num_images = len(mislabeled_indices[0])
    for i in range(num_images):
        index = mislabeled_indices[1][i]
        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + classes[int(p[0, index])].decode("utf-8")
                  + " \n Class: " + classes[y[0, index]].decode("utf-8"))
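A quick sanity check of these helpers (my own snippet, not part of the original files):

import numpy as np
from dnn_utils_v2 import sigmoid, relu, relu_backward, sigmoid_backward

Z = np.array([[-1.0, 0.0, 2.0]])
A_sig, cache = sigmoid(Z)  # ~[[0.27, 0.5, 0.88]]
A_rel, _ = relu(Z)         # [[0., 0., 2.]]

# The backward helpers turn an upstream gradient dA into dZ:
dA = np.ones_like(Z)
print(relu_backward(dA, cache))     # [[0., 0., 1.]] -- zeroed where Z <= 0
print(sigmoid_backward(dA, cache))  # dA * s * (1 - s), with s = sigmoid(Z)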
StartDeepNeural.py contains the forward- and backward-propagation code for the whole model:

import numpy as np
import dnn_utils_v2


def initialize_parameters(n_x, n_h, n_y):
    """Parameter initialization for a two-layer network."""
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))
    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))
    parameters = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return parameters


def initialize_parameters_deep(layer_dims):
    """
    Parameter initialization for a multi-layer network.
    Note the scaling of the weights, which guards against
    exploding and vanishing gradients.
    """
    L = len(layer_dims)
    parameters = {}
    for i in range(1, L):
        parameters['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i - 1]) * np.sqrt(2.0 / layer_dims[i - 1])
        parameters['b' + str(i)] = np.zeros((layer_dims[i], 1))
        assert (parameters['W' + str(i)].shape == (layer_dims[i], layer_dims[i - 1]))
        assert (parameters['b' + str(i)].shape == (layer_dims[i], 1))
    return parameters


def linear_forward(A, W, b):
    """The linear part (no activation) of one layer's forward pass."""
    Z = np.dot(W, A) + b
    assert (Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)
    return Z, cache


def linear_activation_forward(A_prev, W, b, activation):
    """One layer's forward pass with the chosen activation applied."""
    if activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = dnn_utils_v2.sigmoid(Z)
    elif activation == 'relu':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = dnn_utils_v2.relu(Z)
    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)  # save the ((A, W, b), Z) tuple
    return A, cache


def L_model_forward(X, parameters):
    """Forward propagation through the whole model: (L-1) ReLU layers, then a sigmoid output."""
    L = len(parameters) // 2
    A = X
    caches = []
    for i in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(i)], parameters['b' + str(i)],
                                             activation='relu')
        caches.append(cache)
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)],
                                          activation='sigmoid')
    caches.append(cache)
    assert (AL.shape == (parameters['W' + str(L)].shape[0], X.shape[1]))
    return AL, caches


def compute_cost(AL, Y):
    """Binary cross-entropy cost."""
    m = Y.shape[1]
    cost = (1. / m) * (-np.dot(Y, np.log(AL).T) - np.dot(1 - Y, np.log(1 - AL).T))
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    return cost


def linear_backward(dZ, cache):
    """The linear part (no activation) of one layer's backward pass."""
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = 1 / m * np.dot(dZ, A_prev.T)
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    assert (dA_prev.shape == A_prev.shape)
    return dA_prev, dW, db


def linear_activation_backward(dA, cache, activation):
    """One layer's backward pass, through the chosen activation."""
    linear_cache, activation_cache = cache
    if activation == 'relu':
        dZ = dnn_utils_v2.relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db


def L_model_backward(AL, Y, caches):
    """Backward propagation through the whole model."""
    grads = {}
    L = len(caches)  # caches = [((X, W1, b1), Z1), ((A1, W2, b2), Z2), ((A2, W3, b3), Z3), ...]
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)
    current_cache = caches[L - 1]
    grads['dA' + str(L)], grads['dW' + str(L)], grads['db' + str(L)] = linear_activation_backward(
        dAL, current_cache, activation='sigmoid')
    for i in reversed(range(L - 1)):
        current_cache = caches[i]
        grads['dA' + str(i + 1)], grads['dW' + str(i + 1)], grads['db' + str(i + 1)] = linear_activation_backward(
            grads['dA' + str(i + 2)], current_cache, activation='relu')
    return grads


def update_parameters(parameters, grads, learning_rate):
    """Gradient-descent parameter update."""
    L = len(parameters) // 2
    for i in range(L):
        parameters['W' + str(i + 1)] = parameters['W' + str(i + 1)] - learning_rate * grads['dW' + str(i + 1)]
        parameters['b' + str(i + 1)] = parameters['b' + str(i + 1)] - learning_rate * grads['db' + str(i + 1)]
    return parameters
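compute_cost implements the binary cross-entropy

    J = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log a^{[L](i)} + \big(1-y^{(i)}\big)\log\big(1-a^{[L](i)}\big)\Big],

and the dAL line in L_model_backward seeds backpropagation with the elementwise derivative of the per-example loss (the 1/m factor is applied later, inside linear_backward, when computing dW and db):

    dA^{[L]} = -\frac{Y}{A^{[L]}} + \frac{1-Y}{1-A^{[L]}}.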
DeepNeuralCat_noCat.py is the training script. First, inspect the dataset:
import numpy as np
import dnn_utils_v2
import matplotlib.pyplot as plt
import StartDeepNeural

train_x_orig, train_y_orig, test_x_orig, test_y_orig, classa = dnn_utils_v2.load_data()
print('train={}'.format(train_x_orig.shape))
print(train_y_orig.shape)
print(train_y_orig[:, :10])
print(test_x_orig.shape)
print(test_y_orig.shape)
print(classa)
plt.imshow(train_x_orig[0])
plt.show()
The printed output shows 209 training samples and 50 test samples, each of shape (64, 64, 3); label 0 means the image is not a cat.
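With the standard catvnoncat dataset, the prints come out roughly as follows (reconstructed from the shapes described above; the row of ten 0/1 labels is dataset-specific and abbreviated):

train=(209, 64, 64, 3)
(1, 209)
[[...]]              <- the first ten 0/1 labels
(50, 64, 64, 3)
(1, 50)
[b'non-cat' b'cat']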
Next, flatten each image into a vector, giving (209, 64*64*3) for the training set and (50, 64*64*3) for the test set; the code then transposes to put samples in columns and scales pixel values to [0, 1]:
import numpy as np
import dnn_utils_v2
import matplotlib.pyplot as plt
import StartDeepNeural

train_x_orig, train_y_orig, test_x_orig, test_y_orig, classa = dnn_utils_v2.load_data()

# train: flatten (209, 64, 64, 3) -> (12288, 209) and scale to [0, 1]
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], train_x_orig.shape[1] * train_x_orig.shape[2] * 3).T
train_x = train_x_flatten / 255.

# test: flatten (50, 64, 64, 3) -> (12288, 50) and scale to [0, 1]
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], test_x_orig.shape[1] * test_x_orig.shape[2] * 3).T
test_x = test_x_flatten / 255.
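Equivalently, the pixel dimension can be inferred with -1, which avoids the hand-multiplied 64*64*3 (my shorthand, not in the original):

train_x = train_x_orig.reshape(train_x_orig.shape[0], -1).T / 255.  # (12288, 209)
test_x = test_x_orig.reshape(test_x_orig.shape[0], -1).T / 255.     # (12288, 50)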
Now start training. The code below also defines a two-layer network (two_layer_model), but it is not actually used:
import numpy as np
import dnn_utils_v2
import matplotlib.pyplot as plt
import StartDeepNeural

train_x_orig, train_y_orig, test_x_orig, test_y_orig, classa = dnn_utils_v2.load_data()

# flatten and scale, as in the previous listing
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], train_x_orig.shape[1] * train_x_orig.shape[2] * 3).T
train_x = train_x_flatten / 255.
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], test_x_orig.shape[1] * test_x_orig.shape[2] * 3).T
test_x = test_x_flatten / 255.


def two_layer_model(X, Y, num_iterations, learning_rate):
    """A two-layer (12288 -> 7 -> 1) version; defined but not used below."""
    n_x = 12288
    n_h = 7
    n_y = 1
    layer_dims = (n_x, n_h, n_y)
    parameters = StartDeepNeural.initialize_parameters_deep(layer_dims)
    costs = []
    for i in range(0, num_iterations):
        AL, caches = StartDeepNeural.L_model_forward(X, parameters)
        cost = StartDeepNeural.compute_cost(AL, Y)
        grads = StartDeepNeural.L_model_backward(AL, Y, caches)
        parameters = StartDeepNeural.update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            print('iterations{}:cost {}'.format(i, cost))
            costs.append(cost)
    return costs, parameters


def predict(X, Y, parameters):
    """Threshold the network output at 0.5 and report accuracy."""
    AL, caches = StartDeepNeural.L_model_forward(X, parameters)  # AL.shape = (1, m)
    m = X.shape[1]
    p = np.zeros((1, m))
    for i in range(AL.shape[1]):
        if AL[0][i] > 0.5:
            p[0][i] = 1
        else:
            p[0][i] = 0
    # true positives + true negatives, divided by m
    result = np.squeeze(np.dot(p, Y.T) + np.dot(1 - p, 1 - Y.T))
    accuracy = result / m
    return accuracy, p


def two_layer_model_test():
    costs, parameters = two_layer_model(train_x, train_y_orig, num_iterations=3000, learning_rate=0.0075)
    accuracy, p_train = predict(train_x, train_y_orig, parameters)
    print('train accuracy is {}'.format(accuracy))
    accuracy, p_test = predict(test_x, test_y_orig, parameters)
    print('test accuracy is {}'.format(accuracy))
    plt.plot(costs)
    plt.xlabel('iterations')
    plt.ylabel('costs')
    plt.title('learning rate is 0.0075')
    plt.show()


def L_layer_model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=2000):
    parameters = StartDeepNeural.initialize_parameters_deep(layer_dims)
    costs = []
    for i in range(0, num_iterations):
        AL, caches = StartDeepNeural.L_model_forward(X, parameters)
        cost = StartDeepNeural.compute_cost(AL, Y)
        grads = StartDeepNeural.L_model_backward(AL, Y, caches)
        parameters = StartDeepNeural.update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            print('iterations{}:cost {}'.format(i, cost))
            costs.append(cost)
    return costs, parameters


def print_mislabel_images(classes, test_x, test_y_orig, p_test):
    # For a wrong prediction, p + y == 1 (one of the two is 1 and the other 0).
    # np.where returns a tuple of index arrays, e.g. (array([0]), array([1]));
    # np.asarray stacks them into a 2 x k array, e.g. array([[0], [1]]).
    a = test_y_orig + p_test
    mislable_index = np.asarray(np.where(a == 1))
    plt.figure(figsize=(40, 40))  # 40 x 40 inch canvas (~4000 x 4000 px at default dpi)
    num_images = len(mislable_index[0])
    for i in range(num_images):
        index = mislable_index[1][i]
        plt.subplot(2, num_images, i + 1)
        plt.imshow(test_x[:, index].reshape(64, 64, 3))
        plt.title('prediction ' + classes[int(p_test[0][index])].decode('utf-8')
                  + ' real ' + classes[int(test_y_orig[0][index])].decode('utf-8'))
    plt.savefig('1.jpg')


def L_layer_model_test():
    layers_dims = [12288, 20, 7, 5, 1]
    costs, parameters = L_layer_model(train_x, train_y_orig, layers_dims)
    accuracy, p_train = predict(train_x, train_y_orig, parameters)
    print('train accuracy is {}'.format(accuracy))
    accuracy, p_test = predict(test_x, test_y_orig, parameters)
    print('test accuracy is {}'.format(accuracy))
    print_mislabel_images(classa, test_x, test_y_orig, p_test)
    plt.show()


if __name__ == '__main__':
    L_layer_model_test()
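A note on the accuracy expression in predict: np.dot(p, Y.T) counts true positives and np.dot(1 - p, 1 - Y.T) counts true negatives, so their sum divided by m is the fraction of correct predictions. A toy check and an equivalent one-liner (my own, not in the original):

import numpy as np

p = np.array([[1., 0., 1., 1.]])  # predictions
Y = np.array([[1., 0., 0., 1.]])  # true labels
correct = np.squeeze(np.dot(p, Y.T) + np.dot(1 - p, 1 - Y.T))  # true pos + true neg = 3.0
print(correct / Y.shape[1])  # 0.75
print(np.mean(p == Y))       # 0.75 -- equivalent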
The printed output: