思路:输入样本X与初始权重W相乘并加上偏置b(本文代码中W、b初始化为全零),经sigmoid激活函数得到输出值;对于二分类问题,用交叉熵损失函数计算损失值,再利用链式法则求出损失对W和b的偏导,用梯度下降更新W和b即可(梯度下降还有很多变种,如Momentum、Adam等,后面再详细介绍)。剩下的就是迭代次数和学习率的选择问题。
第一课作业直接给了数据集,无须对数据集做额外处理。下面是读取数据集的代码(数据集链接:点击打开链接)。
命名为:lr_utils.py
"""Loader for the cat / non-cat HDF5 dataset (saved as lr_utils.py)."""
import numpy as np
import h5py


def load_dataset():
    """Read the training and test sets from the two HDF5 files.

    The files are opened with ``with`` so their handles are closed as soon as
    the arrays have been copied into memory.

    Returns:
        Tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. The two label arrays are reshaped to
        ``(1, number_of_samples)``; the other arrays are returned as stored
        in the files.
    """
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        # ``[:]`` reads the full dataset, so the data outlives the file handle.
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn the 1-D label vectors into row vectors of shape (1, m).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


if __name__ == '__main__':
    train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
    print('训练样本数={}'.format(train_set_x_orig.shape))
    print('训练样本对应的标签={}'.format(train_set_y_orig.shape))
    print('前10张训练样本标签={}'.format(train_set_y_orig[:, :10]))
    print('测试样本数={}'.format(test_set_x_orig.shape))
    print('测试样本对应的标签={}'.format(test_set_y_orig.shape))
    print('{}'.format(classes))
# The printed result shows 209 training samples, each of size 64x64x3.
下面通过测试代码看看标签0 1 代表的是什么
"""Display two training images to see what the labels 0 and 1 stand for."""
import cv2
from lr_utils import load_dataset

# Only the training images are shown; the other returned values are unused here.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Show sample 0, then sample 2, waiting for a key press after each window.
for window_title, sample_index in (('img0', 0), ('img2', 2)):
    cv2.imshow(window_title, train_set_x_orig[sample_index])
    cv2.waitKey()
可知0代表不是猫,1代表是猫。
训练标签Y的形状为(1,209)。若把每个样本拉成一行,则X的形状为(209,64*64*3),由X·W=Yᵀ可知权重W的形状为(64*64*3,1)。代码中最终采用每个样本占一列的形式,即X的形状为(64*64*3,209),W仍为(64*64*3,1),因此计算时要对W取转置:$Z = W^{T}X + b$,其形状为(1,209),与Y一致。
先初始化权重W和偏置b,激活函数采用sigmoid,得到输出值A;损失函数采用交叉熵,通过链式法则反向求出W和b的导数,再更新W和b即可。计算过程中注意维度的统一,可用assert判断。
代码如下:
"""Logistic regression for cat / non-cat image classification.

The model computes ``A = sigmoid(w.T X + b)``, scores it with the
cross-entropy loss and updates ``w`` and ``b`` by batch gradient descent.
"""
import numpy as np


def sigmoid(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))``."""
    return 1.0 / (1 + np.exp(-z))


def initialize_zeros(dim):
    """Create zero-initialised parameters.

    Args:
        dim: Number of input features.

    Returns:
        Tuple ``(w, b)``: ``w`` is a zero array of shape ``(dim, 1)`` and
        ``b`` is the scalar ``0``.
    """
    w = np.zeros((dim, 1))
    b = 0
    return w, b


def propagate(w, b, X, Y):
    """Run one forward/backward pass and return the cost and gradients.

    Args:
        w: Weight column vector of shape ``(n_features, 1)``.
        b: Scalar bias.
        X: Input matrix with one sample per column, ``(n_features, m)``.
        Y: Label row vector of shape ``(1, m)``.

    Returns:
        Tuple ``(cost, grads)``: ``cost`` is the mean cross-entropy (squeezed
        to a 0-d array) and ``grads`` is ``{'dw': ..., 'db': ...}``.
    """
    m = X.shape[1]
    linear = np.dot(w.T, X)
    assert linear.shape == Y.shape
    A = sigmoid(linear + b)

    # Clip only for the logarithms: a saturated sigmoid would otherwise give
    # log(0) and turn the cost into inf/nan. The gradients use the raw A.
    eps = np.finfo(float).eps
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -(np.dot(Y, np.log(A_safe).T) + np.dot(1 - Y, np.log(1 - A_safe).T)) / m

    dw = np.dot(X, (A - Y).T) / m
    db = np.sum(A - Y) / m
    grads = {'dw': dw,
             'db': db}
    return np.squeeze(cost), grads


def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` with gradient descent.

    Args:
        w: Initial weights, shape ``(n_features, 1)``.
        b: Initial bias.
        X: Input matrix, ``(n_features, m)``.
        Y: Labels, ``(1, m)``.
        num_iterations: Number of gradient steps.
        learning_rate: Step size of each update.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        Tuple ``(params, grads, costs)``: ``params`` holds the final ``w`` and
        ``b``; ``grads`` holds the gradients used for the last update (both
        ``None`` if no iteration ran); ``costs`` is the cost recorded every
        100 iterations.
    """
    costs = []
    # Defined up front so that num_iterations == 0 returns cleanly instead of
    # raising UnboundLocalError.
    dw = db = None
    for i in range(num_iterations):
        cost, grads = propagate(w, b, X, Y)
        dw = grads['dw']
        db = grads['db']
        w = w - learning_rate * dw
        b = b - learning_rate * db
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print('after iteration %i:%f' % (i, cost))
    params = {'w': w,
              'b': b}
    grads = {'dw': dw,
             'db': db}
    return params, grads, costs


def predict(w, b, X):
    """Predict labels with the given parameters.

    Args:
        w: Weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Scalar bias.
        X: Input matrix with one sample per column.

    Returns:
        Float array of shape ``(1, m)`` holding ``1`` where the sigmoid output
        is strictly greater than 0.5 and ``0`` elsewhere.
    """
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    return (A > 0.5).astype(float)


def test():
    """Development check that prints w/b updates; not called by the script."""
    dim = 2
    w, b = initialize_zeros(dim)
    print('initialize w,b=', w, b)

    w, b, X, Y = np.array([[1], [2]]), 2, np.array([[1, 2], [3, 4]]), np.array([[1, 0]])
    cost, grads = propagate(w, b, X, Y)
    print('cost=', cost)
    print('dw=', grads['dw'])
    print('db=', grads['db'])

    params, grads, costs = optimize(w, b, X, Y, num_iterations=100,
                                    learning_rate=0.009, print_cost=False)
    print('w', params['w'])
    print('b', params['b'])
    print('iterations dw=', grads['dw'])
    print('iterations db=', grads['db'])
    print('costs=', costs)

    Y_prediction = predict(w, b, X)
    print('Y_prediction=', Y_prediction)


def model(X_train, Y_train, X_test, Y_test, num_iterations, learning_rate, print_cost):
    """Train the classifier and report train/test accuracy.

    Args:
        X_train: Training inputs, ``(n_features, m_train)``.
        Y_train: Training labels, ``(1, m_train)``.
        X_test: Test inputs, ``(n_features, m_test)``.
        Y_test: Test labels, ``(1, m_test)``.
        num_iterations: Number of gradient steps.
        learning_rate: Gradient-descent step size.
        print_cost: Forwarded to ``optimize`` to enable cost logging.

    Returns:
        Dict with the recorded ``costs``, the *trained* ``w`` and ``b``, the
        train/test predictions and the hyper-parameters used.
    """
    w, b = initialize_zeros(X_train.shape[0])
    params, grads, costs = optimize(w, b, X_train, Y_train, num_iterations,
                                    learning_rate, print_cost=print_cost)
    # Use the learned parameters from here on; the zero-initialised w, b above
    # would make every later prediction 0.
    w, b = params['w'], params['b']

    Y_prediction_train = predict(w, b, X_train)
    Y_prediction_test = predict(w, b, X_test)
    print('train accuracy is {}'.format(np.mean(Y_prediction_train == Y_train)))
    print('test accuracy is {}'.format(np.mean(Y_prediction_test == Y_test)))

    d = {"costs": costs,
         'w': w,
         'b': b,
         'Y_prediction_train': Y_prediction_train,
         'Y_prediction_test': Y_prediction_test,
         'learning_rate': learning_rate,
         'num_iterations': num_iterations}
    return d


def main():
    """Train on the cat dataset, plot the cost curve and classify one image."""
    # Imported here so the numeric functions above can be imported (and unit
    # tested) without OpenCV, matplotlib or the dataset files.
    import cv2
    from matplotlib import pyplot as plt
    from lr_utils import load_dataset

    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

    # Train: flatten every image into one column and scale pixels to [0, 1].
    train_set_x_flatten = train_set_x_orig.reshape(
        train_set_x_orig.shape[0],
        train_set_x_orig.shape[1] * train_set_x_orig.shape[2] * 3).T
    train_set_x = train_set_x_flatten / 255.
    train_set_y_flatten = train_set_y.reshape(train_set_y.shape[0], -1)

    # Test: same preprocessing as the training set.
    test_set_x_flatten = test_set_x_orig.reshape(
        test_set_x_orig.shape[0],
        test_set_x_orig.shape[1] * test_set_x_orig.shape[2] * 3).T
    test_set_x = test_set_x_flatten / 255.
    test_set_y_flatten = test_set_y.reshape(test_set_y.shape[0], -1)

    # print_cost=True keeps the per-100-iteration cost log the script has
    # always printed, now that model() honours the flag.
    d = model(train_set_x, train_set_y_flatten, test_set_x, test_set_y_flatten,
              num_iterations=2000, learning_rate=0.002, print_cost=True)

    # Plot the cost curve (one point per 100 iterations).
    plt.plot(d['costs'])
    plt.xlabel('iteration')
    plt.ylabel('cost')
    plt.show()

    # Classify a custom cat picture.
    image_path = 'images/my_image2.jpg'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('cannot read image: {}'.format(image_path))
    imgsize = cv2.resize(img, (64, 64), interpolation=cv2.INTER_CUBIC)
    cv2.imshow('imgsize', imgsize)
    cv2.waitKey(0)
    # Scale exactly like the training data; raw 0-255 pixels saturate the sigmoid.
    # NOTE(review): cv2.imread returns BGR; if the dataset images are stored as
    # RGB the channels should also be swapped here - confirm against the data.
    my_cat = imgsize.reshape(-1, 1) / 255.
    My_cat_prediction = predict(d['w'], d['b'], my_cat)
    print('My_cat_prediction', My_cat_prediction)


if __name__ == '__main__':
    main()
打印如下:
测试精度还行,由于样本量少,小猫还是预测错了。