Deep Learning Course 5 Week 1: Building a Recurrent Neural Network - Step by Step

 
import numpy as np

# In the course notebook, sigmoid and softmax are imported from rnn_utils;
# minimal column-wise versions are defined here so this listing runs standalone.
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    e_x = np.exp(x - np.max(x, axis=0, keepdims=True))
    return e_x / np.sum(e_x, axis=0, keepdims=True)

# One forward step of a vanilla RNN cell.
def rnn_cell_forward(xt, a_prev, parameters):
    # Retrieve the parameters
    Wax = parameters['Wax']
    Waa = parameters['Waa']
    Wya = parameters['Wya']
    ba = parameters['ba']
    by = parameters['by']
    # Next hidden state: a<t> = tanh(Wax x<t> + Waa a<t-1> + ba)
    a_next = np.tanh(np.dot(Wax, xt) + np.dot(Waa, a_prev) + ba)
    # Prediction: y_hat<t> = softmax(Wya a<t> + by)
    yt_pred = softmax(np.dot(Wya, a_next) + by)
    # Store values needed for the backward pass
    cache = (a_next, a_prev, xt, parameters)
    return a_next, yt_pred, cache
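# Quick shape check, following the notebook's test-cell pattern. The sizes
# n_x=3, n_a=5, n_y=2, m=10 are illustrative assumptions, not from the original post.
np.random.seed(1)
_demo_rnn_params = {'Wax': np.random.randn(5, 3), 'Waa': np.random.randn(5, 5),
                    'Wya': np.random.randn(2, 5), 'ba': np.random.randn(5, 1),
                    'by': np.random.randn(2, 1)}
_a_next, _yt_pred, _ = rnn_cell_forward(np.random.randn(3, 10), np.random.randn(5, 10), _demo_rnn_params)
print(_a_next.shape, _yt_pred.shape)   # (5, 10) (2, 10)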
# Forward pass of the RNN over T_x time steps.
def rnn_forward(x, a0, parameters):
    caches = []
    # Retrieve dimensions: x is (n_x, m, T_x), Wya is (n_y, n_a)
    n_x, m, T_x = x.shape
    n_y, n_a = parameters['Wya'].shape

    # Initialize the hidden states and predictions over all time steps
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))

    a_next = a0

    # Loop over the time steps, running one RNN cell per step
    for t in range(T_x):
        a_next, yt_pred, cache = rnn_cell_forward(x[:, :, t], a_next, parameters)
        a[:, :, t] = a_next
        y_pred[:, :, t] = yt_pred
        caches.append(cache)
    # Store values needed for backpropagation
    caches = (caches, x)
    return a, y_pred, caches
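# Quick shape check for rnn_forward, reusing _demo_rnn_params from above
# (T_x=4 time steps is again an illustrative choice).
_a, _y_pred, _rnn_caches = rnn_forward(np.random.randn(3, 10, 4), np.random.randn(5, 10), _demo_rnn_params)
print(_a.shape, _y_pred.shape)   # (5, 10, 4) (2, 10, 4)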
# One forward step of an LSTM cell.
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    # Retrieve the parameters: forget gate, update (input) gate, candidate,
    # output gate, and the output projection
    Wf = parameters['Wf']
    bf = parameters['bf']
    Wi = parameters['Wi']
    bi = parameters['bi']
    Wc = parameters['Wc']
    bc = parameters['bc']
    Wo = parameters['Wo']
    bo = parameters['bo']
    Wy = parameters['Wy']
    by = parameters['by']

    n_x, m = xt.shape
    n_y, n_a = parameters['Wy'].shape

    # Stack a_prev and xt into a single (n_a + n_x, m) matrix
    concat = np.zeros((n_a + n_x, m))
    concat[:n_a, :] = a_prev
    concat[n_a:, :] = xt

    ft = sigmoid(np.dot(Wf, concat) + bf)      # forget gate
    it = sigmoid(np.dot(Wi, concat) + bi)      # update gate
    cct = np.tanh(np.dot(Wc, concat) + bc)     # candidate value c~<t>
    c_next = ft * c_prev + it * cct            # new cell state
    ot = sigmoid(np.dot(Wo, concat) + bo)      # output gate
    a_next = ot * np.tanh(c_next)              # new hidden state

    # Prediction of the LSTM cell
    yt_pred = softmax(np.dot(Wy, a_next) + by)

    # Store values needed for the backward pass
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)
    return a_next, c_next, yt_pred, cache
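# Quick shape check for lstm_cell_forward (illustrative sizes again:
# n_x=3, n_a=5, n_y=2, m=10, so each gate matrix is (5, 5+3)=(5, 8)).
_demo_lstm_params = {'Wf': np.random.randn(5, 8), 'bf': np.random.randn(5, 1),
                     'Wi': np.random.randn(5, 8), 'bi': np.random.randn(5, 1),
                     'Wc': np.random.randn(5, 8), 'bc': np.random.randn(5, 1),
                     'Wo': np.random.randn(5, 8), 'bo': np.random.randn(5, 1),
                     'Wy': np.random.randn(2, 5), 'by': np.random.randn(2, 1)}
_a_next, _c_next, _yt, _ = lstm_cell_forward(np.random.randn(3, 10), np.random.randn(5, 10),
                                             np.random.randn(5, 10), _demo_lstm_params)
print(_a_next.shape, _c_next.shape, _yt.shape)   # (5, 10) (5, 10) (2, 10)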
# Forward pass of the LSTM over T_x time steps.
def lstm_forward(x, a0, parameters):
    caches = []
    # Retrieve dimensions: x is (n_x, m, T_x), Wy is (n_y, n_a)
    n_x, m, T_x = x.shape
    n_y, n_a = parameters['Wy'].shape

    # Initialize hidden states, cell states and predictions
    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))
    y = np.zeros((n_y, m, T_x))

    a_next = a0
    c_next = np.zeros(a_next.shape)

    # Loop over the time steps
    for t in range(T_x):
        a_next, c_next, yt, cache = lstm_cell_forward(x[:, :, t], a_next, c_next, parameters)
        a[:, :, t] = a_next
        y[:, :, t] = yt
        c[:, :, t] = c_next
        caches.append(cache)
    # Store values needed for backpropagation
    caches = (caches, x)
    return a, y, c, caches
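# Quick shape check for lstm_forward, reusing _demo_lstm_params (T_x=7 is illustrative).
_a, _y, _c, _lstm_caches = lstm_forward(np.random.randn(3, 10, 7), np.random.randn(5, 10), _demo_lstm_params)
print(_a.shape, _y.shape, _c.shape)   # (5, 10, 7) (2, 10, 7) (5, 10, 7)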
# Backward step for a single vanilla RNN cell.
def rnn_cell_backward(da_next, cache):
    (a_next, a_prev, xt, parameters) = cache

    Wax = parameters['Wax']
    Waa = parameters['Waa']
    Wya = parameters['Wya']
    ba = parameters['ba']
    by = parameters['by']

    # Gradient through tanh: d/dz tanh(z) = 1 - tanh(z)^2, with tanh(z) = a_next
    dtanh = (1 - a_next ** 2) * da_next

    # Gradients with respect to the input and Wax
    dxt = np.dot(Wax.T, dtanh)
    dWax = np.dot(dtanh, xt.T)

    # Gradients with respect to the previous hidden state and Waa
    da_prev = np.dot(Waa.T, dtanh)
    dWaa = np.dot(dtanh, a_prev.T)

    # Gradient with respect to the bias (sum over the batch dimension)
    dba = np.sum(dtanh, keepdims=True, axis=-1)

    gradients = {"dxt": dxt, "da_prev": da_prev, "dWax": dWax, "dWaa": dWaa, "dba": dba}
    return gradients
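# Quick shape check for rnn_cell_backward, reusing _demo_rnn_params from above.
_, _, _cell_cache = rnn_cell_forward(np.random.randn(3, 10), np.random.randn(5, 10), _demo_rnn_params)
_grads = rnn_cell_backward(np.random.randn(5, 10), _cell_cache)
print(_grads["dxt"].shape, _grads["dWax"].shape, _grads["dba"].shape)   # (3, 10) (5, 3) (5, 1)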
# Backward pass of the RNN over all time steps.
def rnn_backward(da, caches):
    (caches, x) = caches
    (a1, a0, x1, parameters) = caches[0]

    # Retrieve dimensions from da and from the first cache
    n_a, m, T_x = da.shape
    n_x, m = x1.shape

    # Initialize the gradients
    dx = np.zeros((n_x, m, T_x))
    dWax = np.zeros((n_a, n_x))
    dWaa = np.zeros((n_a, n_a))
    dba = np.zeros((n_a, 1))
    da_prevt = np.zeros((n_a, m))

    # Loop backwards through time; at each step the incoming gradient is the
    # gradient from the loss at this step plus the gradient flowing back
    # through the next hidden state (da_prevt must be fed back each iteration)
    for t in reversed(range(T_x)):
        gradients = rnn_cell_backward(da[:, :, t] + da_prevt, caches[t])
        dxt, da_prevt, dWaxt, dWaat, dbat = (gradients["dxt"], gradients["da_prev"],
                                             gradients["dWax"], gradients["dWaa"], gradients["dba"])
        dx[:, :, t] = dxt
        dWax += dWaxt
        dWaa += dWaat
        dba += dbat
    # Gradient with respect to the initial hidden state a0
    da0 = da_prevt
    gradients = {"dx": dx, "da0": da0, "dWax": dWax, "dWaa": dWaa, "dba": dba}
    return gradients
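# Quick shape check for rnn_backward, reusing _demo_rnn_params (T_x=4 illustrative).
_a, _, _rnn_caches = rnn_forward(np.random.randn(3, 10, 4), np.random.randn(5, 10), _demo_rnn_params)
_grads = rnn_backward(np.random.randn(5, 10, 4), _rnn_caches)
print(_grads["dx"].shape, _grads["da0"].shape)   # (3, 10, 4) (5, 10)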
# Backward step for a single LSTM cell.
def lstm_cell_backward(da_next, dc_next, cache):
    (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters) = cache
    n_x, m = xt.shape
    n_a, m = a_next.shape

    # Gradients of the gates and the candidate value, each multiplied by the
    # derivative of its own activation: sigmoid'(z) = s(1-s), tanh'(z) = 1 - tanh^2
    dot = da_next * np.tanh(c_next) * ot * (1 - ot)
    dcct = (dc_next * it + ot * (1 - np.square(np.tanh(c_next))) * it * da_next) * (1 - np.square(cct))
    dit = (dc_next * cct + ot * (1 - np.square(np.tanh(c_next))) * cct * da_next) * it * (1 - it)
    dft = (dc_next * c_prev + ot * (1 - np.square(np.tanh(c_next))) * c_prev * da_next) * ft * (1 - ft)

    # Gradients of the weight matrices and biases (the gates act on [a_prev; xt])
    concat = np.concatenate((a_prev, xt), axis=0).T
    dWf = np.dot(dft, concat)
    dWi = np.dot(dit, concat)
    dWc = np.dot(dcct, concat)
    dWo = np.dot(dot, concat)
    dbf = np.sum(dft, keepdims=True, axis=1)
    dbi = np.sum(dit, keepdims=True, axis=1)
    dbc = np.sum(dcct, keepdims=True, axis=1)
    dbo = np.sum(dot, keepdims=True, axis=1)

    # Gradients with respect to the previous hidden state, previous cell state and
    # the input (the first n_a columns of each W act on a_prev, the remaining on xt)
    da_prev = np.dot(parameters["Wf"][:, :n_a].T, dft) + np.dot(parameters["Wc"][:, :n_a].T, dcct) + \
        np.dot(parameters["Wi"][:, :n_a].T, dit) + np.dot(parameters["Wo"][:, :n_a].T, dot)
    dc_prev = dc_next * ft + ot * (1 - np.square(np.tanh(c_next))) * ft * da_next
    dxt = np.dot(parameters["Wf"][:, n_a:].T, dft) + np.dot(parameters["Wc"][:, n_a:].T, dcct) + \
        np.dot(parameters["Wi"][:, n_a:].T, dit) + np.dot(parameters["Wo"][:, n_a:].T, dot)

    gradients = {"dxt": dxt, "da_prev": da_prev, "dc_prev": dc_prev, "dWf": dWf, "dWi": dWi,
                 "dWc": dWc, "dWo": dWo, "dbf": dbf, "dbi": dbi, "dbc": dbc, "dbo": dbo}
    return gradients
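# Quick shape check for lstm_cell_backward, reusing _demo_lstm_params from above.
_, _, _, _lstm_cache = lstm_cell_forward(np.random.randn(3, 10), np.random.randn(5, 10),
                                         np.random.randn(5, 10), _demo_lstm_params)
_grads = lstm_cell_backward(np.random.randn(5, 10), np.random.randn(5, 10), _lstm_cache)
print(_grads["dxt"].shape, _grads["dWf"].shape, _grads["dc_prev"].shape)   # (3, 10) (5, 8) (5, 10)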
# Backward pass of the LSTM over all time steps.
def lstm_backward(da, caches):
    (caches, x) = caches
    (a1, c1, a0, c0, f1, i1, cc1, o1, x1, parameters) = caches[0]

    # Retrieve dimensions from da and from the first cache
    n_a, m, T_x = da.shape
    n_x, m = x1.shape

    # Initialize the gradients
    dx = np.zeros((n_x, m, T_x))
    da0 = np.zeros((n_a, m))
    da_prevt = np.zeros((n_a, m))
    dc_prevt = np.zeros((n_a, m))
    dWf = np.zeros((n_a, n_a + n_x))
    dWi = np.zeros((n_a, n_a + n_x))
    dWc = np.zeros((n_a, n_a + n_x))
    dWo = np.zeros((n_a, n_a + n_x))
    dbf = np.zeros((n_a, 1))
    dbi = np.zeros((n_a, 1))
    dbc = np.zeros((n_a, 1))
    dbo = np.zeros((n_a, 1))

    # Loop backwards through time, calling lstm_cell_backward on each step's cache
    # and passing da_prevt and dc_prevt back from step to step
    for t in reversed(range(T_x)):
        gradients = lstm_cell_backward(da[:, :, t] + da_prevt, dc_prevt, caches[t])
        da_prevt = gradients["da_prev"]
        dc_prevt = gradients["dc_prev"]
        dx[:, :, t] = gradients["dxt"]
        dWf = dWf + gradients["dWf"]
        dWi = dWi + gradients["dWi"]
        dWc = dWc + gradients["dWc"]
        dWo = dWo + gradients["dWo"]
        dbf = dbf + gradients["dbf"]
        dbi = dbi + gradients["dbi"]
        dbc = dbc + gradients["dbc"]
        dbo = dbo + gradients["dbo"]

    # Gradient with respect to the initial hidden state a0
    da0 = da_prevt

    gradients = {"dx": dx, "da0": da0, "dWf": dWf, "dWi": dWi, "dWc": dWc, "dWo": dWo,
                 "dbf": dbf, "dbi": dbi, "dbc": dbc, "dbo": dbo}
    return gradients
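# Quick shape check for lstm_backward, reusing _demo_lstm_params (T_x=7 illustrative).
_a, _, _, _lstm_caches = lstm_forward(np.random.randn(3, 10, 7), np.random.randn(5, 10), _demo_lstm_params)
_grads = lstm_backward(np.random.randn(5, 10, 7), _lstm_caches)
print(_grads["dx"].shape, _grads["da0"].shape, _grads["dWf"].shape)   # (3, 10, 7) (5, 10) (5, 8)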

Reposted from blog.csdn.net/qq_31119155/article/details/81070061