林轩田机器学习基石课程-作业1-python实现

林轩田机器学习基石课程-作业1-python实现

作者：一直特立独行的猫

由于是首次做这个作业，故我先将林轩田老师的作业安排发过来： https://www.csie.ntu.edu.tw/~htlin/course/ml15fall/

Homework #1

For Problems 15-20, you will play with PLA and pocket algorithm.

First, we use an artificial data set to study PLA. The data set is in
http://www.csie.ntu.edu.tw/~htlin/course/ml15fall/hw1/hw1_15_train.dat Each line of the data set contains one $(x_n, y_n)$ with $x_n \in \mathbb{R}^4$. The first 4 numbers of the line contain the components of $x_n$ in order, the last number is $y_n$. Please initialize your algorithm with $w = 0$ and take sign(0) as −1. As a friendly reminder, remember to add $x_0 = 1$ as always!

15. (*) Implement a version of PLA by visiting examples in the naïve cycle using the order of examples in the data set. Run the algorithm on the data set. What is the number of updates before the algorithm halts? What is the index of the example that results in the “last” mistake?

import sys
import numpy as np
import random as rd
import matplotlib.pylab as plt

### learning rate handed to pla() by the script entry point at the bottom of the file
rate = 1

def pla_error_rate(features, lables, parameter_vector):
    """Return the fraction of samples that parameter_vector gets wrong.

    A sample counts as a mistake when label * (x . w) <= 0, so a score of
    exactly zero is treated as a misclassification.

    features: indexable collection of sample rows.
    lables: indexable collection whose rows carry the label at position 0.
    parameter_vector: weight column vector usable with np.dot.
    """
    total = len(features)

    ### tally every sample whose signed margin is not strictly positive
    mistakes = sum(
        1
        for row, sample in enumerate(features)
        if lables[row][0]*(np.dot(sample, parameter_vector)[0]) <= 0
    )
    return float(mistakes)/float(total)

def pla_pocket(features, lables, index_array, max_update_times, rate = 1):
    """Run PLA in the given visiting order while keeping the best weights seen.

    Samples are visited cyclically in the order given by index_array. Every
    misclassified sample (label * (x . w) <= 0) triggers the update
    w = w + rate * y * x. After each update the new w replaces the pocket
    weights if its error rate over the whole data set is strictly lower.

    The loop stops after max_update_times updates, or earlier once a full
    pass over the data produced no mistake (w already separates the data, so
    no further update could ever happen).

    features: 2-D array of samples, one row per sample, x0 included.
    lables: 2-D array with the label of sample i at lables[i][0].
    index_array: visiting order; entry k is the sample index checked k-th.
    max_update_times: upper bound on the number of weight updates.
    rate: learning rate multiplied into every update.

    Returns (w_pocket, w): the best weights found and the final weights,
    both as column vectors with one row per feature.
    """
    sample_len = len(features)
    ### size the weights from the data instead of assuming 5 columns
    dimension = np.shape(features)[1]
    w_pocket = np.zeros((dimension, 1))
    w = np.zeros((dimension, 1))
    if sample_len == 0:
        return w_pocket, w

    ### error of the pocket weights is cached so it is not recomputed per update
    pocket_error = pla_error_rate(features, lables, w_pocket)
    index = 0
    count = 0
    clean_streak = 0  ### consecutive correct samples since the last update
    while count < max_update_times and clean_streak < sample_len:
        feature_index = index_array[index]
        ### check if need update w
        if lables[feature_index][0]*(np.dot(features[feature_index], w)[0]) <= 0:
            ### update w:  w = w + rate*yi*xi (x0 carries the bias term)
            x_column = np.asarray(features[feature_index], dtype=float).reshape(-1, 1)
            w = w + rate*lables[feature_index][0]*x_column
            count += 1
            clean_streak = 0

            ### check if we need to update pocket
            error = pla_error_rate(features, lables, w)
            if error < pocket_error:
                w_pocket = w
                pocket_error = error
        else:
            clean_streak += 1

        ### move to the next position, wrapping around at the end
        index = 0 if index >= sample_len - 1 else index + 1
    return w_pocket,w

def pla_fix_index(features, lables, index_array, rate = 1):
    """Run PLA visiting samples cyclically in the order given by index_array.

    A sample is a mistake when label * (x . w) <= 0; each mistake triggers
    w = w + rate * y * x. The loop ends once sample_len consecutive samples
    were classified correctly, so it does not return if that never happens.

    features: 2-D array of samples, one row per sample, x0 included.
    lables: 2-D array with the label of sample i at lables[i][0].
    index_array: visiting order; entry k is the sample index checked k-th.
    rate: learning rate multiplied into every update.

    Returns the number of weight updates performed (0 for empty data).
    """
    sample_len = len(features)
    if sample_len == 0:
        ### nothing to classify, hence nothing to correct
        return 0

    ### size the weights from the data instead of assuming 5 columns
    w = np.zeros((np.shape(features)[1], 1))
    index = 0
    right_items = 0  ### if right_items == feature len, algorithm halts
    count = 0
    while right_items < sample_len:
        feature_index = index_array[index]
        ### check if need update w
        if lables[feature_index][0]*(np.dot(features[feature_index], w)[0]) <= 0:
            ### update w:  w = w + rate*yi*xi (x0 carries the bias term)
            x_column = np.asarray(features[feature_index], dtype=float).reshape(-1, 1)
            w = w + rate*lables[feature_index][0]*x_column
            ### a mistake restarts the run of correct samples
            right_items = 0
            count += 1
        else:
            right_items += 1

        ### advance cyclically through the visiting order
        index = (index + 1) % sample_len
    return count

### perceptron learning algorithm: input features, labels and learning rate, return the number of updates
def pla(features, lables, alpha = 1):
    """Run PLA visiting samples cyclically in their stored order.

    A sample is a mistake when label * (x . w) <= 0; each mistake triggers
    w = w + alpha * y * x. The loop ends once sample_len consecutive samples
    were classified correctly, so it does not return if that never happens.

    features: 2-D array of samples, one row per sample, x0 included.
    lables: 2-D array with the label of sample i at lables[i][0].
    alpha: learning rate multiplied into every update.

    Returns the number of weight updates performed (0 for empty data).
    """
    sample_len = len(features)
    if sample_len == 0:
        ### nothing to classify, hence nothing to correct
        return 0

    ### size the weights from the data instead of assuming 5 columns
    w = np.zeros((np.shape(features)[1], 1))
    index = 0
    right_items = 0  ### if right_items == feature len, algorithm halts
    count = 0
    while right_items < sample_len:
        ### check if need update w
        if lables[index][0]*(np.dot(features[index], w)[0]) <= 0:
            ### update w:  w = w + alpha*yi*xi (x0 carries the bias term)
            x_column = np.asarray(features[index], dtype=float).reshape(-1, 1)
            w = w + alpha*lables[index][0]*x_column
            ### a mistake restarts the run of correct samples
            right_items = 0
            count += 1
        else:
            right_items += 1

        ### advance cyclically through the data set
        index = (index + 1) % sample_len
    return count
### import data from file
def load_data(file_path):
    """Read a whitespace-separated training file into feature and label arrays.

    Every non-blank line holds the feature values followed by the label as
    its last number; spaces and tabs are both accepted as separators. Blank
    lines are skipped.

    file_path: path of the text file to read.

    Returns (x, y): x has one row per sample with a constant 1 in column 0
    (the x0 term) followed by the features; y is a column of labels. An
    input without any data line yields arrays of shape (0, 5) and (0, 1).

    Raises ValueError if a token is not a number or the lines do not all
    have the same count of numbers.
    """
    rows = []
    with open(file_path) as file_object:
        for line in file_object:
            tokens = line.split()
            if not tokens:
                ### tolerate blank lines, e.g. a trailing newline at end of file
                continue
            rows.append([float(num) for num in tokens])

    if not rows:
        return np.zeros((0, 5)), np.zeros((0, 1))

    ### dtype=float makes rows of unequal length fail loudly
    data = np.array(rows, dtype=float)

    ### same width as the raw rows: the label column is swapped for x0 = 1
    x = np.ones(data.shape)
    x[:, 1:] = data[:, :-1]
    y = data[:, -1:].copy()
    return x,y

if __name__ == '__main__':
    ### the training file may be passed as the first command-line argument;
    ### without one, fall back to the original hard-coded location
    data_path = sys.argv[1] if len(sys.argv) > 1 else r'C:\Users\ZN\Desktop\data.txt '
    (X, Y) = load_data(data_path)
    ### print the number of PLA updates needed on the loaded data
    print(pla(X, Y, rate))

林轩田机器学习基石课程-作业1-python实现

猜你喜欢