推荐系统入门必读论文

《Item-Based Collaborative Filtering Recommendation Algorithms》

基于物品的协同过滤推荐算法

https://blog.csdn.net/BTUJACK/article/details/84674967

《Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model》

当矩阵分解遇上邻域方法:多层面的协同过滤模型

https://blog.csdn.net/fangqingan_java/article/details/50762296


《Matrix factorization techniques for recommender systems》

推荐系统中的矩阵分解技术

https://zhuanlan.zhihu.com/p/28577447?group_id=881547532893851649


《Factorization Machines with libFM》

使用 libFM 的因子分解机(Factorization Machines)

从item-base到svd再到rbm,多种Collaborative Filtering(协同过滤算法)从原理到实现

https://blog.csdn.net/Dark_Scope/article/details/17228643

import numpy as np
import matplotlib.pyplot as plt
import math
# Experiment configuration shared by pick_arm and the simulation loop.
number_of_bandits = 10   # independent bandit problems averaged per strategy
number_of_arms = 10      # arms in each bandit
number_of_pulls = 10000  # pulls performed on each bandit
epsilon = 0.3            # exploration probability of the e-greedy strategies
min_temp = 0.1           # lower bound applied to epsilon by "egreedy_decay"
decay_rate = 0.999       # factor epsilon is multiplied by on every "egreedy_decay" pick

def _random_argmax(values):
    """Return the index of the largest entry of *values*, breaking ties uniformly at random."""
    best_arms = np.argwhere(values == np.max(values)).flatten()
    return best_arms[np.random.randint(0, len(best_arms))]


def pick_arm(q_values, counts, strategy, success, failure):
    """Choose the next arm to pull according to *strategy*.

    Args:
        q_values: per-arm value estimates (numpy array).
        counts: per-arm number of pulls so far (numpy array).
        strategy: one of "random", "greedy", "egreedy", "egreedy_decay",
            "ucb" or "thompson".
        success: per-arm success counts; only read by "thompson".
        failure: per-arm failure counts; only read by "thompson".

    Returns:
        The index of the selected arm.

    Raises:
        ValueError: if *strategy* is not one of the supported names.

    Side effects:
        "egreedy_decay" multiplies the module-level ``epsilon`` by
        ``decay_rate`` on every call, never letting it drop below ``min_temp``.
    """
    global epsilon

    if strategy == "random":
        return np.random.randint(0, len(q_values))

    if strategy == "greedy":
        return _random_argmax(q_values)

    if strategy in ("egreedy", "egreedy_decay"):
        if strategy == "egreedy_decay":
            epsilon = max(epsilon * decay_rate, min_temp)
        # Exploit with probability 1 - epsilon, otherwise pick uniformly.
        if np.random.random() > epsilon:
            return _random_argmax(q_values)
        return np.random.randint(0, len(q_values))

    if strategy == "ucb":
        total_counts = np.sum(counts)
        # The 0.001 keeps the bonus finite (but very large) for unpulled arms.
        exploration_bonus = np.sqrt(
            np.reciprocal(counts + 0.001) * 2 * math.log(total_counts + 1.0)
        )
        return _random_argmax(q_values + exploration_bonus)

    if strategy == "thompson":
        # One Beta(success + 1, failure + 1) draw per arm; play the largest.
        sample_means = np.random.beta(
            np.asarray(success) + 1, np.asarray(failure) + 1
        )
        return np.argmax(sample_means)

    # Previously an unknown name fell through and returned None, which the
    # caller would then use as an array index without any error.
    raise ValueError("unknown strategy: %r" % (strategy,))


# Benchmark every strategy: for each one, play number_of_bandits freshly drawn
# bandits for number_of_pulls pulls each and plot, per step, the average
# percentage of pulls so far that went to the truly best arm.
fig = plt.figure()
ax = fig.add_subplot(111)

# "egreedy_decay" mutates the global epsilon inside pick_arm; remember the
# configured value so each run restarts from it instead of a duplicated literal.
initial_epsilon = epsilon

for st in ["greedy", "random", "egreedy", "egreedy_decay", "ucb", "thompson"]:

    # best_arm_counts[i][j]: percentage of the first j+1 pulls on bandit i
    # that selected its best arm.
    best_arm_counts = np.zeros((number_of_bandits, number_of_pulls))

    for i in range(number_of_bandits):
        # True success probability of every arm, drawn uniformly from [0, 1).
        arm_means = np.random.rand(number_of_arms)
        best_arm = np.argmax(arm_means)

        q_values = np.zeros(number_of_arms)
        counts = np.zeros(number_of_arms)
        success = np.zeros(number_of_arms)
        failure = np.zeros(number_of_arms)

        for j in range(number_of_pulls):
            a = pick_arm(q_values, counts, st, success, failure)

            # Bernoulli reward from the chosen arm.
            reward = np.random.binomial(1, arm_means[a])
            counts[a] += 1.0
            # Incremental update of the running mean reward of arm a.
            q_values[a] += (reward - q_values[a]) / counts[a]

            success[a] += reward
            failure[a] += (1 - reward)
            best_arm_counts[i][j] = counts[best_arm] * 100.0 / (j + 1)

        # Restart the decay schedule for the next bandit.
        epsilon = initial_epsilon

    # Average over bandits to get one curve per strategy.
    ys = np.mean(best_arm_counts, axis=0)
    xs = range(len(ys))
    ax.plot(xs, ys, label=st)

plt.xlabel('Steps')
plt.ylabel('Optimal pulls')

plt.tight_layout()
plt.legend()
plt.ylim((0, 110))
plt.show()


##################

相关代码:

# -*- coding: utf-8 -*-
import numpy as np
from matplotlib import pylab as plt
#from mpltools import style # uncomment for prettier plots
#style.use(['ggplot'])

'''
function definitions
'''
def generate_bernoulli_bandit_data(num_samples,K):
    """Pre-generate the Bernoulli reward of every arm for every round.

    One click-through rate per arm is drawn uniformly from [0, 1) and repeated
    for all rounds; a reward is True when an independent uniform draw falls
    below the arm's rate.

    Returns:
        (true_rewards, CTRs_that_generated_data): a boolean array and a float
        array, both of shape (num_samples, K).
    """
    arm_ctrs = np.random.rand(K)
    CTRs_that_generated_data = np.repeat(arm_ctrs[np.newaxis, :], num_samples, axis=0)
    uniform_draws = np.random.rand(num_samples, K)
    return uniform_draws < CTRs_that_generated_data, CTRs_that_generated_data

def random(estimated_beta_params):
    """Pick an arm uniformly at random, ignoring the estimates.

    Only the number of rows of *estimated_beta_params* (one per arm) is used.
    """
    num_arms = len(estimated_beta_params)
    return np.random.randint(num_arms)

def naive(estimated_beta_params,number_to_explore=100):
    """Explore-then-commit: even out the arms first, then play the best mean.

    Each row of *estimated_beta_params* is (successes, failures) for one arm.
    While any arm's row total is below *number_to_explore*, the arm with the
    smallest total is returned; afterwards the arm with the highest
    successes / total ratio is returned.
    """
    pulls_per_arm = estimated_beta_params.sum(1)

    # Commit phase: every arm has reached the exploration quota.
    if not np.any(pulls_per_arm < number_to_explore):
        success_rates = estimated_beta_params[:, 0] / pulls_per_arm
        return np.argmax(success_rates)

    # Exploration phase: top up the least-played arm.
    return np.argmin(pulls_per_arm)

def epsilon_greedy(estimated_beta_params,epsilon=0.01):
    """Epsilon-greedy arm selection.

    Each row of *estimated_beta_params* is (successes, failures) for one arm.
    With probability *epsilon* a uniformly random arm other than the current
    best is returned; otherwise the arm with the highest
    successes / (successes + failures) ratio is returned.

    With a single arm there is no alternative to explore, so that arm is
    always returned (the previous rejection loop never terminated here).
    """
    totals = estimated_beta_params.sum(1)
    successes = estimated_beta_params[:, 0]
    best_mean = np.argmax(successes / totals)
    num_arms = len(estimated_beta_params)

    be_exploratory = np.random.rand() < epsilon
    if not be_exploratory or num_arms < 2:
        return best_mean

    # Draw uniformly from the num_arms - 1 non-best arms: sample an index in
    # [0, num_arms - 1) and shift it past the best arm's slot.
    other_choice = np.random.randint(0, num_arms - 1)
    if other_choice >= best_mean:
        other_choice += 1
    return other_choice

# UCB with a (1 - 1/t) confidence interval from the Chernoff-Hoeffding bound.
# For details of this particular bound, see the UCB1-TUNED section, slide 18, of:
# http://lane.compbio.cmu.edu/courses/slides_ucb.pdf
def UCB(estimated_beta_params):
    """Return the arm with the largest upper confidence bound.

    Each row of *estimated_beta_params* is (successes, failures) for one arm.
    The bound is the sample mean plus
    sqrt(min(variance + sqrt(2 ln t / n), 0.25) * ln t / n), where n is the
    row total and t is the sum of the whole array.
    """
    pulls_per_arm = estimated_beta_params.sum(1)
    log_rounds = np.log(float(estimated_beta_params.sum()))

    sample_means = estimated_beta_params[:, 0] / pulls_per_arm
    # Bernoulli variance p * (1 - p), written as p - p**2.
    sample_variances = sample_means - sample_means**2

    # Variance estimate inflated by its own uncertainty, capped at 0.25.
    capped_variance = np.minimum(
        sample_variances + np.sqrt(2*log_rounds/pulls_per_arm), 0.25
    )
    upper_bounds = sample_means + np.sqrt(capped_variance * log_rounds/pulls_per_arm)
    return np.argmax(upper_bounds)

# UCB using fixed 95% confidence intervals; see slide 8 for details:
# http://dept.stat.lsa.umich.edu/~kshedden/Courses/Stat485/Notes/binomial_confidence_intervals.pdf
def UCB_bernoulli(estimated_beta_params):
    """Return the arm whose mean plus 1.96 standard errors is largest.

    Each row of *estimated_beta_params* is (successes, failures) for one arm;
    the standard error is sqrt((p - p**2) / n) with p the sample mean and n
    the row total.
    """
    pulls_per_arm = estimated_beta_params.sum(1)
    sample_means = estimated_beta_params[:, 0] / pulls_per_arm
    sample_variances = sample_means - sample_means**2
    standard_errors = np.sqrt(sample_variances/pulls_per_arm)
    return np.argmax(sample_means + 1.96*standard_errors)
    

def run_bandit_dynamic_alg(true_rewards,CTRs_that_generated_data,choice_func):
    """Play one bandit policy over pre-generated rewards.

    Args:
        true_rewards: (num_samples, K) array; entry [i, k] is the reward arm k
            pays in round i.
        CTRs_that_generated_data: (num_samples, K) array of the success
            probabilities behind *true_rewards*.
        choice_func: callable taking the (K, 2) array of per-arm
            (successes, failures) and returning the arm index to pull.

    Returns:
        1-D array of length num_samples with the cumulative expected regret
        after each round.
    """
    num_samples, K = true_rewards.shape

    # Every arm starts with one pseudo-success and one pseudo-failure.
    prior_a = 1.
    prior_b = 1.
    estimated_beta_params = np.tile(np.array([prior_a, prior_b]), (K, 1))

    regret = np.zeros(num_samples)
    for i in range(num_samples):
        this_choice = choice_func(estimated_beta_params)

        # Column 0 counts successes, column 1 counts failures.
        update_ind = 0 if true_rewards[i, this_choice] == 1 else 1
        estimated_beta_params[this_choice, update_ind] += 1

        # Expected regret: best available rate minus the chosen arm's rate.
        regret[i] = np.max(CTRs_that_generated_data[i, :]) - CTRs_that_generated_data[i, this_choice]

    return np.cumsum(regret)



if __name__ == '__main__':
    # Compare the bandit policies: average each policy's cumulative expected
    # regret over several independently generated problems and plot the
    # curves on a log-scaled y axis.

    # define number of samples and number of choices
    num_samples = 10000
    K = 5 # number of arms
    number_experiments = 100

    # (legend label, policy) pairs; the column order of regret_accumulator
    # and the legend order both follow this table.
    algorithms = (
        ('Random', random),
        ('Naive', naive),
        ('Epsilon-Greedy', epsilon_greedy),
        ('(1 - 1/t) UCB', UCB),
        ('95% UCB', UCB_bernoulli),
    )

    regret_accumulator = np.zeros((num_samples, len(algorithms)))
    for i in range(number_experiments):
        # %-formatting prints the same text under Python 2 and Python 3.
        print("Running experiment: %d" % (i + 1))
        # Every policy is evaluated on the same pre-generated rewards.
        true_rewards,CTRs_that_generated_data = generate_bernoulli_bandit_data(num_samples,K)
        for column, (_, choice_func) in enumerate(algorithms):
            regret_accumulator[:,column] += run_bandit_dynamic_alg(true_rewards,CTRs_that_generated_data,choice_func)

    plt.semilogy(regret_accumulator/number_experiments)
    plt.title('Simulated Bandit Performance for K = %d' % K)
    plt.ylabel('Cumulative Expected Regret')
    plt.xlabel('Round Index')
    plt.legend(tuple(label for label, _ in algorithms),loc='lower right')
    plt.show()

猜你喜欢

转载自blog.csdn.net/BTUJACK/article/details/84675270