基于社交网络的用户与基于物品的协同过滤推荐算法

基于社交网络的用户与基于物品的协同过滤推荐算法-java
2016-08-03 16:25:27
阅读数:3314
完整工程+数据源:https://github.com/scnuxiaotao/recom_sys
[java] view plain copy
import java.io.BufferedReader;  
import java.io.FileNotFoundException;  
import java.io.FileReader;  
import java.io.FileWriter;  
import java.io.IOException;  
import java.util.HashMap;  

public class itemcf {  
    /* 
     *  
     *     主函数~ 
     *  
     */  
    /*public static void main(String[] args) throws IOException {  

        _Run(); 

    }*/  

    /* 
     *  
     *     基于物品的实现~ 
     *  
     */  






    static int usersum = 20836;     //用户数  
    static int itemsum = 200;   //物品总数  
    static int N = 3;           //推荐个数  
    static int[][] train; //训练集合user item rate矩阵  
    static int[][] test;//测试集合user item rate矩阵  
    static double[][] trainuseritem; //训练集合user item 兴趣程度 矩阵  
    static int[][] recommend;  //为每个用户推荐N个物品  
    static simi [][]simiItem; //排序后的相似性矩阵  
    static double [][]itemsim; //未排序的相似性矩阵  
    static String road = "data/6 总评论情感分析结果/酒店-评论(已转化).txt";  //数据路径,格式为用户编号::物品编号::评分  
    static String road2 = "data/10 推荐/(已转化)天河酒店ID.txt";  
    static String road3 = "data/10 推荐/物品推荐.txt";  
    public static String road4 = "data/10 推荐/(已转化)用户ID.txt";  
    public static class simi  
    {  
        double value; //相似值  
        int num;     //相似物品号  
    };  

    public static void _Run() throws IOException {  

        get_user_hotel_num();  
        System.out.println("usersum: "+usersum);  
        System.out.println("itemsum: "+itemsum);  
        train = new int[itemsum][usersum]; train[0][0] = 0; //训练集合user item rate矩阵  
        test = new int[itemsum][usersum]; test[0][0] = 0;  //测试集合user item rate矩阵  
        trainuseritem =   
                new double[usersum][itemsum]; trainuseritem[0][0] = 0.0; //训练集合user item 兴趣程度 矩阵  
        recommend = new int[usersum][N]; recommend[0][0] = 0;  //为每个用户推荐N个物品  
        simiItem = new simi[itemsum][itemsum]; //排序后的相似性矩阵  

        itemsim = new double[itemsum][itemsum]; //未排序的相似性矩阵  

        int i,j,k = 8;        //去用户的k个最近邻居(相似度最高)来计算推荐物品  

        for(i = 0 ;i < itemsum;++i)  
            for(j = 0 ;j < itemsum;++j) simiItem[i][j] = new simi();  

        System.out.println("1.训练集");  
        SplitData(8,1);   
        //输出初始化的矩阵  
        /*for (i=0;i<10;i++) 
        { 
            System.out.println("Item"+i+":  "); 
            for (j=0;j<5;j++) 
            { 
                System.out.print(train[i][j]+"  "); 
            } 
            System.out.println(); 
        }*/  


        System.out.println("2.计算物品之间相似性,得到相似性矩阵");  
        for (i=0;i<itemsum;i++)   
        {  
            for (j=0;j<itemsum;j++)  
            {  
                itemsim[i][j] = Simility(train[i],train[j]);  
                if(i == j) itemsim[i][j] = 0;   //此处有bug,已修改  
            }  
        }  
        //输出物品相似性矩阵  
        /*for (i=0;i<5;i++) 
        { 
            System.out.println("Item"+":  "); 
            for (j=0;j<100;j++) 
            { 
                System.out.print(itemsim[i][j]+"  "); 
            } 
            System.out.println(); 
        }*/  

        System.out.println("3.物品相似度由高到低排序");  
        sort();  
        //输出排序后的物品相似性矩阵  

        /*for(i=0;i<5;i++) 
        { 
            System.out.println("Item"+i+":  "); 
            for(j=0;j<10;j++) 
            { 
                System.out.print(simiItem[i][j].num+","+simiItem[i][j].value+" "); 
            } 
            System.out.println(); 
        }*/  




        System.out.println("4.得到用户对物品兴趣程度的矩阵");  
        for(i=0;i<usersum;i++)  
        {  
            for(j=0;j<itemsum;j++)  
            {  
                if(train[j][i]==0)            //如果用户i对物品j没有过行为,才计算i对j的预测兴趣程度  
                    //trainuseritem[i][j]=  
                    getUserLikeItem(i,j,k);  

            }  
        }  
        //输出用户对物品兴趣的矩阵  
        /*for (i=0;i<5;i++) 
        { 
            System.out.println("User_ins"+i+":  "); 
            for (j=0;j<10;j++) 
            { 
                System.out.print(trainuseritem[i][j]+"  "); 
            } 
            System.out.println(); 
        }*/  
        System.out.println("5.通过物品兴趣程度,推荐前N个");  
        getRecommend();  
        //输出推荐矩阵  
        for (i=0;i<200;i++)  
        {  
            System.out.println("user"+(i+1));  
            for (j=0;j<N;j++)  
            {  
                if(recommend[i][j] != 0)  
                    System.out.print(recommend[i][j]+" ");  
            }  
            System.out.println();  
        }  

        System.out.println("6.输出到txt");  
        out_txt(road2,road3);  


    }  
    public static void get_user_hotel_num() throws IOException {   
        FileReader data_about = new FileReader(road2);  
        BufferedReader read_data_about=new BufferedReader(data_about);  
        int num = 0;  
        while(read_data_about.readLine() != null) num++;  
        itemsum = num;  
        data_about.close();  
        read_data_about.close();  

        FileReader data_about2 = new FileReader(road4);  
        BufferedReader read_data_about2=new BufferedReader(data_about2);  
        num = 0;  
        while(read_data_about2.readLine() != null) num++;  
        usersum = num;  
        data_about2.close();  
        read_data_about2.close();  

    }  

    public static void out_txt(String r1,String r2) throws IOException {  
        FileReader data_about=new FileReader(r1);  
        BufferedReader read_data_about=new BufferedReader(data_about);  
        FileWriter fw=new FileWriter(r2);  

        String id; //暂存文件一行记录  
        int id_num = 1;  
        String []tmps = new String[5];  
        String []hotel = new String[201];  
        while((id=read_data_about.readLine())!=null){   
            tmps = id.split("::");  
            String hotelname = tmps[0];  
            String number = tmps[1];  
            hotel[Integer.parseInt(number)] = hotelname;  
        }  
        int i,j;  
        for (i=0;i<usersum;i++)  
        {  
            fw.write("user"+(i+1));  
            for (j=0;j<N;j++)  
            {  
                if(recommend[i][j] != 0)  
                    fw.write("::"+hotel[recommend[i][j]]);  
            }  
            fw.write("\r\n");  
        }  
        data_about.close();  
        read_data_about.close();  
        fw.close();  
    }  

    //拆分数据集为测试集test和训练集trainuser,其中1/m为测试集,取不同的k<=m-1值 在相同的随即种子下可得到不同的测/训集合  
    public static int SplitData(int m, int k)  
    {        

        int usernum = 0;  
        int itemnum = 0;  

        try {  
            FileReader data_about=new FileReader(road);  
            BufferedReader read_data_about=new BufferedReader(data_about);  
            String s2; //暂存文件一行记录  
            try {  
                while((s2=read_data_about.readLine())!=null){  
                    //寻找数据集每条记录对应的用户号和物品号  
                    int sum = 0,ok = 0;  
                    for(int m_ = 0;m_ < s2.length();++m_) {  
                        if(s2.charAt(m_) != ':')  
                            sum = sum * 10 + s2.charAt(m_) - 48;  
                        else {  
                            m_ += 1;  
                            if(ok == 0) {usernum = sum;ok = 1;}  
                            else {  
                                itemnum = sum;  
                                break;  
                            }  
                            sum = 0;  
                        }  
                    }  

                    if (usernum <= usersum && itemnum <= itemsum)  
                    {  
                        //if(System.currentTimeMillis()%(m-1)==k) //设置当前时间为随机种子  //判断随机产生0-7之间的随机数是否等于k  
                        //   test[itemnum-1][usernum-1] = 1;        //rate为评分,再此实验中只需统计有无评分的,无需讨论具体评分  
                        //else  
                           train[itemnum-1][usernum-1] = 1;  //用户号的物品号均从0开始算起,  
                    }  
                }  
            } catch (IOException e1) {  
                // TODO Auto-generated catch block  
                e1.printStackTrace();  
            }  
            try {  
                data_about.close();  
            } catch (IOException e) {  
                // TODO Auto-generated catch block  
                e.printStackTrace();  
            }  
            try {  
                read_data_about.close();  
            } catch (IOException e) {  
                // TODO Auto-generated catch block  
                e.printStackTrace();  
            }  
        } catch (FileNotFoundException e) {  
            // TODO Auto-generated catch block  
            e.printStackTrace();  
        }  

        return 1;  
    }  

    //利用训练集计算用户之间相似度  
    /* 计算向量ItemA和ItemB的相似性,返回值为ItemA和ItemB的相似度 */  
    public static double Simility(int[] ItemA, int[] ItemB)  
    {  
        int comUser = 0;                   //ItemA与ItemB的都被用户评论的用户个数  
        double simility = 0.0;  
        int countIa = 0;  
        int countIb = 0;  

        int i;  
        for (i=0;i<usersum;i++)      //此处有bug,已修改  
        {  
            if (ItemA[i]>0&&ItemB[i]>0)  
            {  
                comUser++;//查找ItemA与ItemB的都被用户评论的用户个数  
            }  
            if (ItemA[i]>0){  
                countIa++;//评论ItemA的用户数量  
            }  
            if (ItemB[i]>0){  
                countIb++;//评论ItemB的用户数量  
            }  
        }  
        double tem = Math.sqrt(countIa*countIb);  
        //double tem = 1;  
        //System.out.println(tem);  
        if(tem == 0)  
        {  
            return 0;  
        }  
        else  
        {  
            simility = comUser/tem;  
            return simility;  
        }  

    }  


    /*物品相似性矩阵排序(根据相似性由高到低排序)*/  
    public static void quickSort(int x, int start, int end) {     
        if (start < end) {     
            double base = simiItem[x][start].value; // 选定的基准值(第一个数值作为基准值)     
            double temp; // 记录临时中间值     
            int i_tmp;  
            int i = start, j = end;     
            do {     
                while ((simiItem[x][i].value > base) && (i < end))     
                    i++;     
                while ((simiItem[x][j].value < base) && (j > start))     
                    j--;     
                if (i <= j) {      
                    temp = simiItem[x][i].value;     
                    simiItem[x][i].value = simiItem[x][j].value;     
                    simiItem[x][j].value = temp;    
                    i_tmp = simiItem[x][i].num;     
                    simiItem[x][i].num = simiItem[x][j].num;     
                    simiItem[x][j].num = i_tmp;     
                    i++;     
                    j--;     
                }     
            } while (i <= j);     
            if (start < j)     
                quickSort(x, start, j);     
            if (end > i)     
                quickSort(x, i, end);     
        }     
    }    
    public static int sort()  
    {  
        for (int i=0;i<itemsum;i++)  
        {  

            for(int j = 0; j < itemsum; ++j) {  
                simiItem[i][j].num = j;  
                simiItem[i][j].value = itemsim[i][j];  
            }  
            quickSort(i,0,itemsum-1);  
        }  
        return 1;  

    }  

    //得到用户i对物品j预测兴趣程度,用于推荐  
    public static double getUserLikeItem(int i,int j,int k)  
    {  
        for(int x=0;x<k;x++)//从物品j最相似的k个物品中,找出用户i有过行为的物品  
        {  
            //System.out.println(simiItem[j][x].num);  
            if(train[simiItem[j][x].num][i]>0)//若这个用户同样对相似物品也有过行为  
            {  
                trainuseritem[i][j]+=simiItem[j][x].value;  
            }  
        }  
        return trainuseritem[i][j];  
    }  

    /*通过物品兴趣程度,推荐前N个*/   
    public static int getRecommend() //有bug,已修改  
    {  
        int maxnum;//当前最感兴趣物品号  
        for(int i=0;i<usersum;i++)  
        {  

            int []finflag = new int[itemsum];  

            for (int x=0;x<N;x++)//推荐N个  
            {  
                maxnum = 0;  
                while(maxnum < itemsum && finflag[maxnum]!=0)  
                    maxnum++;  
                for (int j=0;j<itemsum;j++)  //每循环一次就寻找此次感兴趣最大的物品  
                {  

                    if (trainuseritem[i][maxnum] < trainuseritem[i][j]&&finflag[j]==0)  
                        maxnum = j;  
                }  
                finflag[maxnum] = 1;  
                if(trainuseritem[i][maxnum] != 0)  
                    recommend[i][x]=maxnum+1;//recommend数组从1开始使用  
            }  
        }  
        return 1;  
    }  

}  


[java] view plain copy
package WjPack;  

import java.io.BufferedReader;  
import java.io.FileNotFoundException;  
import java.io.FileReader;  
import java.io.FileWriter;  
import java.io.IOException;  
import java.util.ArrayList;  
import java.util.HashMap;  
import java.util.HashSet;  
import java.util.Iterator;  
import java.util.Map;  
import java.util.Set;  

public class new_ojld_dis {
    /*
     * Entry point. Left disabled by the original author; call run() from a driver instead.
     */

    /*public static void main(String[] args) throws IOException {

        run();

    }*/


    /*
     * User-based recommendation over the social graph: for every user, look at the users
     * they follow, and when a followed user rated all shared hotels identically
     * (Euclidean distance 0), recommend that user's other hotels.
     */


    static String road_main = "data";
    static String road = road_main + "/6 总评论情感分析结果/酒店-评论(已转化).txt";// ratings, one "user::item::rating" per line
    static String road2 = road_main + "/10 推荐/(已转化)用户-关注.txt";  // follow graph, one "user::followed user" per line
    static String road3 = road_main + "/10 推荐/(已转化)天河酒店ID.txt";
    static String road4 = road_main + "/10 推荐/用户推荐.txt";
    static String road5 = road_main + "/10 推荐/(已转化)用户ID.txt";

    static int usersum = 20836;     // number of users (replaced by get_user_hotel_num)
    static int itemsum = 200;       // number of items (replaced by get_user_hotel_num)

    // user id -> (item id -> rating)
    static Map<String,HashMap<String,Integer>> score = new HashMap<String,HashMap<String,Integer>>();
    static Set<String> userSet = new HashSet<String>();
    static Set<String> filmSet = new HashSet<String>();

    static FileWriter txtw;

    // Recommendations collected for the user currently being processed, as "::name::name...".
    static String tjhotel = "";

    // Hotel names indexed by hotel number; grown on demand by get_hotelid.
    static String []hotel = new String[201];

    // Every user that has at least one rating, in order of first appearance in the ratings file.
    static ArrayList<String> arr;
    static {

        arr = new ArrayList<String>();
        try {
            score = get_score_from_road();
        } catch (IOException e) {
            // Best effort: the class stays usable with an empty rating table, but say why.
            System.err.println("new_ojld_dis: could not load ratings from " + road + ": " + e);
        }
    }

    /**
     * Computes recommendations for every user in {@link #arr} and writes one
     * {@code user<id>::hotel::hotel...} line to {@link #road4} for each user that got any.
     *
     * @throws IOException if an input file cannot be read or the output cannot be written
     */
    public static void run() throws IOException {
        // Load the names first so a broken ID file does not leave a truncated output file behind.
        get_hotelid(road3);

        try (FileWriter out = new FileWriter(road4)) {
            txtw = out;
            // Iterate over the users actually loaded instead of a hard-coded count.
            for (int m = 0; m < arr.size(); ++m) {
                tjhotel = "";
                new_ojld_dis.outNearbyUserList(arr.get(m));

                if (tjhotel.length() > 1)
                    out.write("user"+arr.get(m)+tjhotel+"\r\n");
            }
        }
    }

    /** Refreshes {@link #usersum} and {@link #itemsum} from the ID files and prints both. */
    public static void init() {
        try {
            get_user_hotel_num();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.out.println(usersum);
        System.out.println(itemsum);

    }

    /**
     * Sets {@link #itemsum} to the number of lines in {@link #road3} and {@link #usersum}
     * to the number of lines in {@link #road5}.
     *
     * @throws IOException if either file cannot be read
     */
    public static void get_user_hotel_num() throws IOException {
        itemsum = countLines(road3);
        usersum = countLines(road5);
    }

    /** Counts the lines of a text file, closing it even when reading fails. */
    private static int countLines(String path) throws IOException {
        int lines = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            while (reader.readLine() != null) lines++;
        }
        return lines;
    }

    /**
     * Reads the ratings file {@link #road} ({@code user::item::rating} per line) into a
     * user -> (item -> rating) map, and appends every newly seen user to {@link #arr} once.
     * A user's records need not be contiguous. Malformed records are skipped.
     *
     * @return the rating table; empty when the file has no valid records
     * @throws IOException if the ratings file cannot be read
     */
    public static Map<String,HashMap<String,Integer>> get_score_from_road() throws IOException {
        init();
        Map<String,HashMap<String,Integer>> result = new HashMap<String,HashMap<String,Integer>>();
        try (BufferedReader reader = new BufferedReader(new FileReader(road))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 3) continue;
                String username = fields[0];
                String filmname = fields[1];
                Integer rating;
                try {
                    rating = Integer.valueOf(fields[2].trim());
                } catch (NumberFormatException e) {
                    continue;
                }

                HashMap<String,Integer> ratings = result.get(username);
                if (ratings == null) {
                    ratings = new HashMap<String,Integer>();
                    result.put(username, ratings);
                    arr.add(username);   // first time this user is seen
                }
                ratings.put(filmname, rating);
            }
        }
        return result;
    }



    /**
     * Collects recommendations for {@code user} into {@link #tjhotel}: every rated user
     * that {@code user} follows (per {@link #road2}) is compared with {@code user}, and
     * matching users contribute their hotels (see getOSScore).
     *
     * @param user id of the user to recommend for
     * @throws IOException if the follow file cannot be read
     */
    public static void outNearbyUserList(String user) throws IOException {
        Set<String> followed = new HashSet<String>();
        try (BufferedReader reader = new BufferedReader(new FileReader(road2))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length >= 2 && fields[0].equals(user)) {
                    followed.add(fields[1]);
                }
            }
        }

        for (int m = 0; m < arr.size(); ++m) {
            String tempUser = arr.get(m);
            if (tempUser == null || tempUser.equals(user) || !followed.contains(tempUser)) {
                continue;
            }
            // Called for its side effect: a distance of 0 appends hotels to tjhotel.
            getOSScore(user, tempUser);
        }
    }

    /**
     * Euclidean distance between two users over the hotels both have rated:
     * {@code sqrt(sum((rating1 - rating2)^2))}. When the distance is exactly 0, every hotel
     * rated by {@code user2} but not by {@code user1} is appended to {@link #tjhotel}.
     *
     * @return the distance, or 100 when the users share no rated hotel or either is unknown
     */
    private static Double getOSScore(String user1, String user2) throws NumberFormatException, IOException {
        HashMap<String,Integer> user1Score = score.get(user1);
        HashMap<String,Integer> user2Score = score.get(user2);
        double totalscore = 100;   // sentinel: nothing in common
        if (user1Score == null || user2Score == null) {
            return totalscore;
        }

        // Accumulate over ALL shared hotels; keeping only the last one made the result
        // depend on HashMap iteration order.
        double squaredSum = 0;
        boolean shared = false;
        for (Map.Entry<String,Integer> mine : user1Score.entrySet()) {
            Integer theirs = user2Score.get(mine.getKey());
            if (theirs == null) continue;
            double diff = mine.getValue() - theirs;
            squaredSum += diff * diff;
            shared = true;
        }
        if (shared) {
            totalscore = Math.sqrt(squaredSum);
        }

        if (totalscore == 0) {
            for (String film : user2Score.keySet()) {
                if (!user1Score.containsKey(film)) {
                    tjhotel += "::" + hotelName(film);
                }
            }
        }
        return totalscore;
    }

    /** Resolves a hotel number to its name, falling back to the raw id when no name is known. */
    private static String hotelName(String film) {
        try {
            int id = Integer.parseInt(film.trim());
            if (id >= 0 && id < hotel.length && hotel[id] != null) {
                return hotel[id];
            }
        } catch (NumberFormatException ignored) {
            // Non-numeric id: fall through and use it verbatim.
        }
        return film;
    }

    /**
     * Loads the hotel ID file ({@code name::number} per line) into {@link #hotel}, growing
     * the array when a number does not fit. Malformed records are skipped.
     *
     * @param r1 path of the hotel ID file
     * @throws IOException if the file cannot be read
     */
    public static void get_hotelid(String r1) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(r1))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) continue;
                int number;
                try {
                    number = Integer.parseInt(fields[1].trim());
                } catch (NumberFormatException e) {
                    continue;
                }
                if (number < 0) continue;
                if (number >= hotel.length) {
                    String[] bigger = new String[Math.max(number + 1, hotel.length * 2)];
                    System.arraycopy(hotel, 0, bigger, 0, hotel.length);
                    hotel = bigger;
                }
                hotel[number] = fields[0];
            }
        }
    }

}

课程设计写的代码,可以用的,不过没有写注释,但也不是很难看懂,先了解一下原理再看看代码就差不多了~



PS:因为抓到的用户ID和酒店ID数据都是类似434132这么长的编号,为了方便我用数组存,事先我全部转化为从1开始的编号了。比如说有两个编号4654654、32131321,那我就转成1、2了,处理完推荐完再把1、2转回4654654、32131321

猜你喜欢

转载自blog.csdn.net/w690333243/article/details/80304808