Java进行词频统计并排序

要求:当前目录下给出一个名为"news.txt"的文件,该文件中每个word以空格间隔,标点符号已经被去除。请写出一个完整的可执行的Java程序,对"news.txt"文件中出现的每个word进行词频统计,并按词频从大到小的顺序输出词频最高的10个word。

package com.lyp.exam;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;


/**
 * Counts how often each whitespace-separated word occurs in {@code news.txt}
 * (resolved against the current working directory) and prints the most
 * frequent words, highest count first.
 */
public class exam2 {
	/** Input file name, relative to the current working directory. */
	private static final String INPUT_FILE = "news.txt";

	/** Maximum number of words to print. */
	private static final int TOP_N = 10;

	/**
	 * Reads {@code news.txt}, tallies word frequencies and prints up to
	 * {@code TOP_N} lines of the form {@code word   count} to standard output.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if the input file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		String text;
		// try-with-resources guarantees the stream is closed even if reading fails.
		try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(new File(INPUT_FILE)))) {
			// Decoded with the platform default charset, as before.
			text = new String(in.readAllBytes());
		}

		List<Map.Entry<String, Integer>> ranked = rank(countWords(text));

		// The file may contain fewer than TOP_N distinct words.
		int limit = Math.min(TOP_N, ranked.size());
		for (int i = 0; i < limit; i++) {
			Map.Entry<String, Integer> entry = ranked.get(i);
			System.out.println(entry.getKey() + "   " + entry.getValue());
		}
	}

	/** Tallies the occurrences of every non-empty, whitespace-delimited token in {@code text}. */
	private static Map<String, Integer> countWords(String text) {
		Map<String, Integer> counts = new HashMap<>();
		// "\\s+" treats runs of spaces, tabs and line breaks as a single separator.
		for (String word : text.split("\\s+")) {
			// Leading whitespace (or an empty file) yields one empty token; skip it.
			if (!word.isEmpty()) {
				counts.merge(word, 1, Integer::sum);
			}
		}
		return counts;
	}

	/** Returns the entries ordered by descending count; equal counts are ordered by word. */
	private static List<Map.Entry<String, Integer>> rank(Map<String, Integer> counts) {
		List<Map.Entry<String, Integer>> ranked = new ArrayList<>(counts.entrySet());
		Comparator<Map.Entry<String, Integer>> byCountDesc =
				(a, b) -> b.getValue().compareTo(a.getValue());
		// The key tie-break makes the output independent of HashMap iteration order.
		ranked.sort(byCountDesc.thenComparing(Map.Entry.<String, Integer>comparingByKey()));
		return ranked;
	}
}




猜你喜欢

转载自blog.csdn.net/qq_14809159/article/details/102943803