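// Task: a file named "news.txt" is provided in the current directory. Its words are
// separated by spaces and all punctuation has already been removed. Write a complete,
// runnable Java program that counts how often each word occurs in "news.txt" and prints
// the 10 most frequent words in descending order of frequency.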
package com.lyp.exam;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
 * Counts word frequencies in {@code news.txt} (looked up in the current working directory)
 * and prints the most frequent words, one per line, as {@code "<word> <count>"}.
 *
 * <p>Words are the maximal runs of non-whitespace characters in the file. At most
 * {@link #TOP_COUNT} words are printed, ordered by descending count; words with equal
 * counts are ordered alphabetically so the output is deterministic.
 */
public class exam2 {

    /** Name of the input file, resolved against the current working directory. */
    private static final String INPUT_FILE = "news.txt";

    /** Maximum number of words to print. */
    private static final int TOP_COUNT = 10;

    /**
     * Reads the input file, counts its words and prints the most frequent ones to standard output.
     *
     * @param args ignored
     * @throws Exception if the input file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String text;
        // try-with-resources guarantees the stream is closed even if reading fails.
        try (BufferedInputStream in =
                new BufferedInputStream(new FileInputStream(new File(INPUT_FILE)))) {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            text = new String(in.readAllBytes(), StandardCharsets.UTF_8);
        }

        List<Map.Entry<String, Integer>> ranked = rankByFrequency(countWords(text));

        // The file may contain fewer distinct words than TOP_COUNT; never index past the end.
        int limit = Math.min(TOP_COUNT, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            System.out.println(entry.getKey() + " " + entry.getValue());
        }
    }

    /** Returns a map from each whitespace-delimited word in {@code text} to its occurrence count. */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        // Split on any run of whitespace so newlines, tabs and repeated spaces all separate words.
        for (String word : text.split("\\s+")) {
            // Leading whitespace (or an empty file) yields one empty token; it is not a word.
            if (word.isEmpty()) {
                continue;
            }
            counts.merge(word, 1, Integer::sum);
        }
        return counts;
    }

    /** Returns the entries of {@code counts} sorted by descending count, then ascending word. */
    private static List<Map.Entry<String, Integer>> rankByFrequency(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> left, Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                if (byCount != 0) {
                    return byCount;
                }
                // Tie-break alphabetically so equal counts do not depend on HashMap ordering.
                return left.getKey().compareTo(right.getKey());
            }
        });
        return ranked;
    }
}