用 Java 实现文本词频统计，并把结果输出到指定的文件中

import java.io.BufferedReader;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.FileReader;

import java.io.IOException;

import java.io.PrintStream;

import java.io.Reader;

import java.util.ArrayList;

import java.util.Collection;

import java.util.Collections;

import java.util.Comparator;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import java.util.SortedMap;

import java.util.TreeMap;

 

import javax.jws.Oneway;

 

/**
 * Counts how often each word occurs in a text file and writes the result, most frequent
 * word first, to an output file.
 *
 * <p>A word is a maximal run of ASCII letters ({@code a-z}, {@code A-Z}); every other
 * character acts as a separator. Counting is case-sensitive, so {@code The} and {@code the}
 * are reported as two different words.
 */
public class WordCount {

    /** Input file read when no input path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "C:/work/word.txt";

    /** Output file written when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "2017113782.txt";

    /** Separator pattern: any single character that is not an ASCII letter. */
    private static final String NON_LETTER_REGEX = "[^a-zA-Z]";

    /**
     * Reads the input file, counts its words and writes one {@code word: count} line per
     * distinct word to the output file.
     *
     * <p>While the report is written, {@link System#out} is redirected to the output file;
     * the previous stream is restored before this method returns, even on failure.
     *
     * @param args optional; {@code args[0]} overrides the input path and {@code args[1]}
     *     overrides the output path. Missing entries fall back to the built-in defaults.
     * @throws Exception if the input cannot be read or the output cannot be created
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        // Count before opening the output so an unreadable input does not truncate an
        // existing result file.
        Map<String, Integer> counts = countWords(inputPath);

        PrintStream previousOut = System.out;
        try (PrintStream fileOut = new PrintStream(new FileOutputStream(outputPath))) {
            System.setOut(fileOut); // SortMap prints to System.out
            SortMap(counts);
        } finally {
            System.setOut(previousOut);
        }
    }

    /**
     * Prints every entry of {@code oldmap} to {@link System#out} as {@code key: value},
     * ordered by value from largest to smallest.
     *
     * <p>The sort is stable, so entries with equal values keep the iteration order of
     * {@code oldmap} (alphabetical by key when a {@link TreeMap} is passed in). The map
     * itself is not modified.
     *
     * @param oldmap word-to-count mapping to print
     */
    public static void SortMap(Map<String, Integer> oldmap) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(oldmap.entrySet());

        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                // Integer.compare instead of subtraction: a difference can overflow and
                // flip the sign, which would order extreme values incorrectly.
                return Integer.compare(o2.getValue(), o1.getValue());
            }
        });

        for (Map.Entry<String, Integer> entry : entries) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Reads the file at {@code inputPath} line by line and tallies each word.
     *
     * <p>The file is decoded with the JVM's default charset, as {@link FileReader} does
     * when given only a file name.
     *
     * @param inputPath path of the text file to read
     * @return the occurrences of each word, in a {@link TreeMap} keyed by word
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> countWords(String inputPath) throws IOException {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String word : line.split(NON_LETTER_REGEX)) {
                    if (word.isEmpty()) {
                        continue; // adjacent separators produce empty tokens
                    }
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
        }
        return counts;
    }
}

the: 12

China: 9

Boeing: 6

Eastern: 6

Airlines: 5

MAX: 5

to: 5

and: 4

for: 4

has: 4

in: 4

The: 3

compensation: 3

it: 3

on: 3

with: 3

Paper: 2

a: 2

been: 2

by: 2

from: 2

had: 2

issue: 2

more: 2

of: 2

two: 2

According: 1

Air: 1

At: 1

Bloomberg: 1

Co: 1

Corp: 1

Fourteen: 1

Ltd: 1

March: 1

Southern: 1

Tuesday: 1

Wednesday: 1

about: 1

adding: 1

after: 1

airline: 1

an: 1

any: 1

are: 1

as: 1

ask: 1

asks: 1

caused: 1

cited: 1

claims: 1

clients: 1

communication: 1

communications: 1

companies: 1

company: 1

confirmed: 1

controllable: 1

crashes: 1

deadly: 1

delivery: 1

disruption: 1

effect: 1

first: 1

grounded: 1

grounding: 1

have: 1

impact: 1

industry: 1

its: 1

jets: 1

knowledge: 1

last: 1

late: 1

limited: 1

lives: 1

lodged: 1

losses: 1

makes: 1

matter: 1

months: 1

move: 1

news: 1

not: 1

officially: 1

outlet: 1

over: 1

people: 1

preliminary: 1

present: 1

previously: 1

quoted: 1

refused: 1

report: 1

representative: 1

s: 1

saying: 1

seek: 1

since: 1

six: 1

stay: 1

stoppages: 1

stressed: 1

suffered: 1

suspensions: 1

talk: 1

talks: 1

than: 1

that: 1

took: 1

under: 1

verified: 1

which: 1

whole: 1

will: 1

worldwide: 1

yet: 1

 

猜你喜欢

转载自blog.csdn.net/qq_41479464/article/details/91879423