使用 Fork/Join 实现词云统计

使用 Fork/Join 框架并行完成词云的词频统计

创建线程池
    // Create a Fork/Join pool using the default constructor
    ForkJoinPool forkJoinPool = new ForkJoinPool();
	// Submit the root task; invoke() returns the task's result once it has completed
    Map<String, Integer> map = forkJoinPool.invoke(new ForkRecursiveTask(crawlerData));
 

fork 主任务

  • 继承 RecursiveTask<Map<String, Integer>>,重写 compute 方法
  • 将待统计文本按固定数量分组,每组创建一个子任务并通过 fork 提交
  • 通过 join 等待各子任务执行完成,并合并统计结果
package com.ikfti.task;

import com.ikfti.model.OriginalPostVo;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;

/**
 * Root fork/join task of the word-cloud statistics.
 *
 * <p>It collects the text of the given posts, splits the texts into fixed-size groups, forks one
 * {@link WordCloudTask} per group and merges the per-group word counts into a single map.
 *
 * @author: craywen
 * @date: 2020-07-24 16:32
 */
public class ForkRecursiveTask extends RecursiveTask<Map<String, Integer>> {

    private static final long serialVersionUID = 1L;

    /** Maximum number of texts handed to a single {@link WordCloudTask}. */
    private static final int GROUP_SIZE = 200;

    /** Posts whose title and content are counted; {@code null} is treated as "no posts". */
    private final List<OriginalPostVo> originalPostVoList;

    /**
     * @param originalPostVos posts to analyse; may be {@code null} or empty, in which case
     *                        {@link #compute()} returns an empty map
     */
    public ForkRecursiveTask(List<OriginalPostVo> originalPostVos) {
        this.originalPostVoList = originalPostVos;
    }

    /**
     * Forks one subtask per group of texts, waits for all of them and sums up their counts.
     *
     * @return word to number of occurrences across all posts; never {@code null}
     */
    @Override
    protected Map<String, Integer> compute() {
        List<String> texts = collectTexts();

        // Fork every group first so the pool can work on them in parallel ...
        List<ForkJoinTask<Map<String, Integer>>> tasks = new ArrayList<>();
        for (List<String> group : groupList(texts)) {
            tasks.add(new WordCloudTask(group).fork());
        }

        // ... and only then join them, folding each partial result into the total.
        Map<String, Integer> result = new HashMap<>();
        for (ForkJoinTask<Map<String, Integer>> task : tasks) {
            mergeCounts(result, task.join());
        }
        return result;
    }

    /**
     * Builds one text per post that has a non-blank title or a non-blank content.
     *
     * <p>A missing title or content contributes nothing; plain string concatenation would
     * otherwise insert the literal text "null" and that would be counted as a word.
     */
    private List<String> collectTexts() {
        List<String> texts = new ArrayList<>();
        if (originalPostVoList == null) {
            return texts;
        }
        for (OriginalPostVo post : originalPostVoList) {
            if (post == null) {
                continue;
            }
            String title = post.getTitle();
            String content = post.getContent();
            if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(content)) {
                // The space keeps the last word of the title from fusing with the first word
                // of the content when the text is segmented.
                texts.add(nullToEmpty(title) + " " + nullToEmpty(content));
            }
        }
        return texts;
    }

    /** Returns {@code value}, or the empty string when {@code value} is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Adds every count of {@code source} onto the matching entry of {@code target}. */
    private static void mergeCounts(Map<String, Integer> target, Map<String, Integer> source) {
        for (Map.Entry<String, Integer> entry : source.entrySet()) {
            Integer current = target.get(entry.getKey());
            if (current == null) {
                target.put(entry.getKey(), entry.getValue());
            } else {
                target.put(entry.getKey(), current + entry.getValue());
            }
        }
    }

    /**
     * Splits {@code list} into consecutive groups of at most 200 elements.
     *
     * <p>The groups are {@link List#subList(int, int)} views backed by {@code list}, so the
     * original list must not be structurally modified while the groups are in use.
     *
     * @param list the elements to split; {@code null} yields an empty result
     * @return the groups in original order; the last group may be shorter than the others
     */
    public static List<List<String>> groupList(List<String> list) {
        List<List<String>> groups = new ArrayList<>();
        if (list == null) {
            return groups;
        }
        int size = list.size();
        for (int from = 0; from < size; from += GROUP_SIZE) {
            groups.add(list.subList(from, Math.min(from + GROUP_SIZE, size)));
        }
        return groups;
    }

    /** Small manual check of {@link #groupList(List)} with 101 elements. */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        for (int i = 0; i < 101; i++) {
            list.add(i + "");
        }
        List<List<String>> lists = groupList(list);
        System.out.println("list:" + list.toString());
        System.out.println(lists);
    }

}

join 子任务
  • 继承 RecursiveTask,重写 compute 方法
  • 在 compute 中完成业务逻辑处理(分词并统计词频)
package com.ikfti.task;

import com.ikfti.model.OriginalPostVo;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.RecursiveTask;

/**
 * Leaf fork/join task: segments a batch of texts with the IK segmenter and counts how often each
 * token longer than one character occurs.
 *
 * @author: craywen
 * @date: 2020-07-24 16:35
 */
public class WordCloudTask extends RecursiveTask<Map<String, Integer>> {

    private static final long serialVersionUID = 1L;

    /** Texts to segment and count. */
    private final List<String> contents;

    /**
     * @param file the texts this task is responsible for
     */
    public WordCloudTask(List<String> file) {
        this.contents = file;
    }

    /**
     * Business logic of the subtask (the "map" step): counts the tokens of every text.
     *
     * @return token to number of occurrences within this task's texts
     */
    @Override
    protected Map<String, Integer> compute() {
        Map<String, Integer> counts = new HashMap<>();
        for (String content : contents) {
            countTokens(content, counts);
        }
        return counts;
    }

    /**
     * Segments one text and adds its tokens to {@code counts}.
     *
     * <p>Tokens of length one are ignored. An {@link IOException} from the segmenter is printed
     * and ends the processing of this text only; counts gathered so far are kept.
     */
    private static void countTokens(String content, Map<String, Integer> counts) {
        // NOTE(review): the second argument is presumably IK's "useSmart" flag - confirm.
        IKSegmenter segmenter = new IKSegmenter(new StringReader(content), true);
        try {
            for (Lexeme lexeme = segmenter.next(); lexeme != null; lexeme = segmenter.next()) {
                String token = lexeme.getLexemeText();
                if (token.length() <= 1) {
                    continue;
                }
                // First occurrence starts at 1, later ones increment the stored count.
                Integer seen = counts.get(token);
                counts.put(token, seen == null ? 1 : seen + 1);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

效果图

在这里插入图片描述

  • 参考 https://blog.csdn.net/mn960mn/article/details/52595844?utm_source=blogxgwz7

猜你喜欢

转载自blog.csdn.net/qq_38893133/article/details/107759674