一、在浏览器的控制台(console)中引入js
// Inject an external script into the current page from the browser console.
// Build the <script> element with its source URL already set, then attach it to <head>.
var jquery = Object.assign(document.createElement('script'), {
  src: 'https://code.jquery.com/jquery-3.3.1.min.js'
});
document.getElementsByTagName('head').item(0).appendChild(jquery);
其中https://code.jquery.com/jquery-3.3.1.min.js
可以替换其他js。
二、浏览器无法识别ES2019等新版js语法
Babel 是一个 JavaScript 编译器
地址:https://www.babeljs.cn/
它可以将新版 JavaScript 语法转换为浏览器能够识别的语法。
三、数据清洗工具
OpenRefine 、Hawk、DataWrangler
OpenRefine 地址:http://openrefine.org/download.html
Hawk 地址:https://github.com/ferventdesert/Hawk
DataWrangler 地址:http://vis.stanford.edu/wrangler/app/
四、并发处理方法
package com.citydo.checkandbigdataquery.parallel;
import java.util.concurrent.*;
import java.util.stream.LongStream;
/**
 * Demonstrates several ways of summing a large {@code long[]}: a single-threaded loop,
 * one raw thread per chunk, a fixed thread pool, the fork/join framework and a parallel stream.
 *
 * <p>An instance of this class is a fork/join task that sums the elements of {@link #number}
 * in the half-open index range {@code [start, end)}.
 *
 * @author nick
 */
public class Parallel extends java.util.concurrent.RecursiveTask<Long> {
    private static final long serialVersionUID = 1L;

    /** Shared input array used by {@link #test1()}, {@link #test2()} and {@link #test3()}. */
    public static long[] numbers;

    /** Chunk size: ranges of at most this many elements are summed sequentially. */
    public static final int THRESHOLD = 10_000;

    /** Accumulator for {@link #test2()}; only accessed through synchronized static methods. */
    private static long allSum;

    /** Array summed by this fork/join task. */
    public final long[] number;
    /** First index (inclusive) covered by this task. */
    private final int start;
    /** Last index (exclusive) covered by this task. */
    private final int end;

    /**
     * Runs every demo in turn and prints each result.
     *
     * @param args unused
     * @throws Exception if a worker is interrupted or a pooled task fails
     */
    public static void main(String[] args) throws Exception {
        test1();
        test2();
        test3();
        System.out.println("forkJoinSum = " + forkJoinSum(10_000_000));
        System.out.println("parallelRangedSum = " + parallelRangedSum(10_000_000));
    }

    /**
     * Single-threaded sum.
     */
    public static void test1() {
        // NOTE(review): the original literal was written 10_00_000 (= 1,000,000), ten times
        // smaller than the other demos - confirm which size was intended.
        numbers = LongStream.rangeClosed(1, 1_000_000).toArray();
        long sum = 0;
        for (long value : numbers) {
            sum += value;
        }
        System.out.println(sum);
    }

    /**
     * Sums the array with one raw thread per chunk of {@link #THRESHOLD} elements.
     *
     * @throws Exception if the calling thread is interrupted while waiting for the workers
     */
    public static void test2() throws Exception {
        numbers = LongStream.rangeClosed(1, 10_000_000).toArray();
        resetAllSum();
        int taskSize = chunkCount(numbers.length);
        Thread[] workers = new Thread[taskSize];
        for (int i = 0; i < taskSize; i++) {
            final int from = i * THRESHOLD;
            final int to = Math.min(from + THRESHOLD, numbers.length);
            workers[i] = new Thread(() -> sumAll(sum(from, to)));
            workers[i].start();
        }
        // Join every worker instead of sleeping for a fixed time, so the total is complete
        // before it is printed no matter how long the threads take.
        for (Thread worker : workers) {
            worker.join();
        }
        System.out.println("allSum = " + getAllSum());
    }

    /** Clears the shared accumulator so repeated runs of {@link #test2()} start from zero. */
    private static synchronized void resetAllSum() {
        allSum = 0;
    }

    /**
     * Adds one worker's partial sum to the shared accumulator.
     *
     * @param threadSum partial sum computed by a worker
     * @return the accumulator value after the addition
     */
    private static synchronized long sumAll(long threadSum) {
        return allSum += threadSum;
    }

    /**
     * @return the current value of the shared accumulator
     */
    public static synchronized long getAllSum() {
        return allSum;
    }

    /**
     * Sums {@link #numbers} over the index range {@code [start, end)}.
     */
    private static long sum(int start, int end) {
        long sum = 0;
        for (int i = start; i < end; i++) {
            sum += numbers[i];
        }
        return sum;
    }

    /** Number of {@link #THRESHOLD}-sized chunks needed to cover {@code length} elements. */
    private static int chunkCount(int length) {
        // Round up so a trailing partial chunk is not silently dropped.
        return (length + THRESHOLD - 1) / THRESHOLD;
    }

    /**
     * Sums the array with a fixed thread pool, one task per chunk.
     *
     * @throws Exception if the calling thread is interrupted or a task fails
     */
    public static void test3() throws Exception {
        numbers = LongStream.rangeClosed(1, 10_000_000).toArray();
        ExecutorService executor =
                Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() + 1);
        try {
            CompletionService<Long> completionService = new ExecutorCompletionService<>(executor);
            int taskSize = chunkCount(numbers.length);
            for (int i = 0; i < taskSize; i++) {
                final int from = i * THRESHOLD;
                final int to = Math.min(from + THRESHOLD, numbers.length);
                completionService.submit(() -> sum(from, to));
            }
            long sumValue = 0;
            for (int i = 0; i < taskSize; i++) {
                sumValue += completionService.take().get();
            }
            System.out.println("sumValue = " + sumValue);
        } finally {
            // Always release the pool threads, even when a task fails.
            executor.shutdown();
        }
    }

    /**
     * Fork/join step: sums small ranges directly and splits larger ranges in half.
     *
     * @return the sum of {@link #number} over {@code [start, end)}
     */
    @Override
    protected Long compute() {
        int length = end - start;
        if (length <= THRESHOLD) {
            return computeSequentially();
        }
        int mid = start + length / 2;
        // Sub-tasks must work on this task's own array, not on the static demo array.
        Parallel leftTask = new Parallel(number, start, mid);
        leftTask.fork();
        Parallel rightTask = new Parallel(number, mid, end);
        long rightResult = rightTask.compute();
        // join() blocks, so it is called only after both halves have been started.
        long leftResult = leftTask.join();
        return leftResult + rightResult;
    }

    /** Sums this task's range of {@link #number} in the calling thread. */
    private long computeSequentially() {
        long sum = 0;
        for (int i = start; i < end; i++) {
            sum += number[i];
        }
        return sum;
    }

    /**
     * Sums the integers {@code 1..n} with the fork/join framework.
     *
     * @param n upper bound (inclusive) of the range to sum
     * @return the sum of {@code 1..n}
     */
    public static long forkJoinSum(long n) {
        long[] values = LongStream.rangeClosed(1, n).toArray();
        ForkJoinPool pool = new ForkJoinPool();
        try {
            return pool.invoke(new Parallel(values));
        } finally {
            pool.shutdown();
        }
    }

    /**
     * Creates a task that sums the whole array.
     *
     * @param number array to sum
     */
    public Parallel(long[] number) {
        this(number, 0, number.length);
    }

    private Parallel(long[] number, int start, int end) {
        this.number = number;
        this.start = start;
        this.end = end;
    }

    /**
     * Sums the integers {@code 1..n} with a parallel stream.
     *
     * @param n upper bound (inclusive) of the range to sum
     * @return the sum of {@code 1..n}
     */
    public static long parallelRangedSum(long n) {
        return LongStream.rangeClosed(1, n).parallel().reduce(0L, Long::sum);
    }
}
五、字符串包含关键词判断
package com.citydo.checkandbigdataquery.ahocorasick;
import org.ahocorasick.trie.Emit;
import org.ahocorasick.trie.Trie;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Compares several ways of checking whether a string contains every keyword of a list.
 */
public class AhoCorasick {

    /**
     * Small demo: parses a sample text with a whole-word trie and prints the emits and their count.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String inputString = "$$$$$$ 我们 测试 谢谢";
        Trie trie = Trie.builder().onlyWholeWords().addKeywords("$").build();
        Collection<Emit> emits = trie.parseText(inputString);
        emits.forEach(System.out::println);
        System.out.println(emits.size());
    }

    /**
     * Checks for the keywords with an Aho-Corasick trie built with {@code onlyWholeWords()}.
     *
     * @param inputString text to search
     * @param words keywords that must all be found
     * @return {@code true} if the trie emitted every keyword
     */
    public static boolean containsWordsAhoCorasick(String inputString, String[] words) {
        Trie trie = Trie.builder().onlyWholeWords().addKeywords(words).build();
        Collection<Emit> emits = trie.parseText(inputString);
        // Compare against the emitted keywords themselves; matching against the printed form
        // of the emits could also hit offsets or fragments of other keywords.
        return Arrays.stream(words)
                .allMatch(word -> emits.stream().anyMatch(emit -> word.equals(emit.getKeyword())));
    }

    /**
     * Checks for the keywords by splitting the text on single spaces and comparing whole tokens.
     *
     * @param inputString text to search
     * @param words keywords that must all be present as tokens
     * @return {@code true} if every keyword equals one of the tokens
     */
    public static boolean containsWordsArray(String inputString, String[] words) {
        List<String> inputStringList = Arrays.asList(inputString.split(" "));
        List<String> wordsList = Arrays.asList(words);
        return inputStringList.containsAll(wordsList);
    }

    /**
     * Same token comparison as {@link #containsWordsArray}, written with a stream.
     *
     * @param inputString text to search
     * @param words keywords that must all be present as tokens
     * @return {@code true} if every keyword equals one of the tokens
     */
    public static boolean containsWordsJava8(String inputString, String[] words) {
        List<String> inputStringList = Arrays.asList(inputString.split(" "));
        return Arrays.stream(words).allMatch(inputStringList::contains);
    }

    /**
     * Checks for the keywords with a regular expression made of one lookahead per keyword.
     *
     * @param inputString text to search
     * @param words keywords that must all occur as substrings
     * @return {@code true} if every keyword occurs in the text
     */
    public static boolean containsWordsPatternMatch(String inputString, String[] words) {
        StringBuilder regexp = new StringBuilder();
        for (String word : words) {
            // Quote the keyword so regex metacharacters such as '$' or '.' match literally.
            regexp.append("(?=.*").append(Pattern.quote(word)).append(")");
        }
        // DOTALL lets ".*" cross line terminators, so keywords on different lines are found.
        Pattern pattern = Pattern.compile(regexp.toString(), Pattern.DOTALL);
        return pattern.matcher(inputString).find();
    }

    /**
     * Checks for the keywords with {@link String#indexOf(String)}.
     *
     * @param inputString text to search
     * @param words keywords that must all occur as substrings
     * @return {@code true} if every keyword occurs in the text
     */
    public static boolean containsWordsIndexOf(String inputString, String[] words) {
        for (String word : words) {
            if (inputString.indexOf(word) == -1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks for the keywords with {@link String#contains(CharSequence)}.
     *
     * @param inputString text to search
     * @param words keywords that must all occur as substrings
     * @return {@code true} if every keyword occurs in the text
     */
    public static boolean containsWords(String inputString, String[] words) {
        for (String item : words) {
            if (!inputString.contains(item)) {
                return false;
            }
        }
        return true;
    }
}
<!-- The Aho-Corasick algorithm is used to search a text for multiple keywords. It has O(n) time complexity no matter how many keywords are searched or how long the text is. -->
<dependency>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>
<version>0.4.0</version>
</dependency>
六、计算单词个数
package com.citydo.checkandbigdataquery.map;
import lombok.var;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
/**
 * Three equivalent ways of counting word occurrences with a {@link HashMap}.
 *
 * @author nick
 */
public class merge {

    public static void main(String[] args) {
        test1();
        test2();
        test3();
    }

    /**
     * Counts words by looking up the previous count and checking it for {@code null}.
     */
    private static void test1() {
        List<String> words = Arrays.asList("Foo", "Bar", "Foo", "Buzz", "Foo", "Buzz", "Fizz", "Fizz");
        HashMap<String, Integer> counts = new HashMap<>(words.size());
        for (String word : words) {
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        System.out.println(counts.toString());
    }

    /**
     * Counts words by seeding missing keys with zero, then storing the incremented value.
     */
    private static void test2() {
        List<String> words = Arrays.asList("Foo", "Bar", "Foo", "Buzz", "Foo", "Buzz", "Fizz", "Fizz");
        HashMap<String, Integer> counts = new HashMap<>(words.size());
        for (String word : words) {
            counts.putIfAbsent(word, 0);
            int next = counts.get(word) + 1;
            counts.put(word, next);
        }
        System.out.println(counts.toString());
    }

    /**
     * Counts words by seeding missing keys with zero, then incrementing in place.
     */
    private static void test3() {
        List<String> words = Arrays.asList("Foo", "Bar", "Foo", "Buzz", "Foo", "Buzz", "Fizz", "Fizz");
        HashMap<String, Integer> counts = new HashMap<>(words.size());
        for (String word : words) {
            counts.putIfAbsent(word, 0);
            counts.computeIfPresent(word, (key, current) -> current + 1);
        }
        System.out.println(counts.toString());
    }
}