学习Java几个月了。
做ACM时突然对字符串匹配感兴趣，
就总结一下Java字符串匹配的几种方法。
通过记录各方法的运行时间来比较，但结果不是很理想：
可能是字符串的长度有点短，
导致各算法之间的区别不是很大。
不过可以发现 indexOf 是比较快的。
下面有注释。
package one;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small benchmark comparing five ways of counting how many times a pattern
 * ("son") occurs inside a text ("mother"): brute force, KMP,
 * {@link String#indexOf(String, int)}, regular expressions and
 * {@link String#split(String, int)}.
 *
 * <p>All five counters follow the same contract so that their results are
 * comparable: the pattern is treated as literal text, occurrences are counted
 * without overlap in a left-to-right scan, and an empty pattern yields 0.
 * Each counter also stores the elapsed time of its most recent call, in
 * nanoseconds, in the corresponding {@code time_*} field.
 *
 * @author 轩xuan
 */
public class OK {
    // Elapsed time in nanoseconds of the most recent call to each counter.
    static long time_暴力 = 0;
    static long time_KMP = 0;
    static long time_JavaIndexOf = 0;
    static long time_正则表达式 = 0;
    static long time_split = 0;

    /**
     * Runs every counter once on a fixed sample text and prints a table with
     * the match count and the elapsed time of each approach.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Sample text and pattern.
        String mother = "94189123456469584185165156415615616894156132513202195609818651864165165486741874948464184861564984864894616479278678675527737975634465615878646565845894175691556473152321418434161555648164616546165411415153151"
                + "123456123456122311234457867123456867878675781056098156191234568121031544854507374682341456234589167664865567162341125824333115155416841561494185469484161864648641658461468468418642161581211132"
                + "16156156151654156156454187496184156468123456931482309815609871950320464563644589345123498491123354911311123456121312368584861641849641684846849816541741749498491513161068489114";
        String son = "123456";

        // Count the matches with every approach (each call records its own time).
        int bruteCount = Violence(mother, son);
        int kmpCount = KMP(mother, son);
        int indexOfCount = JavaIndexOf(mother, son);
        int regexCount = Regular_expression(mother, son);
        int splitCount = JavaSplit(mother, son);

        // Print the result table.
        System.out.println("\t匹配算法\t\t匹配个数\t\t匹配时间(纳秒)");
        System.out.println("\t暴力查找:\t\t" + bruteCount + "\t\t" + time_暴力);
        System.out.println("\tKMP 算法:\t" + kmpCount + "\t\t" + time_KMP);
        System.out.println("\t字符串IndexOf:\t" + indexOfCount + "\t\t" + time_JavaIndexOf);
        System.out.println("\t正则表达式:\t" + regexCount + "\t\t" + time_正则表达式);
        System.out.println("\t字符串Split:\t" + splitCount + "\t\t" + time_split);
    }

    /**
     * 1. Brute-force matching; worst case O(n*m) character comparisons.
     *
     * <p>Tries the pattern at each candidate start position. After a full
     * match the scan resumes right behind the match, so occurrences are
     * counted without overlap. The elapsed time is stored in {@code time_暴力}.
     *
     * @param mother text to search in
     * @param son    literal pattern to look for
     * @return number of non-overlapping occurrences; 0 if {@code son} is empty
     */
    public static int Violence(String mother, String son) {
        long begin = System.nanoTime();
        int ans = 0;
        if (!son.isEmpty()) {
            char[] text = mother.toCharArray();
            char[] pattern = son.toCharArray();
            int start = 0;
            while (start + pattern.length <= text.length) {
                int matched = 0;
                while (matched < pattern.length && text[start + matched] == pattern[matched]) {
                    matched++;
                }
                if (matched == pattern.length) {
                    ans++;
                    start += pattern.length; // skip the match: no overlapping counts
                } else {
                    start++;
                }
            }
        }
        time_暴力 = System.nanoTime() - begin;
        return ans;
    }

    /**
     * 2. Knuth-Morris-Pratt matching.
     *
     * <p>The failure table is built from the pattern. After a full match the
     * matcher state is reset to 0, so occurrences are counted without overlap.
     * The elapsed time (table construction included) is stored in
     * {@code time_KMP}.
     *
     * @param mother text to search in
     * @param son    literal pattern to look for
     * @return number of non-overlapping occurrences; 0 if {@code son} is empty
     */
    public static int KMP(String mother, String son) {
        long begin = System.nanoTime();
        int ans = 0;
        if (!son.isEmpty()) {
            char[] text = mother.toCharArray();
            char[] pattern = son.toCharArray();
            int[] next = buildFailureTable(pattern);
            for (int i = 0, j = 0; i < text.length; i++) {
                // On mismatch fall back to the longest border of the matched prefix.
                while (j > 0 && text[i] != pattern[j]) {
                    j = next[j - 1];
                }
                if (text[i] == pattern[j]) {
                    j++;
                }
                if (j == pattern.length) {
                    ans++;
                    j = 0; // restart from scratch: no overlapping counts
                }
            }
        }
        time_KMP = System.nanoTime() - begin;
        return ans;
    }

    /**
     * Builds the KMP failure table: {@code next[i]} is the length of the
     * longest proper prefix of {@code pattern[0..i]} that is also its suffix.
     */
    private static int[] buildFailureTable(char[] pattern) {
        int[] next = new int[pattern.length];
        for (int i = 1, j = 0; i < pattern.length; i++) {
            while (j > 0 && pattern[i] != pattern[j]) {
                j = next[j - 1];
            }
            if (pattern[i] == pattern[j]) {
                j++;
            }
            next[i] = j;
        }
        return next;
    }

    /**
     * 3. Matching with {@link String#indexOf(String, int)}.
     *
     * <p>Each search resumes right behind the previous match. The elapsed
     * time is stored in {@code time_JavaIndexOf}.
     *
     * @param mother text to search in
     * @param son    literal pattern to look for
     * @return number of non-overlapping occurrences; 0 if {@code son} is empty
     */
    public static int JavaIndexOf(String mother, String son) {
        long begin = System.nanoTime();
        int ans = 0;
        // An empty pattern would match at every index without advancing, so skip it.
        if (!son.isEmpty()) {
            int from = 0;
            while ((from = mother.indexOf(son, from)) != -1) {
                from += son.length();
                ans++;
            }
        }
        time_JavaIndexOf = System.nanoTime() - begin;
        return ans;
    }

    /**
     * 4. Matching with {@code java.util.regex}.
     *
     * <p>The pattern is quoted so that regex metacharacters in {@code son}
     * are matched literally. The elapsed time (pattern compilation included)
     * is stored in {@code time_正则表达式}.
     *
     * @param mother text to search in
     * @param son    literal pattern to look for
     * @return number of non-overlapping occurrences; 0 if {@code son} is empty
     */
    public static int Regular_expression(String mother, String son) {
        long begin = System.nanoTime();
        int ans = 0;
        if (!son.isEmpty()) {
            Matcher m = Pattern.compile(Pattern.quote(son)).matcher(mother);
            while (m.find()) {
                ans++;
            }
        }
        time_正则表达式 = System.nanoTime() - begin;
        return ans;
    }

    /**
     * 5. Matching with {@link String#split(String, int)}.
     *
     * <p>Splitting on the quoted pattern with a negative limit keeps trailing
     * empty strings, so k occurrences always produce k + 1 pieces. The
     * elapsed time is stored in {@code time_split}.
     *
     * @param mother text to search in
     * @param son    literal pattern to look for
     * @return number of non-overlapping occurrences; 0 if {@code son} is empty
     */
    public static int JavaSplit(String mother, String son) {
        long begin = System.nanoTime();
        int ans = 0;
        if (!son.isEmpty()) {
            ans = mother.split(Pattern.quote(son), -1).length - 1;
        }
        time_split = System.nanoTime() - begin;
        return ans;
    }
}