AC自动机:先根据若干模式串(子串)构建一棵字典树(Trie),再通过BFS构建fail指针,最后在自动机上对文本串进行查询。
首先是普通字典树的建立。
// Reads one pattern (whitespace-delimited token) from stdin and inserts it
// into the trie rooted at node 0. The node reached by the last character is
// tagged with the next pattern id (num), and num is then advanced.
//
// Does nothing if no token could be read. A pattern containing a character
// code that does not fit the 130-entry child table is not inserted, but its id
// is still consumed so that later patterns keep their input-order numbering.
void add()
{
    char s[200 + 10];
    // Limit the read to the buffer size, and stop on EOF/read failure so the
    // buffer is never inspected while uninitialised.
    if (scanf("%209s", s) != 1) return;

    const int alphabet = 130;                      // size of TREE::child
    const int len = static_cast<int>(strlen(s));

    // Plain char may be signed: go through unsigned char so a byte >= 0x80
    // cannot become a negative index, and reject codes beyond the table.
    for (int i = 0; i < len; i++) {
        if (static_cast<unsigned char>(s[i]) >= alphabet) {
            num++;
            return;
        }
    }

    int rt = 0;
    for (int i = 0; i < len; i++) {
        const int now = static_cast<unsigned char>(s[i]);
        if (tree[rt].child[now] == 0) {
            tree[rt].child[now] = cnt++;           // no child for this letter yet: allocate a node
        }
        rt = tree[rt].child[now];
    }
    // A successful %s read always yields at least one character, so rt is the
    // node of the pattern's last character here: record the end of the pattern.
    tree[rt].rail = num++;
}
fail指针的建立方法:
首先root的儿子们的fail全部指向root。
若a节点的fail指向b,则a节点的儿子c的fail指向b的儿子中与c字母相同的节点;如果b没有这个儿子,则沿用b在该字母上的"虚拟儿子"(即b的fail链上对应的节点,最终可能是root)。
用一句代码概括:fail[tree[now].child[i]] = tree[fail[now]].child[i]; (now为当前节点,即上文的a;i为now的儿子对应的字母编号)
// Builds the fail pointers with a breadth-first traversal of the trie and
// fills every missing child slot with a "virtual" child taken from the fail
// node, so that matching can follow child[] directly.
void build()
{
    std::queue<int> pending;

    // Push every existing child of the root (node 0). Their fail pointers are
    // not written here and keep whatever value fail[] already holds.
    for (int letter = 0; letter < 130; ++letter) {
        const int kid = tree[0].child[letter];
        if (kid != 0) {
            pending.push(kid);
        }
    }

    while (!pending.empty()) {
        const int cur = pending.front();
        pending.pop();
        const int fallback = fail[cur];

        for (int letter = 0; letter < 130; ++letter) {
            int &slot = tree[cur].child[letter];
            const int viaFail = tree[fallback].child[letter];

            if (slot == 0) {
                // Child for this letter does not exist: give cur a virtual
                // child, which is really the matching child of its fail node.
                slot = viaFail;
                continue;
            }

            // Child exists: its fail is the same-letter child of cur's fail.
            fail[slot] = viaFail;
            pending.push(slot);
        }
    }
}
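最后是匹配:从根节点开始,按文本串的字符依次在自动机上转移。如果当前节点有下一个字符对应的儿子,就转移到这个儿子;如果没有,就先转移到当前节点的fail,再查找fail是否有该字符的儿子。由于build()已经把缺失的儿子补成了指向fail对应儿子的"虚拟儿子",代码中只需一次 now = tree[now].child[s[i]] 即可完成这一转移;每到达一个节点,再沿fail链向上收集所有在该位置结尾的子串。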
// Runs the text s through the automaton and collects the ids of all patterns
// that occur in it.
//
// Results are written to the globals: ans[0..cnt) receives the pattern ids
// (each trie node is reported at most once per call) and cnt is set to their
// count. Returns 1 if at least one pattern was found, otherwise 0.
//
// NOTE: cnt is the same global that add() uses as the next free trie node
// index, so add() must not be called again after a query.
int query(char s[])
{
    memset(vis, 0, sizeof(vis));
    cnt = 0;

    const int alphabet = 130;                      // size of TREE::child
    const int len = static_cast<int>(strlen(s));
    int flag = 0;
    int now = 0;

    for (int i = 0; i < len; i++) {
        // Plain char may be signed; index through unsigned char so a byte
        // >= 0x80 cannot become a negative (out-of-bounds) index.
        const int c = static_cast<unsigned char>(s[i]);
        if (c >= alphabet) {
            // No trie edge can carry this code (the child table has only 130
            // slots), so no pattern can continue across it: restart at root.
            now = 0;
            continue;
        }

        now = tree[now].child[c];

        // Walk the fail chain to report every pattern ending at this position.
        // A visited node means the rest of its chain was already walked.
        for (int j = now; j && vis[j] == 0; j = fail[j]) {
            if (tree[j].rail) {
                ans[cnt++] = tree[j].rail;
                flag = 1;
            }
            vis[j] = 1;
        }
    }
    return flag;
}
完整代码:
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <string>
#include <queue>
#include <stack>
#include <vector>
#include <map>
#include <set>
#include <bitset>
using namespace std;
typedef long long ll;
//#define int ll
// Size macros are parenthesised so they expand as a single value inside larger
// expressions (e.g. MAXN * 2 is 200020, not 100000 + 10 * 2).
#define INF 0x3f3f3f3f3f3f3f3f
#define MAXM (2000000 + 10)
#define MAXN (100000 + 10)
const ll mod = 1e9 + 7;
#define fir first
#define sec second
// n   - number of patterns, read in main().
// m   - number of texts to scan, read in main().
// cnt - while inserting: index handed to the next newly created trie node
//       (starts at 1 because node 0 is the root). query() resets it to 0 and
//       reuses it as the number of ids stored in ans[].
// num - id given to the next inserted pattern; ids start at 1, so a rail value
//       of 0 means "no pattern ends at this node".
int n, m, cnt = 1, num = 1;
// One trie/automaton node; tree[0] is the root.
struct TREE {
// Id of the pattern that ends at this node, or 0 if none does.
int rail;
// Node index reached on each character code; 0 means "no child" until build()
// replaces missing entries with the corresponding entry of the fail node.
int child[130];
} tree[MAXN];
// fail[v] - fail pointer of node v, assigned in build() for every node that is
//           not a direct child of the root (those keep the initial value 0).
int fail[MAXN];
// ans[] - pattern ids found by the latest query() call (first cnt entries).
// vis[] - per-query marks for nodes whose fail chain was already walked.
int ans[MAXN], vis[MAXN];
// Reads one pattern (whitespace-delimited token) from stdin and inserts it
// into the trie rooted at node 0. The node reached by the last character is
// tagged with the next pattern id (num), and num is then advanced.
//
// Does nothing if no token could be read. A pattern containing a character
// code that does not fit the 130-entry child table is not inserted, but its id
// is still consumed so that later patterns keep their input-order numbering.
void add()
{
    char s[200 + 10];
    // Limit the read to the buffer size, and stop on EOF/read failure so the
    // buffer is never inspected while uninitialised.
    if (scanf("%209s", s) != 1) return;

    const int alphabet = 130;                      // size of TREE::child
    const int len = static_cast<int>(strlen(s));

    // Plain char may be signed: go through unsigned char so a byte >= 0x80
    // cannot become a negative index, and reject codes beyond the table.
    for (int i = 0; i < len; i++) {
        if (static_cast<unsigned char>(s[i]) >= alphabet) {
            num++;
            return;
        }
    }

    int rt = 0;
    for (int i = 0; i < len; i++) {
        const int now = static_cast<unsigned char>(s[i]);
        if (tree[rt].child[now] == 0) {
            tree[rt].child[now] = cnt++;           // allocate a node for a new letter
        }
        rt = tree[rt].child[now];
    }
    // A successful %s read always yields at least one character, so rt is the
    // node of the pattern's last character here.
    tree[rt].rail = num++;
}
// Computes fail pointers breadth-first over the trie and replaces every
// missing child entry with the matching entry of the node's fail target, so
// transitions during matching are a single child[] lookup.
void build()
{
    std::queue<int> pending;

    // Seed the traversal with the root's real children; their fail values are
    // left untouched.
    for (int letter = 0; letter < 130; ++letter) {
        const int kid = tree[0].child[letter];
        if (kid != 0) {
            pending.push(kid);
        }
    }

    while (!pending.empty()) {
        const int cur = pending.front();
        pending.pop();
        const int fallback = fail[cur];

        for (int letter = 0; letter < 130; ++letter) {
            int &slot = tree[cur].child[letter];
            const int viaFail = tree[fallback].child[letter];

            if (slot == 0) {
                // Missing child: borrow the transition of the fail node.
                slot = viaFail;
                continue;
            }

            // Real child: it fails to the same-letter transition of cur's fail.
            fail[slot] = viaFail;
            pending.push(slot);
        }
    }
}
// Runs the text s through the automaton and collects the ids of all patterns
// that occur in it.
//
// Results are written to the globals: ans[0..cnt) receives the pattern ids
// (each trie node is reported at most once per call) and cnt is set to their
// count. Returns 1 if at least one pattern was found, otherwise 0.
//
// NOTE: cnt is the same global that add() uses as the next free trie node
// index, so add() must not be called again after a query.
int query(char s[])
{
    memset(vis, 0, sizeof(vis));
    cnt = 0;

    const int alphabet = 130;                      // size of TREE::child
    const int len = static_cast<int>(strlen(s));
    int flag = 0;
    int now = 0;

    for (int i = 0; i < len; i++) {
        // Plain char may be signed; index through unsigned char so a byte
        // >= 0x80 cannot become a negative (out-of-bounds) index.
        const int c = static_cast<unsigned char>(s[i]);
        if (c >= alphabet) {
            // No trie edge can carry this code (the child table has only 130
            // slots), so no pattern can continue across it: restart at root.
            now = 0;
            continue;
        }

        now = tree[now].child[c];

        // Walk the fail chain to report every pattern ending at this position.
        // A visited node means the rest of its chain was already walked.
        for (int j = now; j && vis[j] == 0; j = fail[j]) {
            if (tree[j].rail) {
                ans[cnt++] = tree[j].rail;
                flag = 1;
            }
            vis[j] = 1;
        }
    }
    return flag;
}
// Reads n patterns, builds the automaton, then reads m texts. For every text
// containing at least one pattern, prints "web <index>: " followed by the
// matched pattern ids in ascending order; finally prints the number of such
// texts as "total: <count>".
int main()
{
    // Treat a missing/invalid count as zero instead of using an unread value.
    if (scanf("%d", &n) != 1) n = 0;
    for (int i = 0; i < n; i++) add();
    build();

    if (scanf("%d", &m) != 1) m = 0;
    int tot = 0;
    for (int i = 1; i <= m; i++) {
        char s[10000 + 10];
        // Bound the read to the buffer size; stop at EOF so an uninitialised
        // buffer is never passed to query().
        if (scanf("%10009s", s) != 1) break;

        int ok = query(s);
        if (ok) {
            tot++;
            printf("web %d: ", i);
            // query() leaves the matched ids in ans[0..cnt).
            std::sort(ans, ans + cnt);
            for (int j = 0; j < cnt; j++)
                printf("%d%c", ans[j], j == cnt - 1 ? '\n' : ' ');
        }
    }
    printf("total: %d\n", tot);
    return 0;
}
/*
The WAM is F**KING interesting .
*/