HDU - 2896 (Aho-Corasick AC自动机)

题目链接

AC自动机:先根据若干模式串建立一棵字典树,再通过BFS构建fail指针,最后在自动机上对文本串进行匹配查询。

首先普通字典树的建立。

/*
 * Reads one whitespace-delimited pattern from stdin and inserts it into the trie.
 *
 * Missing nodes are allocated from the global counter `cnt`; the node reached by
 * the last character is tagged with the next pattern id taken from `num`.
 * If no token can be read, the function returns without touching the trie.
 * A pattern containing a byte that does not fit the transition table is not
 * inserted, but it still consumes an id so later patterns keep their numbering.
 */
void add()
{
	char s[200+10];
	// Bound the read to the buffer (209 characters + terminator).
	if(scanf("%209s", s) != 1) return;

	// Alphabet size is taken from the node layout instead of repeating a literal.
	const int sigma = static_cast<int>(sizeof(tree[0].child) / sizeof(tree[0].child[0]));
	const int len = static_cast<int>(strlen(s));

	// Validate first so a rejected pattern leaves no half-built path behind.
	for(int i = 0; i < len; i ++) {
		// Going through unsigned char keeps bytes >= 0x80 from becoming negative indices.
		if(static_cast<unsigned char>(s[i]) >= sigma) {
			num ++;
			return;
		}
	}

	int rt = 0;
	for(int i = 0; i < len; i ++) {
		const int now = static_cast<unsigned char>(s[i]);
		if(tree[rt].child[now] == 0) {
			tree[rt].child[now] = cnt ++;   // no child for this character yet: allocate a node
		}
		rt = tree[rt].child[now];
	}

	if(len > 0) tree[rt].rail = num ++;     // mark the end of the pattern with its id
}

fail指针的建立方法:

首先root的儿子们的fail全部指向root。

若a节点的fail指向b,则a节点的儿子c的fail指向b的儿子中和c相同字母的节点;若b没有这个字母的真实儿子,则取b在build()中补出的虚拟儿子(即沿fail链继续回跳得到的节点,最终可能是root)。

用一句话:fail[tree[now].child[i]] = tree[fail[now]].child[i]; (now为当前节点即上文a,i为now儿子的字母编号)

/*
 * Builds the failure links with a BFS over the trie and fills in every missing
 * transition, so that afterwards tree[x].child[c] is defined for all nodes x.
 */
void build()
{
	std::queue<int> pending;

	// Enqueue every existing child of the root; their failure link is the
	// root itself (index 0), which is the value fail[] already holds.
	for(int c = 0; c < 130; ++c) {
		const int kid = tree[0].child[c];
		if(kid != 0) pending.push(kid);
	}

	while(!pending.empty()) {
		const int cur = pending.front();
		pending.pop();
		const int fallback = fail[cur];

		for(int c = 0; c < 130; ++c) {
			int &edge = tree[cur].child[c];
			const int viaFail = tree[fallback].child[c];

			if(edge == 0) {
				// No real child for c: give cur a virtual one that points
				// to wherever its failure node goes on c.
				edge = viaFail;
				continue;
			}

			// Real child: its failure link is the failure node's c-transition.
			fail[edge] = viaFail;
			pending.push(edge);
		}
	}
}

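最后匹配:从根节点开始在自动机上匹配。由于build()已经把不存在的儿子补成了指向fail链上对应节点的虚拟边,每读入一个字符只需执行 now = tree[now].child[c] 即可完成转移(效果等价于"当前节点没有该儿子时,先跳到fail再查找")。转移之后,再从now出发沿fail链向上走,收集所有以当前位置结尾的模式串;走过的节点用vis标记,遇到已访问的节点即可停止,避免重复统计。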

/*
 * Runs the text `s` through the automaton and collects the ids of all patterns
 * that occur in it.
 *
 * Results are written to ans[0 .. cnt-1]; note that this resets the global `cnt`
 * (which add() uses as the node allocator), so no pattern may be added after the
 * first query. `vis` is cleared on every call so each pattern is reported once.
 *
 * Returns 1 if at least one pattern was found, 0 otherwise.
 */
int query(char s[])
{
	memset(vis, 0, sizeof(vis));
	cnt = 0;

	// Alphabet size is taken from the node layout instead of repeating a literal.
	const int sigma = static_cast<int>(sizeof(tree[0].child) / sizeof(tree[0].child[0]));
	const int len = static_cast<int>(strlen(s));
	int flag = 0;
	int now = 0;

	for(int i = 0; i < len; i ++) {
		// Going through unsigned char keeps bytes >= 0x80 from becoming negative indices.
		const int c = static_cast<unsigned char>(s[i]);
		if(c >= sigma) {
			// No transition exists for this byte, so nothing can match across it.
			now = 0;
			continue;
		}

		now = tree[now].child[c];

		// Walk the failure chain; a visited node means the rest of the chain
		// was already reported earlier in this query.
		for(int j = now; j && vis[j] == 0; j = fail[j]) {
			if(tree[j].rail) {
				ans[cnt ++] = tree[j].rail;
				flag = 1;
			}
			vis[j] = 1;
		}
	}
	return flag;
}

完整代码:

#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <iostream>
#include <string>
#include <queue>
#include <stack>
#include <vector>
#include <map>
#include <set>
#include <bitset>
using namespace std;

typedef long long ll;
//#define int ll
// Large sentinel value whose every byte is 0x3f.
#define INF 0x3f3f3f3f3f3f3f3f
// Capacity constants. Parenthesized so they expand correctly inside larger
// expressions (e.g. MAXN * 2 is 200020, not 100000 + 10 * 2).
#define MAXM (2000000 + 10)
#define MAXN (100000 + 10)
const ll mod = 1e9 + 7;
// Shorthand member names (presumably for std::pair).
#define fir first
#define sec second

int n;        // number of patterns, read in main()
int m;        // number of texts, read in main()
int cnt = 1;  // next free trie node index (0 is the root); query() reuses it as the length of ans
int num = 1;  // id given to the next inserted pattern

// One node of the trie / automaton.
struct TREE {
	int rail;        // id of the pattern ending at this node, 0 if none
	int child[130];  // transition per character code; 0 means "no child" until build() fills it in
};
TREE tree[MAXN];

int fail[MAXN];  // failure link of each node
int ans[MAXN];   // pattern ids collected by the most recent query()
int vis[MAXN];   // per-query visited flags for nodes

/*
 * Reads one whitespace-delimited pattern from stdin and inserts it into the trie.
 *
 * Missing nodes are allocated from the global counter `cnt`; the node reached by
 * the last character is tagged with the next pattern id taken from `num`.
 * If no token can be read, the function returns without touching the trie.
 * A pattern containing a byte that does not fit the transition table is not
 * inserted, but it still consumes an id so later patterns keep their numbering.
 */
void add()
{
	char s[200+10];
	// Bound the read to the buffer (209 characters + terminator).
	if(scanf("%209s", s) != 1) return;

	// Alphabet size is taken from the node layout instead of repeating a literal.
	const int sigma = static_cast<int>(sizeof(tree[0].child) / sizeof(tree[0].child[0]));
	const int len = static_cast<int>(strlen(s));

	// Validate first so a rejected pattern leaves no half-built path behind.
	for(int i = 0; i < len; i ++) {
		// Going through unsigned char keeps bytes >= 0x80 from becoming negative indices.
		if(static_cast<unsigned char>(s[i]) >= sigma) {
			num ++;
			return;
		}
	}

	int rt = 0;
	for(int i = 0; i < len; i ++) {
		const int now = static_cast<unsigned char>(s[i]);
		if(tree[rt].child[now] == 0) {
			tree[rt].child[now] = cnt ++;   // no child for this character yet: allocate a node
		}
		rt = tree[rt].child[now];
	}

	if(len > 0) tree[rt].rail = num ++;     // mark the end of the pattern with its id
}

/*
 * Builds the failure links with a BFS over the trie and fills in every missing
 * transition, so that afterwards tree[x].child[c] is defined for all nodes x.
 */
void build()
{
	std::queue<int> pending;

	// Enqueue every existing child of the root; their failure link is the
	// root itself (index 0), which is the value fail[] already holds.
	for(int c = 0; c < 130; ++c) {
		const int kid = tree[0].child[c];
		if(kid != 0) pending.push(kid);
	}

	while(!pending.empty()) {
		const int cur = pending.front();
		pending.pop();
		const int fallback = fail[cur];

		for(int c = 0; c < 130; ++c) {
			int &edge = tree[cur].child[c];
			const int viaFail = tree[fallback].child[c];

			if(edge == 0) {
				// No real child for c: borrow the failure node's transition.
				edge = viaFail;
				continue;
			}

			// Real child: its failure link is the failure node's c-transition.
			fail[edge] = viaFail;
			pending.push(edge);
		}
	}
}

/*
 * Runs the text `s` through the automaton and collects the ids of all patterns
 * that occur in it.
 *
 * Results are written to ans[0 .. cnt-1]; note that this resets the global `cnt`
 * (which add() uses as the node allocator), so no pattern may be added after the
 * first query. `vis` is cleared on every call so each pattern is reported once.
 *
 * Returns 1 if at least one pattern was found, 0 otherwise.
 */
int query(char s[])
{
	memset(vis, 0, sizeof(vis));
	cnt = 0;

	// Alphabet size is taken from the node layout instead of repeating a literal.
	const int sigma = static_cast<int>(sizeof(tree[0].child) / sizeof(tree[0].child[0]));
	const int len = static_cast<int>(strlen(s));
	int flag = 0;
	int now = 0;

	for(int i = 0; i < len; i ++) {
		// Going through unsigned char keeps bytes >= 0x80 from becoming negative indices.
		const int c = static_cast<unsigned char>(s[i]);
		if(c >= sigma) {
			// No transition exists for this byte, so nothing can match across it.
			now = 0;
			continue;
		}

		now = tree[now].child[c];

		// Walk the failure chain; a visited node means the rest of the chain
		// was already reported earlier in this query.
		for(int j = now; j && vis[j] == 0; j = fail[j]) {
			if(tree[j].rail) {
				ans[cnt ++] = tree[j].rail;
				flag = 1;
			}
			vis[j] = 1;
		}
	}
	return flag;
}

/*
 * Program entry point.
 *
 * Input:  n, then n patterns; m, then m texts.
 * Output: for every text (1-based index i) containing at least one pattern, a line
 *         "web i: id id ..." with the matching pattern ids in ascending order,
 *         followed by a final "total: k" line with the number of such texts.
 */
int main()
{
	// A failed read is treated as a count of zero.
	if(scanf("%d", &n) != 1) n = 0;
	for(int i = 0; i < n; i ++) add();

	build();

	if(scanf("%d", &m) != 1) m = 0;
	int tot = 0;
	char s[10000+10];
	for(int i = 1; i <= m; i ++) {
		// Bound the read to the buffer and stop on truncated input instead of
		// querying an uninitialized or stale buffer.
		if(scanf("%10009s", s) != 1) break;

		if(query(s)) {
			tot ++;
			printf("web %d: ", i);
			// query() left the number of collected ids in cnt.
			std::sort(ans, ans + cnt);
			for(int j = 0; j < cnt; j ++)
				printf("%d%c", ans[j], j == cnt - 1 ? '\n' : ' ');
		}
	}
	printf("total: %d\n", tot);

	return 0;
}

/*

The WAM is F**KING interesting .

*/

猜你喜欢

转载自blog.csdn.net/ooB0Boo/article/details/88053278