AC自动机(KMP算法+Trie树)

1、应用(目的)

处理多模式串匹配的问题,例如给出n个单词(模式串),再给出一篇包含m个字符的文章(主串),问有多少个单词在文章中出现过。

2、主要步骤:

(1)构建一棵Trie树

(2)对Trie树上的所有节点构造前缀指针(即失败指针)。

(3)利用前缀指针对主串进行匹配。

3、重点是第二步:构建next前缀指针。

以hdu2222为例

Keywords Search

Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 131072/131072 K (Java/Others)
Total Submission(s): 78325    Accepted Submission(s): 27223


 

Problem Description

扫描二维码关注公众号,回复: 3498521 查看本文章

In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.

 

Input

First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.

 

Output

Print how many keywords are contained in the description.

 

Sample Input

 

1
5
she
he
say
shr
her
yasherhs

 

Sample Output

 

3

 

Author

Wiskey

 

Recommend

lcy   |   We have carefully selected several similar problems for you:  2896 3065 2243 2825 3341 

 

Statistic | Submit | Discuss | Note

题意:第一行输入一个T,表示有T组测试数据。每组测试数据先输入一个n,接下来输入n个单词,

最后一行再输入一个字符串,问这个字符串里出现了给出的n个单词中的几个。

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

// One node of the keyword trie used by the Aho-Corasick automaton.
struct Node
{
	Node *fail;      // failure link, filled in by Build_AC
	int cnt;         // number of keywords ending at this node; query() sets it to -1 once counted
	Node *next[26];  // children indexed by (letter - 'a'); NULL when absent
};
// Handle to a trie node ("Tire" is the spelling used throughout this program).
typedef struct Node* Tire;

// Text that is searched for keywords (up to 1000000 characters plus terminator).
char s[1000005];

// Buffer for one keyword while it is being read (up to 50 characters).
char keyword[55];

// Root of the trie for the current test case.
Tire root;

// Array-backed FIFO used by the breadth-first traversal in Build_AC.
Tire queue[500005];

// Resets a freshly allocated node: no children, no failure link, zero keyword count.
void Init(Tire t)
{
	for(Tire* slot=t->next;slot!=t->next+26;++slot)
	{
		*slot=NULL;
	}
	t->fail=NULL;
	t->cnt=0;
}

void Build_Tire(char* s) //构建Tire树 
{
	int i,l=strlen(s),v;
	Tire p=root,q;
	for(i=0;i<l;i++)
	{
		v=s[i]-'a';
		if(p->next[v]==NULL) //插入新节点 
		{
			q = new Node;
			Init(q);
			p->next[v]=q;
		}
		p=p->next[v]; //连接到下一个节点 
	}
	p->cnt++; //标记结尾的节点 
}

// Computes the failure link of every node in the trie rooted at t.
// Nodes are visited breadth-first through the global `queue` array, so a
// node's failure link is already set when its children are processed.
void Build_AC(Tire t)
{
	int push_at=0,pop_at=0;
	queue[push_at++]=t;
	while(pop_at!=push_at)
	{
		Tire parent=queue[pop_at++];
		for(int c=0;c<26;c++)
		{
			Tire child=parent->next[c];
			if(child==NULL) continue;

			// Default target is the root; it stays that way for children of
			// the root and when no ancestor on the failure chain has a c-child.
			child->fail=t;
			if(parent!=t)
			{
				// Walk the parent's failure chain until a node with a c-child is found.
				for(Tire anc=parent->fail;anc!=NULL;anc=anc->fail)
				{
					if(anc->next[c]!=NULL)
					{
						child->fail=anc->next[c];
						break;
					}
				}
			}
			queue[push_at++]=child; // schedule the child for its own expansion
		}
	}
}

// Scans the global text `s` with the automaton rooted at t and returns the
// sum of `cnt` over every keyword node reached. A node is counted only once:
// after being counted its cnt is set to -1, which also stops later walks early.
int query(Tire t)
{
	int matched=0;
	Tire state=t;
	const int len=strlen(s);
	for(int pos=0;pos<len;pos++)
	{
		const int c=s[pos]-'a';
		// Follow failure links until a transition on c exists or the root is reached.
		while(state!=t&&state->next[c]==NULL) state=state->fail;
		state=state->next[c];
		if(state==NULL) state=t; // the root has no transition on c either
		// Collect every not-yet-counted node on the failure chain of the current state.
		for(Tire hit=state;hit!=t&&hit->cnt>=0;hit=hit->fail)
		{
			matched+=hit->cnt;
			hit->cnt=-1;
		}
	}
	return matched;
}

int main(void)
{
	int T,n;
	scanf("%d",&T);
	while(T--)
	{
		root=new Node;
		Init(root);
		scanf("%d",&n);
		for(int i=0;i<n;i++)
		{
			scanf("\n%s",keyword);
			Build_Tire(keyword);
		}
		Build_AC(root);
		scanf("\n%s",s);
		printf("%d\n",query(root));
	}
	return 0;
}

参考文章:https://blog.csdn.net/liu940204/article/details/51345954

https://blog.csdn.net/u013371163/article/details/60469534

另一种解法:

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// Capacity of the trie arrays. Up to 10000 keywords of up to 50 characters
// can create 500000 nodes in addition to the root (index 1) and the virtual
// node 0, so the arrays must hold at least 500002 entries.
const int maxn = 500005;

// ans      : running answer for the current test case
// cnt      : index of the most recently allocated trie node (root is 1)
// nxt[u]   : failure link of node u
// bo[u]    : number of keywords ending at node u
// que[]    : array-backed queue for the breadth-first pass in bfs()
// ch[u][c] : child / transition of node u on letter c (0 = absent before bfs())
int ans,cnt,nxt[maxn],bo[maxn],que[maxn],ch[maxn][30];
// Inserts keyword s into the array-based trie (root is node 1).
// New nodes take the next free index from `cnt` and have their row cleared.
void Build_Tire(char* s)
{
	int node=1;
	for(const char* p=s;*p!='\0';++p)
	{
		const int c=*p-'a';
		int& child=ch[node][c];
		if(child==0)
		{
			child=++cnt;
			memset(ch[cnt],0,sizeof(ch[cnt])); // the row may hold data from an earlier test case
		}
		node=child;
	}
	++bo[node]; // one more keyword ends here
}

void bfs()
{
	for(int i=0;i<26;i++) ch[0][i]=1;
	que[1]=1;nxt[1]=0;
	for(int q1=1,q2=1;q1<=q2;q1++)
	{
		int u=que[q1];
		for(int i=0;i<26;i++)
		{
			if(!ch[u][i]) ch[u][i]=ch[nxt[u]][i];
			else
			{
				que[++q2]=ch[u][i];
				int v=nxt[u];
				nxt[ch[u][i]]=ch[v][i];
			}
		}
	}
}

// Runs text s through the automaton and adds to `ans` the keyword count of
// every node on the failure chain of each visited state. Counts are zeroed
// once added, so each keyword node contributes only once.
void find(char* s)
{
	int state=1;
	for(const char* p=s;*p!='\0';++p)
	{
		state=ch[state][*p-'a'];
		for(int k=state;k>1;k=nxt[k])
		{
			ans+=bo[k];
			bo[k]=0;
		}
	}
}

// Reads t test cases. For each case: resets the trie, inserts n keywords,
// builds the failure links, then scans the description and prints how many
// keywords it contains.
int main(void)
{
	// Static storage: the description can be 1000000 characters long, which
	// is too large to place safely on the stack.
	static char s[1000005];
	int n,t;
	if(scanf("%d",&t)!=1) return 0;
	for(int j=0;j<t;j++)
	{
		ans=0;
		cnt=1;
		memset(bo,0,sizeof(bo));
		// Reset the virtual node 0 and the root (node 1) over the 26 letters;
		// bfs() of the previous case overwrote the root's missing transitions.
		for(int i=0;i<26;i++) ch[0][i]=1,ch[1][i]=0;
		if(scanf("%d",&n)!=1) break;
		for(int i=0;i<n;i++)
		{
			// Field width keeps the read inside the buffer.
			if(scanf("%1000004s",s)!=1) break;
			Build_Tire(s);
		}
		bfs();
		// A missing description is treated as empty.
		if(scanf("%1000004s",s)!=1) s[0]='\0';
		find(s);
		printf("%d\n",ans);
	}
	return 0;
}

猜你喜欢

转载自blog.csdn.net/qq_41829060/article/details/82828541
今日推荐