[Ybt Advanced 2-5-3] Prefix matching

Prefix match

Topic link: ybt efficient advanced 2-5-3

General idea

You are given one large string and several small strings. For each small string, find the length of its longest prefix that occurs as a substring of the large string.
Each string consists of only four characters: E, S, W, and N.

Ideas

Since the strings are very long, we do not combine the AC automaton with some other technique; instead, we adapt the AC automaton itself.

How can it be deformed?
We can enumerate every prefix of every small string and ask whether it appears in the large string; each such query should be answered in $O(1)$.

How do you do it? We consider preprocessing it out.
Originally we would ask whether each small string appears in the large string. Instead, we build a Trie from all the small strings and then enumerate the prefixes of the large string. For each prefix we look at which of its suffixes exist in the Trie (that is, we walk to the matching node and keep jumping along the fail edges), marking every node we visit. Every string marked this way is a substring of the large string.

Then it's fine.

Code

#include<queue>
#include<cstdio>
#include<cstring>

using namespace std;

// One node of the AC automaton built over the small strings.
struct Trie {
	// Next node for each character code returned by get_go(); 0 means the
	// root (which also stands for "no child" before build_fail() runs).
	int son[4];
	// Fail link: index of the node to fall back to.
	int fail;
	// Set by add() once the string spelled by this node has been found
	// inside the large string.
	bool in;
};

// Node pool; index 0 is the root. Static storage, so every field starts at zero.
Trie tree[10000001];
// n: length of the large string; m: number of small strings (both read in main()).
// ans: not referenced anywhere in the visible code.
// tot: index of the most recently allocated trie node (node 0 is the root).
int n, m, ans[100001], tot;
// s: the large string, stored 1-indexed in s[1..n].
// c[i]: the i-th small string (1-indexed), NUL-terminated.
char s[10000001], c[100001][101];
// Work queue used by build_fail() for its breadth-first pass over the trie.
queue <int> q;

// Maps a direction letter to a compact child index so that each trie node
// only needs four child slots.
// Returns 0..3 for 'E', 'S', 'W', 'N' respectively, and -1 for any other
// character (the reader uses -1 to detect characters it has to skip).
int get_go(char c) {
	switch (c) {
		case 'E': return 0;
		case 'S': return 1;
		case 'W': return 2;
		case 'N': return 3;
		default:  return -1;
	}
}

// Inserts the small string c[op] into the trie, allocating a new node
// (numbered ++tot) for every character that has no child yet.
void insert(int op) {
	int node = 0;  // start at the root
	for (const char *p = c[op]; *p != '\0'; ++p) {
		int &child = tree[node].son[get_go(*p)];
		if (child == 0) child = ++tot;  // 0 means "no child yet"
		node = child;
	}
}

// Builds the fail links of the trie with a breadth-first traversal and, at
// the same time, fills every missing child slot with the transition taken
// from the fail node, so that son[] can be followed directly afterwards.
void build_fail() {
	// Nodes directly below the root fall back to the root; they seed the queue.
	for (int ch = 0; ch < 4; ++ch) {
		const int child = tree[0].son[ch];
		if (child == 0) continue;
		tree[child].fail = 0;
		q.push(child);
	}

	while (!q.empty()) {
		const int cur = q.front();
		q.pop();
		const int fallback = tree[cur].fail;

		for (int ch = 0; ch < 4; ++ch) {
			const int via_fail = tree[fallback].son[ch];
			int &child = tree[cur].son[ch];
			if (child != 0) {
				// Real child: its fail link is where the parent's fail node
				// goes on the same character.
				tree[child].fail = via_fail;
				q.push(child);
			} else {
				// No child: borrow the fail node's transition.
				child = via_fail;
			}
		}
	}
}

// Runs the large string s[1..n] through the automaton and sets `in` on every
// trie node whose string occurs somewhere inside the large string.
//
// After each character the current node is marked together with the nodes on
// its fail chain. The walk stops at the first node that is already marked:
// a node is only ever marked as part of such a walk, so everything further
// along its fail chain has been marked before. This keeps the total work at
// O(n + number of trie nodes) instead of re-walking a full chain per character.
void add() {
	int now = 0;  // current automaton state, starting at the root
	for (int i = 1; i <= n; i++) {
		now = tree[now].son[get_go(s[i])];
		// The root (index 0) is never marked; it only terminates the walk.
		for (int node = now; node != 0 && !tree[node].in; node = tree[node].fail)
			tree[node].in = true;
	}
}

int ask(int op) {
    
    
	int size = strlen(c[op]);
	int now = 0, re = 0;
	for (int i = 0; i < size; i++) {
    
    //询问每个前缀是否存在于 Trie 数中
		int go = get_go(c[op][i]);
		now = tree[now].son[go];
		if (tree[now].in) re = i + 1;//存在,要找最长距离的
	}
	return re;
}

// Reads n, m, the large string and the m small strings from standard input,
// builds the automaton, marks every trie node that occurs in the large
// string, and prints one answer per small string.
int main() {
	// Without both counts there is nothing meaningful to do.
	if (scanf("%d %d", &n, &m) != 2) return 0;

	// s and c are 1-indexed, so the last usable index is (capacity - 1).
	const int max_n = static_cast<int>(sizeof(s)) - 1;
	const int max_m = static_cast<int>(sizeof(c) / sizeof(c[0])) - 1;
	if (n < 0 || n > max_n || m < 0 || m > max_m) return 0;

	for (int i = 1; i <= n; i++) {
		// Skip everything that is not one of the four direction letters
		// (whitespace, line breaks), but stop at end of input instead of
		// spinning forever.
		int ch = getchar();
		while (ch != EOF && get_go(static_cast<char>(ch)) == -1) ch = getchar();
		if (ch == EOF) {
			n = i - 1;  // the large string was shorter than announced
			break;
		}
		s[i] = static_cast<char>(ch);
	}

	for (int i = 1; i <= m; i++) {
		// The width keeps the token inside c[i] (100 characters + NUL).
		// On a failed read the string is left empty.
		if (scanf("%100s", c[i]) != 1) c[i][0] = '\0';
		insert(i);
	}

	build_fail();

	add();

	for (int i = 1; i <= m; i++) {
		printf("%d\n", ask(i));
	}

	return 0;
}

Guess you like

Origin blog.csdn.net/weixin_43346722/article/details/114415750