New Distinct Substrings

SP705

Given a string, we need to find the total number of its distinct substrings.
Input
T T - number of test cases. T < = 20 T<=20 ; Each test case consists of one string, whose length is < = 50000 <= 50000
Output
For each test case output one number saying the number of distinct substrings.
Example
Input:
2
CCCCC
ABABA
Output:
5
9

两种做法:
1.DP
由于在SAM中每一条路径都能走出不同的子串。
定义
dp[u]为从状态u出发能到达的所有路径,包括长度为0的。
转移方程
d p [ u ] = 1 + u > v d p [ v ] dp[u]=1+\sum\limits_{u->v}{dp[v]}
加1是含不往下走,到此为止。那么答案就是dp[0]-1,去除空串。
2.后缀连接树的性质
根据自动机和后缀连接树的性质,当前状态的子串的最短长度为其后缀连接树中父状态的最大长度加1。又因为自动机中每个状态内的串集合中串的长度是连续的,所以状态 v v 的子串个数= m a x l e n ( v ) m a x l e n ( l i n k ( v ) ) maxlen(v)-maxlen(link(v))
AC代码

#include<iostream>
#include<string>
#include<cstring>
#include<algorithm>
#include<vector>
#include<cmath>
#include<map>
using namespace std;
const int MAXN = 50005;
int n;
char S[MAXN];
struct SAM {
	int size, last;
	struct Node {
		int len, link;
		int next[26];
		void clear() {
			len = link = 0;
			memset(next, 0, sizeof(next));
		}
	} node[MAXN * 2];
	void init() {
		for (int i = 0; i < size; i++) {
			node[i].clear();
		}
		node[0].link = -1;
		size = 1;
		last = 0;
	}
	void insert(char x) {
		int ch = x - 'a';
		int cur = size++;
		node[cur].len = node[last].len + 1;
		int p = last;
		while (p != -1 && !node[p].next[ch]) {
			node[p].next[ch] = cur;
			p = node[p].link;
		}
		if (p == -1) {
			node[cur].link = 0;
		}
		else {
			int q = node[p].next[ch];
			if (node[p].len + 1 == node[q].len) {
				node[cur].link = q;
			}
			else {
				int clone = size++;
				node[clone] = node[q];
				node[clone].len = node[p].len + 1;
				while (p != -1 && node[p].next[ch] == q) {
					node[p].next[ch] = clone;
					p = node[p].link;
				}
				node[q].link = node[cur].link = clone;
			}
		}
		last = cur;
	}
}sam;
int main() {
	int T;
	scanf("%d", &T);
	while (T--) {
		sam.init();
		scanf("%s", S);
		n = strlen(S);
		for (int i = 0; i < n; i++) {
			sam.insert(S[i]);
		}
		long long&& Ans = 0;
		for (int i = 1; i < sam.size; ++i) {
			Ans += sam.node[i].len;
			//如果存在父状态
			if (sam.node[i].link != -1) {
				Ans -= sam.node[sam.node[i].link].len;
			}
		}
		printf("%lld\n", Ans);
	}
	return 0;
}
发布了43 篇原创文章 · 获赞 44 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/qq_42971794/article/details/104087448