[SPOJ 705]New Distinct Substrings

Description

题库链接

给定一个长度为 \(n\) 的字符串,求不相同的子串的个数。

\(1\leq n\leq 50000\)

Solution

每个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间的不相同的前缀的个数。如果所有的后缀按照 \(sa\) 的顺序依次加入,不难发现,对于每一次新加进来的后缀 \(suffix(sa_k)\) ,它将产生 \(n-sa_k+1\) 个前缀,但是其中有 \(height_k\) 个与前面已加入的后缀的前缀相同(其中 \(height_k\) 表示 \(suffix(sa_k)\) 与 \(suffix(sa_{k-1})\) 的最长公共前缀长度,并规定 \(height_1=0\))。所以 \(suffix(sa_k)\) 将贡献出 \(n-sa_k+1-height_k\) 个不同的子串。累加后便是原问题的答案。

Code

#include <bits/stdc++.h>
#define ll long long
using namespace std;
const int N = 50000+5;  // buffer capacity: maximum string length (50000) plus slack for 1-based indexing

char ch[N];             // current string, 1-based: characters live in ch[1..n]
int t;                  // number of test cases still to be processed
int n;                  // length of the current string
int m;                  // upper bound of the key range used by the counting sort (set by the caller
                        // before get(); overwritten by get() with the number of distinct ranks)
int x[N<<1], y[N<<1];   // scratch buffers for the doubling sort (twice as long so i+k never overflows)
int c[N];               // counting-sort buckets
int sa[N];              // sa[r]     = start position of the suffix with rank r (1-based)
int rk[N];              // rk[i]     = rank of the suffix starting at position i
int height[N];          // height[r] = LCP length of the suffixes sa[r] and sa[r-1]; height[1] = 0

/**
 * Builds the suffix array and the height (LCP) array of ch[1..n].
 *
 * Inputs (globals): ch[1..n], n, and m, which must be at least the largest
 * character code (as unsigned char) that occurs in the string.
 * Outputs (globals): sa[1..n], rk[1..n], height[1..n]. m, c, x and y are clobbered.
 *
 * The suffix array is built by prefix doubling with a counting sort per round;
 * the height array is then filled by walking the suffixes in text order and
 * reusing the previous match length.
 */
void get() {
    // Rank slots n+1..2n stand for "empty suffix" and must compare lower than
    // every real rank. They are never written by the sort, so after a previous,
    // longer string they would still hold stale ranks and could make two
    // different suffixes look equal. Reset them for every call.
    std::fill(x + n + 1, x + 2 * n + 1, 0);
    std::fill(y + n + 1, y + 2 * n + 1, 0);

    // The two scratch buffers trade roles every round; swapping pointers
    // avoids copying both full-size arrays each time.
    int *key = x;  // key[i]: rank of suffix i by its first k characters
    int *tmp = y;  // tmp[1..n]: suffixes ordered by their second key

    // Initial counting sort by the first character. The cast keeps bytes
    // >= 0x80 from producing a negative bucket index where char is signed.
    for (int i = 1; i <= m; i++) c[i] = 0;
    for (int i = 1; i <= n; i++) {
        key[i] = static_cast<unsigned char>(ch[i]);
        c[key[i]]++;
    }
    for (int i = 2; i <= m; i++) c[i] += c[i-1];
    for (int i = n; i >= 1; i--) sa[c[key[i]]--] = i;

    for (int k = 1; k <= n; k <<= 1) {
        int num = 0;
        // Second-key order: suffixes shorter than k+1 have an empty second
        // half and come first; the rest follow in current sa order, shifted by k.
        for (int i = n-k+1; i <= n; i++) tmp[++num] = i;
        for (int i = 1; i <= n; i++) if (sa[i] > k) tmp[++num] = sa[i]-k;

        // Stable counting sort by first key, scanning the second-key order
        // backwards so ties keep that order.
        for (int i = 1; i <= m; i++) c[i] = 0;
        for (int i = 1; i <= n; i++) c[key[i]]++;
        for (int i = 2; i <= m; i++) c[i] += c[i-1];
        for (int i = n; i >= 1; i--) sa[c[key[tmp[i]]]--] = tmp[i];

        // Re-rank by the first 2k characters: tmp now holds the old ranks,
        // key receives the new ones.
        std::swap(key, tmp);
        key[sa[1]] = num = 1;
        for (int i = 2; i <= n; i++) {
            const bool same = tmp[sa[i]] == tmp[sa[i-1]] && tmp[sa[i]+k] == tmp[sa[i-1]+k];
            key[sa[i]] = same ? num : ++num;
        }
        // All ranks distinct: the order is final.
        if ((m = num) == n) break;
    }

    for (int i = 1; i <= n; i++) rk[sa[i]] = i;

    // The smallest suffix has no predecessor, so its height is 0 by definition.
    height[1] = 0;
    for (int i = 1, k = 0; i <= n; i++) {
        if (rk[i] == 1) { k = 0; continue; }
        // The match with the predecessor shrinks by at most one character
        // when moving from suffix i-1 to suffix i.
        if (k) --k;
        const int j = sa[rk[i]-1];
        while (i+k <= n && j+k <= n && ch[i+k] == ch[j+k]) ++k;
        height[rk[i]] = k;
    }
}
/**
 * Reads the number of test cases, then for each test case reads one string
 * and prints its number of distinct substrings on a line of its own.
 *
 * Stops early if the input ends or cannot be parsed.
 */
void work() {
    // The field width below must leave room for the unused slot ch[0] and the
    // terminating NUL, otherwise an over-long token would overflow the buffer.
    static_assert(sizeof(ch) >= 50003 + 2, "scanf field width must fit into ch");

    if (scanf("%d", &t) != 1) return;
    while (t-- > 0) {
        if (scanf("%50003s", ch+1) != 1) break;
        n = static_cast<int>(strlen(ch+1));
        m = 255;  // initial sort keys are single bytes
        get();

        // In sorted order, suffix sa[i] contributes n-sa[i]+1 prefixes, of
        // which height[i] already occur as prefixes of the previous suffix.
        long long ans = 0;
        for (int i = 1; i <= n; i++) ans += n-sa[i]+1-height[i];
        printf("%lld\n", ans);
    }
}
// Program entry point: hands control to work() and reports success.
int main() {
    work();
    return 0;
}

猜你喜欢

转载自www.cnblogs.com/NaVi-Awson/p/9265684.html