New Distinct Substrings(后缀数组)

New Distinct Substrings(后缀数组)

给定一个字符串,求不相同的子串的个数。\(n<=50005\)

显然,任何一个子串一定是某个后缀的前缀。先(按套路)把后缀排好序,对于排名为\(i\)的后缀(其起始位置为\(sa[i]\),下标从\(0\)开始),显然它有\(n-sa[i]\)个前缀。其中,有\(height[i]\)个前缀已经在排名比它小的后缀中出现过,因此它对答案的贡献是\(n-sa[i]-height[i]\),最终答案为\(\sum_{i}(n-sa[i]-height[i])\)。

#include <cstdio>
#include <cstring> 
using namespace std;

// Capacity of every buffer below. The input string must satisfy n < maxn,
// because index n is used as a sentinel slot in the rank arrays.
const int maxn=50005;
// T: number of test cases, n: length of the current string,
// m: number of buckets used by the counting sort of the current round.
int T, n, m=maxn;
char s[maxn];

// Returns true when positions a and b carry the same (first key, second key)
// pair in rank array r, where the second key sits j positions further on.
// r[a+j] / r[b+j] may touch index n, so the caller must keep r[n] == 0.
bool cmp(int *r, int a, int b, int j){
    return r[a]==r[b]&&r[a+j]==r[b+j]; }
// x / y: current and scratch rank arrays (they point into wa / wb and are
// swapped every round); t: temporary used for that swap.
// ws: counting-sort buckets; wv: first key of every entry of y.
// sa: suffix array (sa[k] = start of the k-th smallest suffix, 0-based).
// ht: ht[k] = longest common prefix of suffixes sa[k-1] and sa[k]; ht[0] = 0.
int *x, *y, *t, wa[maxn], wb[maxn], ws[maxn], sa[maxn], wv[maxn], ht[maxn];

// Builds sa[0..n-1] and ht[0..n-1] for the string r of length n (global) by
// prefix doubling with counting sorts, followed by a linear LCP scan.
// Preconditions: 0 <= n < maxn and r[0..n-1] contains no zero byte.
// If n is outside that range the function returns without touching anything.
void Suffixsort(char *r){
    int i, j, p=0;
    if (n<0||n>=maxn) return;
    x=wa; y=wb; m=maxn;
    // Reset the sentinel slot in both rank arrays. Every real rank is >= 1,
    // so a 0 at index n makes cmp() treat "past the end" as smaller than any
    // character. Without this, data left behind by an earlier (longer) string
    // could make two different suffixes compare equal.
    wa[n]=wb[n]=0;
    // Round 0: counting sort by the first byte. The byte is read as unsigned
    // so that values >= 0x80 cannot become negative bucket indices.
    for (i=0; i<m; ++i) ws[i]=0;
    for (i=0; i<n; ++i) ++ws[x[i]=static_cast<unsigned char>(r[i])];
    for (i=1; i<m; ++i) ws[i]+=ws[i-1];
    for (i=0; i<n; ++i) sa[--ws[x[i]]]=i;
    // Each round sorts suffixes by their first 2*j characters, using the
    // ranks of the first j characters (x) as both keys. p counts the distinct
    // ranks produced; once p == n all suffixes are separated.
    for (j=1; p<n&&j<n; j<<=1, m=p+1){
        // y = suffix starts ordered by second key. Suffixes shorter than j+1
        // have an empty second key and therefore come first.
        for (p=0, i=n-j; i<n; ++i) y[p++]=i;
        for (i=0; i<n; ++i) if (sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort of y by first key.
        for (i=0; i<n; ++i) wv[i]=x[y[i]];
        for (i=0; i<m; ++i) ws[i]=0;
        for (i=0; i<n; ++i) ++ws[x[i]];
        for (i=1; i<m; ++i) ws[i]+=ws[i-1];
        // Scatter every entry, including y[0]; walking backwards keeps the
        // second-key order inside each bucket.
        for (i=n-1; i>=0; --i) sa[--ws[wv[i]]]=y[i];
        // Re-rank: ranks start at 1 so they never collide with the sentinel.
        t=x; x=y; y=t; x[sa[0]]=1;
        for (p=1, i=1; i<n; ++i)
            x[sa[i]]=cmp(y, sa[i-1], sa[i], j)?p:++p;
    }
    // Derive 0-based ranks straight from sa. This is also correct when the
    // doubling loop never ran (n <= 1) and x still holds raw byte values.
    for (i=0; i<n; ++i) x[sa[i]]=i;
    std::memset(ht, 0, sizeof(ht));
    // LCP scan in text order: moving from suffix i to suffix i+1 can shrink
    // the match length by at most one, so p is reused instead of restarting.
    p=0;
    for (i=0; i<n; ++i){
        if (x[i]==0){ p=0; continue; }  // smallest suffix has no predecessor
        if (p>0) --p;
        j=sa[x[i]-1];
        while (i+p<n&&j+p<n&&r[i+p]==r[j+p]) ++p;
        ht[x[i]]=p;
    }
    return;
}

// Reads T test cases, one string per case, and prints the number of distinct
// non-empty substrings of each string on its own line.
int main(){
    // Stop quietly if the test-case count is missing or malformed.
    if (scanf("%d", &T)!=1) return 0;
    int ans;
    while (T--) {
        // The field width 50004 (= maxn-1) keeps the token plus its
        // terminating NUL inside s. Stop at EOF instead of re-processing the
        // previous buffer.
        if (scanf("%50004s", s)!=1) break;
        n=static_cast<int>(strlen(s));
        Suffixsort(s); ans=0;
        // The suffix ranked i has n-sa[i] prefixes, of which ht[i] already
        // occur as prefixes of the suffix ranked just before it. With
        // n <= 50004 the total is at most n*(n+1)/2, which fits in a 32-bit int.
        for (int i=0; i<n; ++i)
            ans+=n-sa[i]-ht[i];
        printf("%d\n", ans);
    }
    return 0;
}

猜你喜欢

转载自www.cnblogs.com/MyNameIsPc/p/9177830.html