POJ - 3261 Milk Patterns【后缀数组】【可重叠最长重复子串】

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/niiick/article/details/84729081

Time limit 5000 ms
Case time limit 2000 ms
Memory limit 65536 kB

Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can’t predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.

To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns – 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.

Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

Input

Line 1: Two space-separated integers: N and K
Lines 2… N+1: N integers, one per line, the quality of the milk on day i appears on the ith line.

Output

Line 1: One integer, the length of the longest pattern which occurs at least K times


题意

在原序列中找到一个最长的子串,满足该子串的出现次数不小于 k 次,这些出现位置可以相互重叠


题目分析

可重叠最长重复子串,比较经典的后缀数组height数组的应用

首先题目所求可以转化为
k个不同后缀lcp最大长度

在n个后缀里面枚举k个不同后缀显然T爆,但仔细思考可以发现
要使k个不同后缀的lcp尽量长,那么这k个后缀的排名一定是连续的
这就正好对应到了height数组

后缀排序求出height数组后
height数组每个长度为k-1连续区间最小值的最大值即为答案
用单调队列以 $O(n)$ 的复杂度维护即可

因为 $height[i]$ 表示排名为 $i$ 与排名为 $i-1$ 的后缀的lcp
连续k-1个height的最小值即为这k个后缀的lcp

最后注意要离散化数值,虽然实际好像并没有卡掉


#include<iostream>
#include<cmath>
#include<algorithm>
#include<queue>
#include<cstring>
#include<cstdio>
#include<map>
using namespace std;
typedef long long lt;

/*
 * Reads the next decimal integer from stdin.
 *
 * Skips every character that is not a digit; if a '-' is seen anywhere in
 * the skipped prefix the result is negated. Consumes the digits and the
 * single character that terminates them.
 *
 * Returns 0 when the input ends before any digit is found. The character
 * is kept in an int so that EOF can be told apart from real input; with a
 * char the skip loop would never terminate at end of input.
 */
int read()
{
    int x=0,f=1;
    int ch=getchar();
    // Skip non-digits, remembering a minus sign; stop at end of input.
    while(ch!=EOF&&(ch<'0'||ch>'9'))
    {
        if(ch=='-') f=-1;
        ch=getchar();
    }
    // Accumulate the digit run (EOF is not a digit, so this stops there too).
    while(ch>='0'&&ch<='9')
    {
        x=x*10+(ch-'0');
        ch=getchar();
    }
    return x*f;
}

// Capacity of every working array below.
const int maxn=1000010;

int n;             // number of samples read by main
int k;             // required number of occurrences read by main
int m;             // largest key handled by the counting sort in rsort
int a[maxn];       // the sequence, 1-based (overwritten with discretized ids in main)
int b[maxn];       // sorted copy of the raw values, used for discretization
int pos[maxn];     // distinct raw values in increasing order, 1-based
int cnt;           // number of entries stored in pos
int rak[maxn];     // rak[i]: rank key of the suffix starting at i
int sa[maxn];      // sa[r]: start index of the suffix placed at position r
int tp[maxn];      // scratch: secondary-key order / previous ranks
int tax[maxn];     // counting-sort buckets
int height[maxn];  // height[r]: common-prefix length of sa[r-1] and sa[r]
int q[maxn];       // monotonic queue of indices into height
int ll;            // queue head (index of the first live entry)
int rr;            // queue tail (one past the last live entry)
int ans;           // best window minimum found so far

/*
 * One stable counting-sort pass.
 *
 * Takes the indices listed in tp[1..n] and writes them into sa[1..n]
 * ordered by their key rak[index]; indices with equal keys keep the
 * relative order they had in tp. Keys are expected to lie in [0, m].
 */
void rsort()
{
    int key=0;
    while(key<=m) tax[key++]=0;

    // Histogram of the keys.
    for(int idx=1;idx<=n;++idx) ++tax[rak[idx]];

    // Prefix sums: tax[key] becomes the last slot owned by that key.
    for(key=1;key<=m;++key) tax[key]+=tax[key-1];

    // Walk tp backwards so that equal keys stay in tp order.
    int idx=n;
    while(idx>=1)
    {
        const int suffix=tp[idx];
        sa[tax[rak[suffix]]--]=suffix;
        --idx;
    }
}

/*
 * Builds the suffix array of a[1..n] by prefix doubling.
 *
 * On return sa[1..n] lists suffix start indices in sorted order and
 * rak[i] holds the rank key of the suffix starting at i.
 *
 * The bucket count m for the first pass is taken from the largest symbol
 * in a[1..n] instead of a fixed constant, so the first counting sort is
 * correct for any symbol range that fits in tax.
 *
 * The rank comparison reads tp[] up to index n+len; those entries are
 * never written here and are expected to be 0 (the arrays are file-scope
 * globals), which acts as "empty second half". Symbols are therefore
 * expected to be >= 1.
 */
void ssort()
{
    // First pass: sort single symbols. m must cover the largest key.
    m=0;
    for(int i=1;i<=n;++i)
    {
        rak[i]=a[i];
        tp[i]=i;
        if(a[i]>m) m=a[i];
    }
    rsort();

    // Each round sorts by the first 2*len symbols using ranks of length len.
    for(int len=1;len<=n;len<<=1)
    {
        int p=0;
        // Suffixes with no second half come first in secondary-key order.
        for(int i=n-len+1;i<=n;++i) tp[++p]=i;
        // The rest follow in the order of their second halves.
        for(int i=1;i<=n;++i) if(sa[i]>len) tp[++p]=sa[i]-len;

        rsort();

        // tp now holds the previous ranks; rak is rebuilt from them.
        swap(rak,tp);
        rak[sa[1]]=p=1;
        for(int i=2;i<=n;++i)
        {
            const bool same=tp[sa[i]]==tp[sa[i-1]]&&tp[sa[i]+len]==tp[sa[i-1]+len];
            rak[sa[i]]=same?p:++p;
        }
        // All ranks distinct: the order is final.
        if(p>=n) break;
        m=p;
    }
}

/*
 * Fills height[1..n] from a, sa and rak.
 *
 * height[r] is the length of the common prefix of the suffixes at sorted
 * positions r-1 and r; height[1] is 0 because the first suffix has no
 * predecessor. Suffixes are visited in text order so the match length can
 * be carried over (minus one) from the previous suffix.
 *
 * The first-ranked suffix is handled explicitly and the comparison is
 * bounded by n, so the result does not depend on a[0], a[n+1] or sa[0]
 * holding a value different from every symbol.
 */
void getH()
{
    int len=0;
    for(int i=1;i<=n;++i)
    {
        if(rak[i]==1)
        {
            // No predecessor in sorted order.
            height[1]=0;
            len=0;
            continue;
        }
        if(len) len--;
        const int j=sa[rak[i]-1];
        while(i+len<=n&&j+len<=n&&a[i+len]==a[j+len]) len++;
        height[rak[i]]=len;
    }
}

/*
 * Reads n, k and n values, discretizes the values to 1..cnt, builds the
 * suffix array and height array, then prints the largest minimum over
 * every window of k-1 consecutive height entries.
 */
int main()
{
    n=read();
    k=read();
    for(int day=1;day<=n;++day)
    {
        const int value=read();
        a[day]=value;
        b[day]=value;
    }

    // Discretization: pos[1..cnt] receives the distinct values in order.
    sort(b+1,b+n+1);
    for(int i=1;i<=n;++i)
    {
        if(i>1&&b[i]==b[i-1]) continue;
        pos[++cnt]=b[i];
    }
    // Replace each value by its 1-based index in pos.
    for(int i=1;i<=n;++i)
    {
        a[i]=lower_bound(pos+1,pos+cnt+1,a[i])-pos;
    }

    ssort();
    getH();

    // Sliding-window minimum over height using a monotonic queue of indices.
    ll=1;
    rr=1;
    for(int i=1;i<=n;++i)
    {
        const int windowStart=i-k+2;
        // Drop indices that fell out of the window [windowStart, i].
        while(ll<rr&&q[ll]<windowStart) ++ll;
        // Drop entries that can no longer be the window minimum.
        while(ll<rr&&height[q[rr-1]]>=height[i]) --rr;
        q[rr]=i;
        ++rr;
        // The queue head is the minimum of the current window.
        if(height[q[ll]]>ans) ans=height[q[ll]];
    }
    printf("%d",ans);
    return 0;
}

猜你喜欢

转载自blog.csdn.net/niiick/article/details/84729081