[kuangbin带你飞]专题十六A-K KMP & 扩展KMP & Manacher 题目分析

@(ACM题目)[字符串]

KMP

A - Number Sequence

描述:给定A和B两个数列,问B在A中出现的位置。
分析:B为A子串,KMP模板题。
代码

#include<bits/stdc++.h>
using namespace std;

const int maxn = 1000000 + 5;
const int maxm = 10000 + 5;
// a holds the text sequence, b the pattern sequence.
// fail[k] is the length of the longest proper border of the first k
// elements of b (the KMP failure table).
int fail[maxn], a[maxn], b[maxm];
int n, m; // not used by main(), which keeps its own per-test lengths

// For each of the T test cases: reads the text a and the pattern b, then
// prints the 1-based index of the first occurrence of b in a, or -1 when
// b does not occur.
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases-- != 0)
    {
        int textLen, patLen;
        scanf("%d%d", &textLen, &patLen);
        for (int k = 0; k < textLen; ++k) scanf("%d", a + k);
        for (int k = 0; k < patLen; ++k) scanf("%d", b + k);

        // Build the failure table of the pattern.
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < patLen; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && b[pos] != b[border]) border = fail[border];
            if (b[pos] == b[border]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        // Scan the text, stopping at the first complete match.
        int matched = 0;
        int answer = -1;
        for (int pos = 0; pos < textLen; ++pos)
        {
            while (matched != 0 && b[matched] != a[pos]) matched = fail[matched];
            if (b[matched] == a[pos]) ++matched;
            if (matched == patLen)
            {
                // pos - patLen + 1 is the 0-based start; add 1 for 1-based output.
                answer = pos - patLen + 2;
                break;
            }
        }
        printf("%d\n", answer);
    }
    return 0;
}

B - Oulipo

描述:给定字符串a和b,问b在a中出现的次数(各匹配的子串可以重叠)。
分析:KMP中,每次匹配后沿fail指针跳一下。
代码

#include<bits/stdc++.h>
using namespace std;

const int maxn = 1000000 + 5;
const int maxm = 10000 + 5;
// fail[k] is the length of the longest proper border of the first k
// characters of the pattern b.
int fail[maxn];
char a[maxn], b[maxm]; // a: text, b: pattern
int n, m; // not used by main(), which keeps its own per-test lengths

// For each of the T test cases: reads the pattern b followed by the text a
// and prints how many times b occurs in a, overlapping occurrences included.
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases-- != 0)
    {
        scanf("%s%s", b, a);
        const int textLen = static_cast<int>(strlen(a));
        const int patLen = static_cast<int>(strlen(b));

        // Build the failure table of the pattern.
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < patLen; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && b[pos] != b[border]) border = fail[border];
            if (b[pos] == b[border]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        int matched = 0;
        int occurrences = 0;
        for (int pos = 0; pos < textLen; ++pos)
        {
            while (matched != 0 && b[matched] != a[pos]) matched = fail[matched];
            if (b[matched] == a[pos]) ++matched;
            if (matched != patLen) continue;
            ++occurrences;
            // Fall back along the failure link so overlapping matches are found.
            matched = fail[matched];
        }
        printf("%d\n", occurrences);
    }
    return 0;
}

C - 剪花布条

描述:给定字符串a和b,问b在a中出现的次数(各匹配的子串不可以重叠)。
分析:KMP中,每次匹配后不是fail指针跳一下,而是再从模式串开始位置0进行匹配。
代码

#include<bits/stdc++.h>
using namespace std;

const int maxn = 1000 + 5;
const int maxm = 1000 + 5;
// fail[k] is the length of the longest proper border of the first k
// characters of the pattern b.
int fail[maxn];
char a[maxn], b[maxm]; // a: text, b: pattern
int n, m; // not used by main(), which keeps its own lengths

// Reads (text, pattern) pairs until end of input or until a text token
// starting with '#', and prints for each pair the number of non-overlapping
// occurrences of the pattern in the text.
int main()
{
    while (scanf("%s", a) != EOF && a[0] != '#')
    {
        scanf("%s", b);
        const int textLen = static_cast<int>(strlen(a));
        const int patLen = static_cast<int>(strlen(b));

        // Build the failure table of the pattern.
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < patLen; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && b[pos] != b[border]) border = fail[border];
            if (b[pos] == b[border]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        int matched = 0;
        int pieces = 0;
        for (int pos = 0; pos < textLen; ++pos)
        {
            while (matched != 0 && b[matched] != a[pos]) matched = fail[matched];
            if (b[matched] == a[pos]) ++matched;
            if (matched != patLen) continue;
            ++pieces;
            // Restart from the pattern's beginning so matches never overlap.
            matched = 0;
        }
        printf("%d\n", pieces);
    }
    return 0;
}

D - Cyclic Nacklace

描述:给定一个字符串,问最少在后面添加几个字符,能使它成为周期字符串(即由某个子串重复至少两次构成)。
分析:kmp求循环节模板题,循环节长度为n-fail[n]
代码

#include<bits/stdc++.h>
using namespace std;
const int maxn = 100000  + 5;
char s[maxn];
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];

// For each of the T test cases: reads a string and prints the minimum number
// of characters to append so that it consists of at least two full copies of
// its shortest period.
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases-- != 0)
    {
        scanf("%s", s);
        const int n = static_cast<int>(strlen(s));

        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        // n - fail[n] is the length of the shortest period of s.
        const int period = n - fail[n];
        // Already periodic (two or more whole copies): nothing to add.
        // Otherwise complete the trailing partial copy of the period.
        const bool alreadyPeriodic = (period != n) && (n % period == 0);
        const int missing = alreadyPeriodic ? 0 : period - fail[n] % period;

        printf("%d\n", missing);
    }
    return 0;
}

E - Period

描述:对字符串的每个前缀,若它恰好由某个子串重复K次(K>1)构成,则输出该前缀的长度和最大的K。
分析:KMP求循环节模板题,前缀长度除以循环节长度即为周期数。注意要对每个前缀分别求解。
代码

#include<bits/stdc++.h>
using namespace std;
const int maxn = 1e6 + 5;
char s[maxn];
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];

// Reads test cases (a length n followed by a string) until n is 0 or input
// ends. For every prefix length i >= 2 that is made of two or more whole
// copies of its shortest period, prints i and the number of copies.
int main()
{
    int n;
    int Cas = 1;
    while (scanf("%d", &n) != EOF && n != 0)
    {
        scanf("%s", s);

        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        printf("Test case #%d\n", Cas);
        ++Cas;
        for (int prefix = 2; prefix <= n; ++prefix)
        {
            // Shortest period of the prefix of this length.
            const int unit = prefix - fail[prefix];
            if (unit == prefix || prefix % unit != 0) continue;
            printf("%d %d\n", prefix, prefix / unit);
        }
        puts("");
    }
    return 0;
}

F - The Minimum Length

描述:将一个字符串A重复若干次得到字符串B=AAA…,取出B的一个子串C。现给定C,问A最短是多少
分析:KMP循环节模板题,模板中的循环节就满足后面可以有残缺的,如abcabcab中,n-fail[n]结果为3,允许后面有残缺的ab
代码

#include<bits/stdc++.h>
using namespace std;
const int maxn = 1e6 + 5;
char s[maxn];
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];

// Reads strings until end of input and prints, for each, the length of its
// shortest period n - fail[n] (the last copy of the period may be
// incomplete, e.g. "abcabcab" gives 3).
//
// The input holds only the strings themselves. The earlier version parsed a
// leading integer length (a loop copied from the "Period" solution), which
// fails on string-only input and leaves the length uninitialised.
int main()
{
    while (scanf("%s", s) == 1)
    {
        const int n = static_cast<int>(strlen(s));

        fail[0] = fail[1] = 0;
        for (int i = 1; i < n; ++i)
        {
            int j = fail[i];
            while (j && s[j] != s[i]) j = fail[j];
            fail[i + 1] = s[j] == s[i] ? j + 1 : 0;
        }

        printf("%d\n", n - fail[n]);
    }
    return 0;
}

G - Power Strings

描述:给定一个字符串,求它最多能表示成某个子串重复多少次(即最大的周期数)。
分析:KMP求循环节模板题。先判断是否是周期大于1的周期字符串(字符串长度是循环节的倍数),是则输出周期,不是则周期为1。
代码

#include<cstdio>
#include<cstring>
using namespace std;
const int maxn = 1e6 + 5;
char s[maxn];
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];

// Reads strings until a line that is exactly "." (or end of input) and
// prints, for each, the largest k such that the string is k copies of some
// substring.
int main()
{
    // Compare the whole token with the sentinel: testing only s[0] would
    // also stop on a legitimate test string that merely starts with '.'.
    while (scanf("%s", s) == 1 && strcmp(s, ".") != 0)
    {
        const int n = static_cast<int>(strlen(s));
        fail[0] = fail[1] = 0;
        for (int i = 1; i < n; ++i)
        {
            int j = fail[i];
            while (j && s[j] != s[i]) j = fail[j];
            fail[i + 1] = s[j] == s[i] ? j + 1 : 0;
        }

        // n - fail[n] is the shortest period; it yields whole copies only
        // when it divides n, otherwise the string is a single copy of itself.
        const int len = n - fail[n];
        if (n % len == 0) printf("%d\n", n / len);
        else puts("1");
    }
    return 0;
}

H - Seek the Name, Seek the Fame

描述:给定一个字符串,找出所有“既是该字符串前缀,又是该字符串后缀”的字符串。
分析:KMP中,从fail[n]一直沿着fail指针跳到字符串开始即可。
代码

#include<cstdio>
#include<cstring>
#include<stack>
using namespace std;

const int maxn = 400000 + 5;
char s[maxn];
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];
// Collects border lengths from longest to shortest so they can be printed
// in increasing order.
std::stack<int> st;

// For each input string, prints in increasing order every length L such that
// the prefix of length L is also a suffix of the string (the full length
// included), separated by single spaces.
int main()
{
    while (scanf("%s", s) != EOF)
    {
        while (!st.empty()) st.pop();

        const int n = static_cast<int>(strlen(s));
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        // Follow the failure links from the whole string down to the empty
        // border; each stop is a prefix that is also a suffix.
        st.push(n);
        for (int border = fail[n]; border != 0; border = fail[border])
            st.push(border);

        if (st.size() == 1)
        {
            printf("%d\n", st.top());
        }
        else
        {
            printf("%d", st.top());
            st.pop();
            for (; !st.empty(); st.pop())
                printf(" %d", st.top());
            puts("");
        }
    }
    return 0;
}

I - Blue Jeans

描述:给定最多10个字符串,每个字符串长度均为60,求它们的最长公共子串中字典序最小的。最长公共子串长度小于3认为无解。
分析:枚举第一个字符串的所有子串,对每个子串对每个字符串跑KMP。
代码

#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
const int LEN = 60;   // length of every input string
const int maxn = 100; // capacity of one string buffer
const int maxm = 15;  // capacity of the string table
char s[maxm][maxn];   // the input strings
char p[maxn];         // candidate substring currently being tested
// fail[k] is the length of the longest proper border of the first k
// characters of the string last passed to getFail().
int fail[maxn];

// Fills the global fail table for the NUL-terminated string s:
// entries fail[0] .. fail[strlen(s)] are written.
void getFail(char *s)
{
    const int length = static_cast<int>(strlen(s));
    fail[0] = 0;
    fail[1] = 0;
    for (int pos = 1; pos < length; ++pos)
    {
        // Start from the border of the prefix ending just before pos and
        // shrink it until it can be extended by s[pos] (or becomes empty).
        int border = fail[pos];
        while (border != 0 && s[border] != s[pos]) border = fail[border];
        if (s[border] == s[pos]) fail[pos + 1] = border + 1;
        else fail[pos + 1] = 0;
    }
}

// Returns true iff the pattern P occurs as a substring of the text T.
// Rebuilds the global fail table for P (through getFail) on every call.
bool finda(char *P, char *T)
{
    const int textLen = static_cast<int>(strlen(T));
    const int patLen = static_cast<int>(strlen(P));
    getFail(P);

    int matched = 0; // number of pattern characters currently matched
    for (const char *cur = T; cur != T + textLen; ++cur)
    {
        while (matched != 0 && P[matched] != *cur) matched = fail[matched];
        if (P[matched] == *cur) ++matched;
        if (matched == patLen) return true;
    }
    return false;
}
// Best common substring found so far; "Z" is the "nothing found" sentinel.
std::string res = "Z";

// For each test case: reads n strings and prints the longest substring
// (length >= 3) of the first string that occurs in all of them, preferring
// the lexicographically smallest among equally long ones, or the message
// "no significant commonalities" when there is none.
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases-- != 0)
    {
        res = "Z";
        int count;
        scanf("%d", &count);
        for (int k = 0; k < count; ++k) scanf("%s", s[k]);

        for (int width = 3; width <= LEN; ++width)
        {
            // strncpy below does not terminate p itself, so do it once here.
            p[width] = '\0';
            for (int start = 0; start + width <= LEN; ++start)
            {
                strncpy(p, s[0] + start, width);

                bool everywhere = true;
                for (int k = 0; k < count && everywhere; ++k)
                    everywhere = finda(p, s[k]);
                if (!everywhere) continue;

                const std::string cand(p);
                if (cand.length() > res.length())
                    res = cand;
                else if (cand.length() == res.length() && cand < res)
                    res = cand;
            }
        }

        if (res == "Z") res = "no significant commonalities";
        printf("%s\n", res.c_str());
    }
    return 0;
}

J - Simpsons’ Hidden Talents

描述:给定两个字符串A和B,求一个字符串C,它是A的前缀,B的后缀。求最长的C。
分析
- 解法一:将A与B拼接,求fail[n],并不断沿着fail指针跳转,直到长度不超过A和B中较短者的长度,此时的长度即为答案。
- 解法二:将A与B拼接,中间加一个两个串中都不会出现的特殊字符,如@,此时fail[n]即为答案。
- 解法三:将A作为模式串、B作为文本串跑KMP,扫描到B末尾时已匹配的前缀长度即为答案。

代码
下面代码中使用解法一。

#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
const int maxn = 50000 + 50000 + 5;
std::string s; // concatenation A + B
char s1[maxn]; // input buffer for one string
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];

// Reads pairs of strings A and B until end of input. For each pair prints
// the longest string that is both a prefix of A and a suffix of B, followed
// by its length, or "0" when there is no such non-empty string.
int main()
{
    while (scanf("%s", s1) != EOF)
    {
        s = s1;
        const int lenA = static_cast<int>(s.size());
        scanf("%s", s1);
        const int lenB = static_cast<int>(strlen(s1));
        // The answer cannot be longer than the shorter of the two strings.
        const int bound = lenB < lenA ? lenB : lenA;
        s += s1;

        const int n = static_cast<int>(s.size());
        fail[0] = 0;
        fail[1] = 0;
        for (int pos = 1; pos < n; ++pos)
        {
            int border = fail[pos];
            while (border != 0 && s[border] != s[pos]) border = fail[border];
            if (s[border] == s[pos]) fail[pos + 1] = border + 1;
            else fail[pos + 1] = 0;
        }

        // Borders of A+B longer than either string straddle the junction;
        // walk the failure links until the border fits.
        int res = fail[n];
        while (res > bound) res = fail[res];

        if (res != 0)
        {
            const std::string common = s.substr(0, res);
            printf("%s %d\n", common.c_str(), res);
        }
        else
        {
            puts("0");
        }
    }
    return 0;
}

K - Count the string

描述:给定一个字符串,求它的所有前缀在该字符串中出现的次数之和,即对于前缀 $i$,它在字符串中出现 $s_i$ 次,求 $\sum_i s_i$(结果对 10007 取模)。
分析:考察每个前缀 $i$,统计它的后缀中同时也是整个字符串前缀的个数 $t_i$,则 $\sum_i s_i = \sum_i t_i$。fail数组的一种含义为:fail[i]=j代表字符串的前i个字符中,最大的j(j小于i),使得前j个字符等于后j个字符。那么 $t_i = t_{fail[i]} + 1$(加上的1为该前缀本身)。
代码

#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
const int maxn = 2e5 + 5;
char s[maxn];
// fail[k] is the length of the longest proper border of the first k
// characters of s.
int fail[maxn];
// dp[k] is the number of non-empty suffixes of the length-k prefix that are
// themselves prefixes of s, taken modulo MOD.
long long dp[maxn];
const long long MOD = 10007;

// For each of the T test cases: reads a length n and a string, and prints
// the total number of occurrences of all prefixes of the string inside the
// string, modulo 10007.
int main()
{
    int T;
    if (scanf("%d", &T) != 1) return 0;
    while (T-- > 0)
    {
        int n;
        // Pass `s` (which decays to char*), not `&s`: %s requires a char*
        // argument, and `&s` has type char (*)[maxn].
        if (scanf("%d%s", &n, s) != 2) break;

        fail[0] = fail[1] = 0;
        for (int i = 1; i < n; ++i)
        {
            int j = fail[i];
            while (j && s[j] != s[i]) j = fail[j];
            fail[i + 1] = s[j] == s[i] ? j + 1 : 0;
        }

        // dp[0] = 0 makes the recurrence cover the fail[i] == 0 case too:
        // such a prefix only counts itself.
        dp[0] = 0;
        long long res = 0;
        for (int i = 1; i <= n; ++i)
        {
            dp[i] = (dp[fail[i]] + 1) % MOD;
            res = (res + dp[i]) % MOD;
        }
        printf("%lld\n", res);
    }
    return 0;
}

猜你喜欢

转载自blog.csdn.net/gdymind/article/details/78516565