Practice problem (algorithm training): suffix array — the longest repeated substring

Resource limit
Time limit: 100ms Memory limit: 256.0MB


Problem description
  Given a number string of length n, find the length of the longest repeated substring that appears at least k times. These k substrings can overlap. It is guaranteed that the substring appears at least k times.


Input format
  First line: two integers n and k.
  Lines 2 to n + 1: n integers, which together form the number string (the sample gives them all on one line).
Output format
  an integer, which represents the length of the longest repeated substring.


Sample input
8 2
1 2 3 2 3 2 3 1
Sample output
4


Data size and convention
  0 ≤ n ≤ 20000, 2 ≤ k ≤ n, 0 ≤ integer in number string ≤ 1000000


Suffix array:

  • suff[i]: the suffix of the string that starts at position i.
  • Suffix array sa[i]: the starting position, in the original string, of the suffix that has rank i once all suffixes are sorted.
  • Rank array rank[i]: the rank of the suffix starting at position i once all suffixes are sorted (the inverse of sa, i.e. rank[sa[i]] = i).
    The relationship between the three:
    Insert picture description here

Find the suffix array:

There are generally two ways to construct the sa array:

  • Prefix-doubling algorithm: O(n log n)
  • DC3 algorithm: O(n)

The program below uses the prefix-doubling algorithm together with radix sort;
explanations of both the prefix-doubling algorithm and radix sort can be found online.

LCP-longest common prefix:

height[i]: the length of the longest common prefix of suff[sa[i]] and suff[sa[i−1]], that is, of two suffixes that are adjacent in sorted order.
Illustration:
Insert picture description here

Application of suffix array:

1. The longest repeated substring can be overlapped

Given a string, find the longest repeated substring, these two substrings can overlap

2. The longest repeating substring cannot be overlapped

Given a string, find the longest repeating substring, these two substrings cannot overlap

3. The longest repeating substring of K times that can be overlapped

Given a string, find the longest repeating substring that appears at least K times, and these K substrings can overlap

My ability to explain this is limited; for a full treatment, read "Suffix array — a powerful tool for processing strings" yourself.

Source program of this question:

#include<algorithm>
#include<iostream>
using namespace std;
// Capacity of every fixed-size working array below.
#define maxsize 100000

// Rank[p]: rank of the suffix starting at position p (filled by SA()).
int Rank[maxsize];
// sa[r]: start position of the suffix with rank r (filled by SA()).
int sa[maxsize];
// height[r]: common-prefix length of the suffixes ranked r and r - 1
// (filled by Getheight()).
int height[maxsize];

// Scratch arrays used by SA(): sec holds the second-key ordering and
// t holds the counting-sort buckets.
int sec[maxsize];
int t[maxsize];
// The input number string, stored at indices 1..len.
int s[maxsize];
// Upper bound on the sort keys used by the counting sort in SA().
int num = maxsize;

// len: length of the number string.
int len;
// number: required number of occurrences (k in the problem statement).
int number;
// Builds the suffix array of s[1..len] by prefix doubling, using a two-pass
// counting (radix) sort in every round.
//
// On return, for len >= 1:
//   sa[r]   = start position of the suffix with rank r   (1 <= r <= len)
//   Rank[p] = rank of the suffix starting at position p  (1 <= p <= len)
// sec[] and t[] are used as scratch space and num is overwritten.
// Does nothing when len <= 0.
inline void SA() // compute the sa[] array
{
	if (len <= 0) return;

	// Coordinate-compress the symbols into 1..num. This keeps the bucket
	// array t[] small no matter how large the input values are, and it
	// reserves key 0 for "past the end of the string", so a symbol whose
	// value is 0 can no longer be confused with the end of a suffix.
	for (int i = 1; i <= len; i++) sec[i] = s[i];
	std::sort(sec + 1, sec + len + 1);
	num = static_cast<int>(std::unique(sec + 1, sec + len + 1) - (sec + 1));
	for (int i = 1; i <= len; i++)
		Rank[i] = static_cast<int>(std::lower_bound(sec + 1, sec + num + 1, s[i]) - sec);

	// Initial order: counting sort of all positions by their first symbol.
	for (int i = 0; i <= num; i++) t[i] = 0;
	for (int i = 1; i <= len; i++) ++t[Rank[i]];
	for (int i = 1; i <= num; i++) t[i] += t[i - 1];
	for (int i = len; i >= 1; i--) sa[t[Rank[i]]--] = i;

	// Entering a round with step k, Rank[] orders suffixes by their first k
	// symbols; leaving it, by their first 2k symbols. Doubling k keeps the
	// number of rounds logarithmic in len.
	for (int k = 1; k <= len; k <<= 1)
	{
		// Order positions by second key (the rank of the suffix k further on).
		// Suffixes whose second half lies past the end have the smallest key.
		int cnt = 0;
		for (int i = len - k + 1; i <= len; i++) sec[++cnt] = i;
		for (int i = 1; i <= len; i++) if (sa[i] > k) sec[++cnt] = sa[i] - k;

		// Stable counting sort by first key, walking the second-key order
		// backwards so that ties keep their second-key order.
		for (int i = 0; i <= num; i++) t[i] = 0;
		for (int i = 1; i <= len; i++) ++t[Rank[i]];
		for (int i = 1; i <= num; i++) t[i] += t[i - 1];
		for (int i = len; i >= 1; i--) sa[t[Rank[sec[i]]]--] = sec[i];

		// Keep the previous ranks in sec[] while Rank[] is rebuilt. Only the
		// live prefix 1..len is copied instead of swapping the whole arrays.
		for (int i = 1; i <= len; i++) sec[i] = Rank[i];

		Rank[sa[1]] = 1;
		cnt = 1;
		for (int i = 2; i <= len; i++)
		{
			const int cur = sa[i];
			const int prev = sa[i - 1];
			// A second half that starts past the end of the string has key 0.
			const int curSecond = (cur + k <= len) ? sec[cur + k] : 0;
			const int prevSecond = (prev + k <= len) ? sec[prev + k] : 0;
			const bool sameKey = (sec[cur] == sec[prev]) && (curSecond == prevSecond);
			if (!sameKey) ++cnt;
			Rank[cur] = cnt;
		}

		if (cnt == len) break; // every suffix already has a distinct rank
		num = cnt;
	}
}
// Fills height[1..len] from sa[] and Rank[]: height[r] is the length of the
// longest common prefix of the suffixes ranked r and r - 1, and height[1] is 0
// because the first suffix has no predecessor.
//
// Positions are visited in text order so the match length k can be carried
// over: moving from position i to i + 1 shortens the known common prefix by
// at most one symbol.
void Getheight() // compute the height[] array
{
	int k = 0;
	for (int i = 1; i <= len; i++)
	{
		if (Rank[i] == 1)
		{
			// No predecessor in sorted order: define the height as 0 and drop
			// the carried match length, instead of comparing against sa[0].
			height[1] = 0;
			k = 0;
			continue;
		}
		if (k) k--;
		const int j = sa[Rank[i] - 1];
		// Stop at the end of the string: the padding past s[len] must never
		// be compared, otherwise a symbol equal to 0 would match it.
		while (i + k <= len && j + k <= len && s[i + k] == s[j + k]) k++;
		height[Rank[i]] = k;
	}
}
// Decides whether some substring of length mid occurs at least `number` times.
// Walks height[1..len] in rank order, treating each maximal run of entries
// that are >= mid as one group of suffixes sharing a prefix of length mid,
// and reports true as soon as a group holds `number` suffixes.
bool check(int mid)
{
	int members = 0; // suffixes counted in the current group
	for (int pos = 1; pos <= len; ++pos)
	{
		const bool extendsGroup = height[pos] >= mid;
		if (extendsGroup)
			++members;
		if (members >= number)
			return true;
		if (!extendsGroup)
			members = 1; // this suffix opens a new group on its own
	}
	return false;
}
int main() 
{
    
    
	cin >> len >> number;
	for (int i = 1; i <= len; i++)
		cin >> s[i];
	SA();
	Getheight();
	int front=1, rear=len;
	int maxlen = 0;
	while (front <= rear)//二分法!
	{
    
    
		int mid = (front + rear) / 2;
		if (check(mid))
		{
    
    
			front = mid + 1;
			maxlen = mid;
		}
		else
			rear = mid - 1;
	}
	cout << maxlen << endl;
}

Evaluation results:

Insert picture description here

Guess you like

Origin blog.csdn.net/weixin_49243150/article/details/113486870