1.暴力匹配
/*
 * Prefix test: returns 1 when every character of t equals the character at
 * the same offset in s, and 0 at the first difference.
 *
 * If s ends before t does, the terminating NUL of s differs from the
 * corresponding (non-NUL) character of t, so the scan stops there with 0.
 * An empty t yields 1.
 */
int brute_one_match(const char *s, const char *t) {
    const char *text = s;
    const char *pat = t;
    while (*pat != '\0') {
        if (*text != *pat) {
            return 0;
        }
        text++;
        pat++;
    }
    return 1;
}
/*
 * Naive search: tries every start offset of s in increasing order and
 * returns the first offset at which brute_one_match() reports that t is a
 * prefix of the remaining text. Returns -1 when no offset matches (which
 * includes the case where s is empty, since no offset is tried at all).
 */
int brute_force(const char *s, const char *t) {
    int start = 0;
    while (s[start] != '\0') {
        if (brute_one_match(s + start, t)) {
            return start;
        }
        start++;
    }
    return -1;
}
2.hash匹配
/*
 * Modular exponentiation by repeated squaring: returns (a ^ b) mod c.
 *
 * a: base (expected to be non-negative; a negative base yields a result
 *    with C's truncating-remainder sign).
 * b: exponent; values <= 0 are treated as exponent 0.
 * c: modulus, must be > 0.
 *
 * Intermediate products are computed in long long so that moduli larger
 * than sqrt(INT_MAX) do not overflow a signed int.
 */
int quick_mod(int a, int b, int c){
    long long result = 1 % c;      /* 1 % c so that modulus 1 gives 0 */
    long long factor = a % c;      /* keep the running square below c */
    while (b > 0) {
        if (b & 1) {
            result = result * factor % c;
        }
        factor = factor * factor % c;
        b >>= 1;
    }
    return (int)result;
}
/*
 * Rolling-hash (Rabin-Karp style) search for the first occurrence of t in s.
 *
 * A polynomial hash (base 31, modulus 9973) of the pattern is compared with
 * the hash of each length-len window of s; only windows whose hash matches
 * are verified character by character with brute_one_match().
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s.
 *
 * Characters are read as unsigned char so that bytes >= 0x80 cannot produce
 * negative hash values (which would make equal windows compare unequal on
 * platforms where plain char is signed).
 */
int hash_match(const char *s, const char *t) {
    const int base = 31, P = 9973;
    int len = (int)strlen(t);
    if (len == 0) return 0;                    /* empty pattern matches at 0 */

    /* base^len mod P: weight of the character that leaves the window. */
    int nbase = quick_mod(base, len, P);

    int th = 0;
    for (int i = 0; t[i]; i++) {
        th = (th * base + (unsigned char)t[i]) % P;
    }

    int h = 0;
    for (int i = 0; s[i]; i++) {
        h = (h * base + (unsigned char)s[i]) % P;
        if (i >= len) {
            /* Drop s[i - len]; "+ P" keeps the value non-negative. */
            h = (h - ((unsigned char)s[i - len] * nbase % P) + P) % P;
        }
        if (i + 1 < len) continue;             /* window not full yet */
        if (h != th) continue;
        /* Hash hit: confirm to rule out a collision. */
        if (brute_one_match(s + i - len + 1, t)) return i - len + 1;
    }
    return -1;
}
Q1: 哈希匹配算法的时间复杂度?
O(n + m + n*m/P):计算哈希需要 O(n + m);只有哈希值相等(冲突概率约 1/P)时才做 O(m) 的逐字符校验,期望校验开销为 O(n*m/P)
3.KMP算法
/*
 * Builds the KMP failure table for pattern t.
 *
 * next[i] is the index of the last character of the longest proper prefix
 * of t[0..i] that is also a suffix of t[0..i], or -1 when there is none.
 *
 * t: NUL-terminated pattern.
 * n: out parameter, receives strlen(t).
 *
 * Returns a malloc'ed array the caller must free(), or NULL if allocation
 * fails. At least one element is always allocated, so next[0] is valid even
 * for an empty pattern.
 */
int *getNext(const char *t, int *n){
    *n = (int)strlen(t);
    size_t slots = (*n > 0) ? (size_t)*n : 1;
    int *next = (int *)malloc(sizeof(int) * slots);
    if (next == NULL) return NULL;
    next[0] = -1;
    /* Bounded by *n so an empty pattern never reads past its terminator. */
    for (int i = 1, j = -1; i < *n; i++) {
        /* Fall back through shorter borders until one can be extended. */
        while (j != -1 && t[j + 1] != t[i]) {
            j = next[j];
        }
        if (t[j + 1] == t[i]) j += 1;
        next[i] = j;
    }
    return next;
}
/*
 * Knuth-Morris-Pratt search for the first occurrence of t in s.
 *
 * Uses the failure table from getNext(); j is the index of the last pattern
 * character matched so far (-1 when nothing is matched).
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s (also -1 if the failure table cannot be allocated).
 */
int kmp(const char *s, const char *t) {
    if (t[0] == '\0') return 0;        /* empty pattern matches at offset 0 */

    int len;
    int *next = getNext(t, &len);
    if (next == NULL) return -1;

    int pos = -1;
    for (int i = 0, j = -1; s[i]; i++) {
        /* On mismatch, fall back to the next shorter border of the match. */
        while (j != -1 && t[j + 1] != s[i]) {
            j = next[j];
        }
        if (t[j + 1] == s[i]) j += 1;
        if (t[j + 1] == '\0') {        /* whole pattern matched, ending at i */
            pos = i - len + 1;
            break;
        }
    }
    free(next);
    return pos;
}
KMP优化:
/*
 * Expands a KMP failure table into a full transition table over 'a'..'z'.
 *
 * The returned pointer is offset by one row so that it can be indexed with
 * states -1 .. n-2: jump[state][c - 'a'] is the state reached from `state`
 * (index of the last matched pattern character, -1 for none) on character c.
 *
 * next: failure table for t (as produced by getNext()).
 * t:    pattern; its first n characters must all be in 'a'..'z'.
 * n:    pattern length, must be > 0.
 *
 * Returns NULL if n <= 0, if t contains a character outside 'a'..'z', or if
 * allocation fails. Otherwise the caller owns the table and must free rows
 * jump[-1] .. jump[n-2] and then the row array itself via free(jump - 1).
 */
int **getJump(int *next, const char *t, int n) {
    enum { ALPHABET = 26 };

    if (n <= 0) return NULL;
    for (int i = 0; i < n; i++) {
        /* Anything outside a-z would index outside a 26-entry row. */
        if (t[i] < 'a' || t[i] > 'z') return NULL;
    }

    int **rows = (int **)malloc(sizeof(int *) * (size_t)n);
    if (rows == NULL) return NULL;
    for (int i = 0; i < n; i++) {
        rows[i] = (int *)malloc(sizeof(int) * ALPHABET);
        if (rows[i] == NULL) {
            while (i-- > 0) free(rows[i]);
            free(rows);
            return NULL;
        }
    }

    int **jump = rows + 1;             /* make state -1 addressable */
    for (int c = 0; c < ALPHABET; c++) jump[-1][c] = -1;
    jump[-1][t[0] - 'a'] = 0;
    for (int i = 0, last = n - 1; i < last; i++) {
        /* Start from the fallback state's row, then add the real advance. */
        for (int c = 0; c < ALPHABET; c++) jump[i][c] = jump[next[i]][c];
        jump[i][t[i + 1] - 'a'] = i + 1;
    }
    return jump;
}
/*
 * Table-driven KMP: searches for the first occurrence of t in s using the
 * per-character transition table from getJump(), so each text character
 * costs a single lookup.
 *
 * The table only covers 'a'..'z'. A pattern containing any other character
 * is searched with strstr() instead; a text character outside 'a'..'z' can
 * never be part of a match of an all-lowercase pattern, so it simply resets
 * the automaton.
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s (also -1 if the tables cannot be allocated).
 * All tables are released on every path.
 */
int kmp_opt(const char *s, const char *t) {
    if (t[0] == '\0') return 0;        /* empty pattern matches at offset 0 */

    for (int i = 0; t[i]; i++) {
        if (t[i] < 'a' || t[i] > 'z') {
            const char *hit = strstr(s, t);
            return hit ? (int)(hit - s) : -1;
        }
    }

    int len;
    int *next = getNext(t, &len);
    if (next == NULL) return -1;
    int **jump = getJump(next, t, len);
    if (jump == NULL) {
        free(next);
        return -1;
    }

    int pos = -1;
    for (int i = 0, j = -1; s[i]; i++) {
        if (s[i] < 'a' || s[i] > 'z') {
            j = -1;                    /* cannot extend any partial match */
            continue;
        }
        j = jump[j][s[i] - 'a'];
        if (j == len - 1) {            /* last pattern character matched */
            pos = i - len + 1;
            break;
        }
    }

    free(next);
    /* Rows live at indices -1 .. len-2 because getJump offsets the base. */
    for (int i = 0; i < len; i++) free(jump[i - 1]);
    free(jump - 1);
    return pos;
}
4.Sunday算法
/*
 * Sunday search for the first occurrence of t in s.
 *
 * After a failed comparison at offset i, the character just past the
 * current window (s[i + tlen]) decides the shift: tlen minus the index of
 * its last occurrence in t, or tlen + 1 if it does not occur in t.
 *
 * Returns the offset of the first match or -1 when t does not occur in s.
 *
 * The shift table covers all 256 byte values and is indexed through
 * unsigned char, so non-ASCII bytes cannot index outside the table.
 */
int sunday(const char *s, const char *t) {
    enum { ALPHABET = 256 };
    int tlen = (int)strlen(t), slen = (int)strlen(s);
    int jump[ALPHABET];
    for (int i = 0; i < ALPHABET; i++) jump[i] = tlen + 1;
    /* Later occurrences overwrite earlier ones: the rightmost one wins. */
    for (int i = 0; t[i]; i++) jump[(unsigned char)t[i]] = tlen - i;
    for (int i = 0; i + tlen <= slen; ) {
        if (brute_one_match(s + i, t)) return i;
        /* i + tlen <= slen, so this reads at most the terminating NUL. */
        i += jump[(unsigned char)s[i + tlen]];
    }
    return -1;
}
5.shift-and算法
/*
 * Shift-And (bit-parallel) search for the first occurrence of t in s.
 *
 * code[c] has bit k set when t[k] == c. Bit k of the state p is set when
 * t[0..k] matches the text ending at the current position; a match is found
 * when bit n-1 becomes set.
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s.
 *
 * The state is an unsigned long long, so patterns of up to 64 characters
 * are handled bit-parallel; longer patterns are searched with strstr().
 * Characters index the table through unsigned char so bytes >= 0x80 stay
 * in range.
 */
int shift_and(const char *s, const char *t) {
    enum { ALPHABET = 256, MAX_BITS = 64 };

    size_t n = strlen(t);
    if (n == 0) return 0;              /* empty pattern matches at offset 0 */
    if (n > MAX_BITS) {
        const char *hit = strstr(s, t);
        return hit ? (int)(hit - s) : -1;
    }

    unsigned long long code[ALPHABET] = {0};
    for (size_t k = 0; k < n; k++) {
        code[(unsigned char)t[k]] |= 1ULL << k;
    }

    const unsigned long long done = 1ULL << (n - 1);
    unsigned long long p = 0;
    for (int i = 0; s[i]; i++) {
        /* Extend every partial match by one and start a new one at bit 0. */
        p = (p << 1 | 1ULL) & code[(unsigned char)s[i]];
        if (p & done) return i - (int)n + 1;
    }
    return -1;
}
all:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size constant for the global input buffers and for TEST's scratch buffer. */
#define MAX_N 10000
/* Width that TEST pads each result line to (left-padded with spaces). */
#define DEFAULT_LEN 40
/* Text (s) and pattern (t): filled by main() via scanf and read by TEST. */
char s[MAX_N+5], t[MAX_N+5];
/*
 * TEST(func): calls func(s, t) on the global text/pattern buffers and
 * prints one line of the form
 *     func("<s>", "<t>") = <result>
 * left-padded with spaces to DEFAULT_LEN columns when the line is shorter.
 *
 * The scratch buffer is sized to hold both inputs plus the fixed text, and
 * snprintf bounds the write. The padding loop uses "> 0" so that a line
 * longer than DEFAULT_LEN (negative pad count) prints no padding instead of
 * looping. Wrapped in do { } while (0) so it acts as a single statement.
 */
#define TEST(func) do { \
    char temp_s[2 * (MAX_N + 5) + 64]; \
    snprintf(temp_s, sizeof temp_s, "%s(\"%s\", \"%s\") = %3d\n", #func, s, t, func(s, t)); \
    int n = DEFAULT_LEN - (int)strlen(temp_s); \
    while (n-- > 0) printf(" "); \
    printf("%s", temp_s); \
} while (0)
/*
 * Prefix test with call tracing: prints "brute_one_match called" on every
 * invocation, then returns 1 when every character of t equals the character
 * at the same offset in s, and 0 at the first difference.
 *
 * If s ends before t does, the terminating NUL of s differs from the
 * corresponding (non-NUL) character of t, so the scan stops there with 0.
 * An empty t yields 1.
 */
int brute_one_match(const char *s, const char *t) {
    printf("brute_one_match called\n");
    const char *text = s;
    const char *pat = t;
    while (*pat != '\0') {
        if (*text != *pat) {
            return 0;
        }
        text++;
        pat++;
    }
    return 1;
}
/*
 * Naive search: tries every start offset of s in increasing order and
 * returns the first offset at which brute_one_match() reports that t is a
 * prefix of the remaining text. Returns -1 when no offset matches (which
 * includes the case where s is empty, since no offset is tried at all).
 */
int brute_force(const char *s, const char *t) {
    int start = 0;
    while (s[start] != '\0') {
        if (brute_one_match(s + start, t)) {
            return start;
        }
        start++;
    }
    return -1;
}
/*
 * Modular exponentiation by repeated squaring: returns (a ^ b) mod c.
 *
 * a: base (expected to be non-negative; a negative base yields a result
 *    with C's truncating-remainder sign).
 * b: exponent; values <= 0 are treated as exponent 0.
 * c: modulus, must be > 0.
 *
 * Intermediate products are computed in long long so that moduli larger
 * than sqrt(INT_MAX) do not overflow a signed int.
 */
int quick_mod(int a, int b, int c){
    long long result = 1 % c;      /* 1 % c so that modulus 1 gives 0 */
    long long factor = a % c;      /* keep the running square below c */
    while (b > 0) {
        if (b & 1) {
            result = result * factor % c;
        }
        factor = factor * factor % c;
        b >>= 1;
    }
    return (int)result;
}
/*
 * Rolling-hash (Rabin-Karp style) search for the first occurrence of t in s.
 *
 * A polynomial hash (base 31, modulus 9973) of the pattern is compared with
 * the hash of each length-len window of s; only windows whose hash matches
 * are verified character by character with brute_one_match().
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s.
 *
 * Characters are read as unsigned char so that bytes >= 0x80 cannot produce
 * negative hash values (which would make equal windows compare unequal on
 * platforms where plain char is signed).
 */
int hash_match(const char *s, const char *t) {
    const int base = 31, P = 9973;
    int len = (int)strlen(t);
    if (len == 0) return 0;                    /* empty pattern matches at 0 */

    /* base^len mod P: weight of the character that leaves the window. */
    int nbase = quick_mod(base, len, P);

    int th = 0;
    for (int i = 0; t[i]; i++) {
        th = (th * base + (unsigned char)t[i]) % P;
    }

    int h = 0;
    for (int i = 0; s[i]; i++) {
        h = (h * base + (unsigned char)s[i]) % P;
        if (i >= len) {
            /* Drop s[i - len]; "+ P" keeps the value non-negative. */
            h = (h - ((unsigned char)s[i - len] * nbase % P) + P) % P;
        }
        if (i + 1 < len) continue;             /* window not full yet */
        if (h != th) continue;
        /* Hash hit: confirm to rule out a collision. */
        if (brute_one_match(s + i - len + 1, t)) return i - len + 1;
    }
    return -1;
}
/*
 * Builds the KMP failure table for pattern t.
 *
 * next[i] is the index of the last character of the longest proper prefix
 * of t[0..i] that is also a suffix of t[0..i], or -1 when there is none.
 *
 * t: NUL-terminated pattern.
 * n: out parameter, receives strlen(t).
 *
 * Returns a malloc'ed array the caller must free(), or NULL if allocation
 * fails. At least one element is always allocated, so next[0] is valid even
 * for an empty pattern.
 */
int *getNext(const char *t, int *n){
    *n = (int)strlen(t);
    size_t slots = (*n > 0) ? (size_t)*n : 1;
    int *next = (int *)malloc(sizeof(int) * slots);
    if (next == NULL) return NULL;
    next[0] = -1;
    /* Bounded by *n so an empty pattern never reads past its terminator. */
    for (int i = 1, j = -1; i < *n; i++) {
        /* Fall back through shorter borders until one can be extended. */
        while (j != -1 && t[j + 1] != t[i]) {
            j = next[j];
        }
        if (t[j + 1] == t[i]) j += 1;
        next[i] = j;
    }
    return next;
}
/*
 * Knuth-Morris-Pratt search for the first occurrence of t in s.
 *
 * Uses the failure table from getNext(); j is the index of the last pattern
 * character matched so far (-1 when nothing is matched).
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s (also -1 if the failure table cannot be allocated).
 */
int kmp(const char *s, const char *t) {
    if (t[0] == '\0') return 0;        /* empty pattern matches at offset 0 */

    int len;
    int *next = getNext(t, &len);
    if (next == NULL) return -1;

    int pos = -1;
    for (int i = 0, j = -1; s[i]; i++) {
        /* On mismatch, fall back to the next shorter border of the match. */
        while (j != -1 && t[j + 1] != s[i]) {
            j = next[j];
        }
        if (t[j + 1] == s[i]) j += 1;
        if (t[j + 1] == '\0') {        /* whole pattern matched, ending at i */
            pos = i - len + 1;
            break;
        }
    }
    free(next);
    return pos;
}
/*
 * Expands a KMP failure table into a full transition table over 'a'..'z'.
 *
 * The returned pointer is offset by one row so that it can be indexed with
 * states -1 .. n-2: jump[state][c - 'a'] is the state reached from `state`
 * (index of the last matched pattern character, -1 for none) on character c.
 *
 * next: failure table for t (as produced by getNext()).
 * t:    pattern; its first n characters must all be in 'a'..'z'.
 * n:    pattern length, must be > 0.
 *
 * Returns NULL if n <= 0, if t contains a character outside 'a'..'z', or if
 * allocation fails. Otherwise the caller owns the table and must free rows
 * jump[-1] .. jump[n-2] and then the row array itself via free(jump - 1).
 */
int **getJump(int *next, const char *t, int n) {
    enum { ALPHABET = 26 };

    if (n <= 0) return NULL;
    for (int i = 0; i < n; i++) {
        /* Anything outside a-z would index outside a 26-entry row. */
        if (t[i] < 'a' || t[i] > 'z') return NULL;
    }

    int **rows = (int **)malloc(sizeof(int *) * (size_t)n);
    if (rows == NULL) return NULL;
    for (int i = 0; i < n; i++) {
        rows[i] = (int *)malloc(sizeof(int) * ALPHABET);
        if (rows[i] == NULL) {
            while (i-- > 0) free(rows[i]);
            free(rows);
            return NULL;
        }
    }

    int **jump = rows + 1;             /* make state -1 addressable */
    for (int c = 0; c < ALPHABET; c++) jump[-1][c] = -1;
    jump[-1][t[0] - 'a'] = 0;
    for (int i = 0, last = n - 1; i < last; i++) {
        /* Start from the fallback state's row, then add the real advance. */
        for (int c = 0; c < ALPHABET; c++) jump[i][c] = jump[next[i]][c];
        jump[i][t[i + 1] - 'a'] = i + 1;
    }
    return jump;
}
/*
 * Table-driven KMP: searches for the first occurrence of t in s using the
 * per-character transition table from getJump(), so each text character
 * costs a single lookup.
 *
 * The table only covers 'a'..'z'. A pattern containing any other character
 * is searched with strstr() instead; a text character outside 'a'..'z' can
 * never be part of a match of an all-lowercase pattern, so it simply resets
 * the automaton.
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s (also -1 if the tables cannot be allocated).
 * All tables are released on every path.
 */
int kmp_opt(const char *s, const char *t) {
    if (t[0] == '\0') return 0;        /* empty pattern matches at offset 0 */

    for (int i = 0; t[i]; i++) {
        if (t[i] < 'a' || t[i] > 'z') {
            const char *hit = strstr(s, t);
            return hit ? (int)(hit - s) : -1;
        }
    }

    int len;
    int *next = getNext(t, &len);
    if (next == NULL) return -1;
    int **jump = getJump(next, t, len);
    if (jump == NULL) {
        free(next);
        return -1;
    }

    int pos = -1;
    for (int i = 0, j = -1; s[i]; i++) {
        if (s[i] < 'a' || s[i] > 'z') {
            j = -1;                    /* cannot extend any partial match */
            continue;
        }
        j = jump[j][s[i] - 'a'];
        if (j == len - 1) {            /* last pattern character matched */
            pos = i - len + 1;
            break;
        }
    }

    free(next);
    /* Rows live at indices -1 .. len-2 because getJump offsets the base. */
    for (int i = 0; i < len; i++) free(jump[i - 1]);
    free(jump - 1);
    return pos;
}
/*
 * Sunday search for the first occurrence of t in s.
 *
 * After a failed comparison at offset i, the character just past the
 * current window (s[i + tlen]) decides the shift: tlen minus the index of
 * its last occurrence in t, or tlen + 1 if it does not occur in t.
 *
 * Returns the offset of the first match or -1 when t does not occur in s.
 *
 * The shift table covers all 256 byte values and is indexed through
 * unsigned char, so non-ASCII bytes cannot index outside the table.
 */
int sunday(const char *s, const char *t) {
    enum { ALPHABET = 256 };
    int tlen = (int)strlen(t), slen = (int)strlen(s);
    int jump[ALPHABET];
    for (int i = 0; i < ALPHABET; i++) jump[i] = tlen + 1;
    /* Later occurrences overwrite earlier ones: the rightmost one wins. */
    for (int i = 0; t[i]; i++) jump[(unsigned char)t[i]] = tlen - i;
    for (int i = 0; i + tlen <= slen; ) {
        if (brute_one_match(s + i, t)) return i;
        /* i + tlen <= slen, so this reads at most the terminating NUL. */
        i += jump[(unsigned char)s[i + tlen]];
    }
    return -1;
}
/*
 * Shift-And (bit-parallel) search for the first occurrence of t in s.
 *
 * code[c] has bit k set when t[k] == c. Bit k of the state p is set when
 * t[0..k] matches the text ending at the current position; a match is found
 * when bit n-1 becomes set.
 *
 * Returns the offset of the first match, 0 for an empty pattern, or -1 when
 * t does not occur in s.
 *
 * The state is an unsigned long long, so patterns of up to 64 characters
 * are handled bit-parallel; longer patterns are searched with strstr().
 * Characters index the table through unsigned char so bytes >= 0x80 stay
 * in range.
 */
int shift_and(const char *s, const char *t) {
    enum { ALPHABET = 256, MAX_BITS = 64 };

    size_t n = strlen(t);
    if (n == 0) return 0;              /* empty pattern matches at offset 0 */
    if (n > MAX_BITS) {
        const char *hit = strstr(s, t);
        return hit ? (int)(hit - s) : -1;
    }

    unsigned long long code[ALPHABET] = {0};
    for (size_t k = 0; k < n; k++) {
        code[(unsigned char)t[k]] |= 1ULL << k;
    }

    const unsigned long long done = 1ULL << (n - 1);
    unsigned long long p = 0;
    for (int i = 0; s[i]; i++) {
        /* Extend every partial match by one and start a new one at bit 0. */
        p = (p << 1 | 1ULL) & code[(unsigned char)s[i]];
        if (p & done) return i - (int)n + 1;
    }
    return -1;
}
/*
 * Placeholder matcher with the same signature as the real search functions.
 * Ignores both arguments and always returns -1.
 */
int test_func(const char *s, const char *t) {
    (void)s;
    (void)t;
    return -1;
}
/* Turn MAX_N into a string literal so it can be used as a scanf field width. */
#define SCAN_WIDTH_STR_(n) #n
#define SCAN_WIDTH_STR(n) SCAN_WIDTH_STR_(n)

/*
 * Driver: repeatedly reads a text and a pattern (whitespace-separated
 * tokens) from stdin into the global buffers s and t and prints the result
 * of each enabled matcher via TEST.
 *
 * Each token is limited to MAX_N characters so it cannot overflow its
 * buffer, and the loop stops as soon as a full (text, pattern) pair cannot
 * be read, rather than running the matchers against a stale pattern.
 */
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;
    while (scanf("%" SCAN_WIDTH_STR(MAX_N) "s%" SCAN_WIDTH_STR(MAX_N) "s", s, t) == 2)
    {
        //TEST(brute_force);
        //TEST(test_func);
        TEST(hash_match);
        TEST(kmp);
        TEST(kmp_opt);
        TEST(sunday);
    }
    return 0;
}