Trie and AC (Aho-Corasick) automaton

trie tree

A trie (prefix tree) can be understood as a tree of words: every edge is labeled with a letter, and every node represents the string spelled by the letters on the path from the root to that node.
Here Insert Picture Description
Data structure: a two-dimensional array trie[maxn][N], where trie[u][c] is the index of the child node reached from node u along the edge labeled with letter c (0 means there is no such child).
Building the trie: to add a word, walk down from the root one letter at a time. If the edge for the current letter already exists, follow it to the child; otherwise create a new node and move to it. Finally, mark the node reached by the last letter as the end of a word.

Learning Link: On the Trie

Trie tree template

const int maxn=5e5+7;   // capacity of the node pool (row count of the trie table)
const int N=26;         // alphabet size: Insert maps 'a' to column 0

/**
 * Array-based trie over words made of lowercase letters.
 *
 * trie[u][c] : index of the child of node u along letter ('a'+c), 0 if absent.
 *              Node 0 is the root, so 0 can double as the "no child" marker.
 * tot        : index of the most recently allocated node.
 * book[u]    : true when some inserted word ends exactly at node u.
 */
struct Tire{
    int trie[maxn][N],tot;
    bool book[maxn];

    /** Resets the trie to contain only the root. */
    void Init(){
        memset(trie,0,sizeof trie);
        memset(book,0,sizeof book);
        tot=0;
    }

    /**
     * Inserts one word.
     *
     * @param a word to insert; taken by const reference so the caller's
     *          string is not copied on every call.
     *
     * Precondition (not checked): every character is in 'a'..'z' and the
     * total number of nodes stays below maxn; otherwise the table is indexed
     * out of bounds.
     */
    void Insert(const string& a){
        int u=0;
        for(string::size_type i=0;i<a.size();++i){
            const int v=a[i]-'a';
            if(trie[u][v]==0){
                // No edge for this letter yet: allocate the next free node.
                trie[u][v]=++tot;
            }
            u=trie[u][v];
        }
        book[u]=true;
    }
}test;

Example:
Phone List

#include<iostream>
#include<set>
#include<string.h>
#include<string>
#include<stdio.h>
using namespace std;
// Digit trie for the "Phone List" problem: detects whether any inserted
// number is a prefix of another one (or a duplicate of it).
struct Tire{
    int trie[100100][11],tot;   // child table (columns 0..9 used) and last allocated node index
    bool book[100100];          // book[u]: a number ends exactly at node u

    // Wipes all nodes so the structure can be reused for the next test case.
    void Clear(){
        tot=0;
        memset(book,0,sizeof book);
        memset(trie,0,sizeof trie);
    }

    // Inserts the number `a` and returns true when it conflicts with a
    // number inserted earlier: an earlier number is a prefix of `a`, `a` is
    // a prefix of an earlier number, or `a` was already present.
    bool Insert(string a){
        bool conflict=false;
        int node=0;
        for(string::const_iterator it=a.begin();it!=a.end();++it){
            // An earlier number ends on the way down: it is a prefix of `a`.
            if(book[node]) conflict=true;
            int &slot=trie[node][*it-'0'];
            if(slot==0) slot=++tot;
            node=slot;
        }

        // Same number seen before.
        if(book[node]) conflict=true;
        book[node]=true;

        // Any child below the final node means `a` is a prefix of an
        // earlier, longer number.
        int digit=0;
        while(digit<=9 && trie[node][digit]==0) ++digit;
        if(digit<=9) conflict=true;

        return conflict;
    }
}test;
string str;   // reused buffer for the phone number currently being read

// Reads T test cases; for each, reads n phone numbers and prints "NO" when
// some number conflicts with another (prefix or duplicate), "YES" otherwise.
int main(){
    int caseCount;
    scanf("%d",&caseCount);
    for(;caseCount--;){
        test.Clear();
        int numberCount;
        scanf("%d",&numberCount);
        bool conflict=false;
        // Every number must still be read and inserted after a conflict is
        // found, so the input stays aligned for the next test case.
        for(int left=numberCount;left>0;--left){
            cin>>str;
            const bool clash=test.Insert(str);
            conflict=conflict||clash;
        }
        printf(conflict ? "NO\n" : "YES\n");
    }
    return 0;
}


ac automata

KMP matches a single pattern string against a text; an AC automaton can match many pattern strings at once.
An AC automaton adds a fail pointer to every node of the trie. When the current node cannot be extended with the next character (a mismatch), matching jumps along the fail pointer to a node whose string (a prefix of some pattern) equals a suffix of the part matched so far.
The real meaning of the fail pointer: if the fail pointer of node i points to node j, then the string from the root to j is a suffix of the string from the root to i.

For example:

i = 4, j = 7
the string from the root to i is "ABC"
the string from the root to j is "BC"
"BC" is a suffix of "ABC"
so the fail pointer of i points to j

Query: traverse the text string from the beginning; for each character, move to the next node and then follow the fail pointers from it, visiting every trie node whose string is a suffix of the part matched so far.

Learning Link: AC automaton ultra-detailed explanation , AC automatic machine algorithm Comments (graphic) and templates

ac automatic machine template:

const int maxn=5e5+7;   // capacity of the node pool
const int N=26;         // alphabet size: characters are mapped with (c - 'a')

/**
 * Aho-Corasick automaton over lowercase words, stored in flat arrays.
 *
 * trie[u][c] : before getFail(): child of u along letter c, 0 if absent.
 *              after getFail(): full transition table (missing children are
 *              filled in with the transition of the fail node).
 * cntword[u] : number of inserted words ending at u; query() overwrites it
 *              with -1 once the node has been counted.
 * fail[u]    : fail pointer of u (0 is the root).
 * tot        : index of the most recently allocated node.
 *
 * Usage order: Clear(), insertWord() for every pattern, getFail(), query().
 * Characters outside 'a'..'z' are not checked and index out of bounds.
 */
struct acAutomaton{
    int trie[maxn][N],cntword[maxn],fail[maxn],tot;

    /** Resets the automaton to an empty trie containing only the root. */
    void Clear(){
        memset(trie,0,sizeof trie);
        memset(cntword,0,sizeof cntword);
        memset(fail,0,sizeof fail);
        tot=0;
    }

    /**
     * Adds one pattern to the trie.
     * Must be called before getFail(): getFail() fills empty trie slots, so
     * the "slot == 0 means no child" test below is no longer valid afterwards.
     *
     * @param str pattern to add (by const reference to avoid a copy).
     */
    void insertWord(const string& str){
        int u=0;
        for(string::size_type i=0;i<str.length();++i){
            const int v=str[i]-'a';
            if(trie[u][v]==0){
                trie[u][v]=++tot;
            }
            u=trie[u][v];
        }
        ++cntword[u];
    }

    /**
     * Computes the fail pointers breadth-first and completes the transition
     * table so that query() never has to follow fail pointers on a mismatch.
     */
    void getFail(){
        queue<int> q;
        // Seed the queue with the existing children of the root; their fail
        // pointer is the root itself.
        for(int i=0;i<N;++i){
            if(trie[0][i]){
                fail[trie[0][i]]=0;
                q.push(trie[0][i]);
            }
        }
        while(!q.empty()){
            const int u=q.front();
            q.pop();
            for(int i=0;i<N;++i){
                if(trie[u][i]==0){
                    // u has no child for letter i: reuse the transition of
                    // its fail node. The string of fail[u] is a suffix of
                    // the string of u, so they can share that target.
                    trie[u][i]=trie[fail[u]][i];
                }
                else {
                    // The child's fail pointer is where the parent's fail
                    // node goes on the same letter.
                    fail[trie[u][i]]=trie[fail[u]][i];
                    q.push(trie[u][i]);
                }
            }
        }
    }

    /**
     * Counts how many inserted patterns occur in the text, each inserted
     * pattern counted at most once (duplicates inserted k times count k).
     *
     * Destructive: counted nodes get cntword == -1, so the same pattern is
     * not counted again by this or any later query until Clear() and a
     * rebuild.
     *
     * @param str text to scan (by const reference to avoid a copy).
     * @return number of patterns found.
     */
    int query(const string& str){
        int u=0,ans=0;
        for(string::size_type i=0;i<str.size();++i){
            // Advance the automaton state; u itself is kept for the next
            // character while j walks the fail chain.
            u=trie[u][str[i]-'a'];
            // Stop at the root or at a node already counted: everything
            // further along that chain has been counted as well.
            for(int j=u;j&&cntword[j]!=-1;j=fail[j]){
                ans+=cntword[j];
                cntword[j]=-1;
            }
        }
        return ans;
    }
}Test;

Example:
P3808 [template] AC automaton (simple version)

#include<bits/stdc++.h>
using namespace std;
const int maxn=5e5+7;   // capacity of the node pool

// Aho-Corasick automaton over lowercase letters (26 columns), used to count
// how many of the inserted patterns appear in one text.
struct acAutomaton{
    int trie[maxn][26],cntword[maxn],fail[maxn],tot;

    // Resets every table and the node counter.
    void Clear(){
        tot=0;
        memset(fail,0,sizeof fail);
        memset(cntword,0,sizeof cntword);
        memset(trie,0,sizeof trie);
    }

    // Builds the trie: walks/creates the path for `str` and bumps the word
    // counter of the node where it ends.
    void insertWord(string str){
        int node=0;
        for(string::const_iterator it=str.begin();it!=str.end();++it){
            int &slot=trie[node][*it-'a'];
            if(slot==0) slot=++tot;
            node=slot;
        }
        cntword[node]+=1;
    }

    // Breadth-first computation of the fail pointers; empty child slots are
    // filled with the fail node's transition along the way.
    void getFail(){
        queue<int> pending;
        // Children of the root start the traversal and fail back to the root.
        for(int c=0;c<26;++c){
            const int child=trie[0][c];
            if(child==0) continue;
            fail[child]=0;
            pending.push(child);
        }
        while(!pending.empty()){
            const int cur=pending.front();
            pending.pop();
            const int fallback=fail[cur];
            for(int c=0;c<26;++c){
                const int viaFail=trie[fallback][c];
                int &slot=trie[cur][c];
                if(slot!=0){
                    // Existing child: its fail pointer is the fail node's
                    // transition on the same letter.
                    fail[slot]=viaFail;
                    pending.push(slot);
                }
                else{
                    // Missing child: borrow the fail node's transition, since
                    // the fail node's string is a suffix of the current one.
                    slot=viaFail;
                }
            }
        }
    }

    // Scans the text and returns how many inserted words occur in it.
    // Nodes that have been counted are marked with -1 so no word is counted
    // twice; the walk along the fail chain stops at the root or at a node
    // that is already marked.
    int query(string str){
        int matched=0;
        int state=0;
        for(string::const_iterator it=str.begin();it!=str.end();++it){
            state=trie[state][*it-'a'];
            int walk=state;
            while(walk!=0 && cntword[walk]!=-1){
                matched+=cntword[walk];
                cntword[walk]=-1;
                walk=fail[walk];
            }
        }
        return matched;
    }
}Test;
string str;   // reused buffer: each pattern in turn, then the text
// Reads n patterns and one text, then prints how many patterns occur in it.
int main(){
    int patternCount;
    cin>>patternCount;
    Test.Clear();
    while(patternCount-->0){
        cin>>str;
        Test.insertWord(str);
    }
    // Fail pointers can only be built once every pattern is in the trie.
    Test.getFail();
    cin>>str;
    const int found=Test.query(str);
    cout<<found<<endl;

    return 0;
}
Published 96 original articles · won praise 11 · views 2263

Guess you like

Origin blog.csdn.net/weixin_43769146/article/details/104010583