AC自动机

算法简述

ac自动机是一类在字典树上进行kmp的算法。

学习ac自动机一般考点都在于对字典树上的点做处理,一般考的都是在自动机上的dp,而这类的dp中几乎必定有一维表示的是自动机上的点。

模板

#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 500000;
const int cha = 26;
int n, m, k;
struct Tire{
    int next[maxa][cha], fail[maxa], end[maxa];
    int root, L;
    int newnode(){
        for(int i = 0; i < cha; i++){
            next[L][i] = -1;
        }
        end[L++] = 0;
        return L-1;
    }
    void init(){
        L = 0;
        root = newnode();
    }
    void insert(char buf[]){
        int len = strlen(buf);
        int now = root;
        for(int i = 0; i < len; i++){
            if(next[now][buf[i] - 'a'] == -1)
                next[now][buf[i]-'a'] = newnode();
            now = next[now][buf[i]-'a'];
            //printf("%d ", now);
        }//puts("");
        end[now] ++;
    }
    void build(){
        queue<int>Q;
        fail[root] = root;
        for(int i = 0; i < cha; i++){
            if(next[root][i] == -1)
                next[root][i] = root;
            else{
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while(!Q.empty()){
            int now = Q.front();
            Q.pop();
           // end[now] |= end[fail[now]];
            for(int i = 0; i < cha; i++){
                if(next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];
                else{
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                   // printf("**%d %d\n",next[now][i],next[fail[now]][i]);
                }
            }
        }
    }
    int solve(char *s){
        int ans = 0, k = 0;
        for(int i = 0; s[i]; i++){
            int t = s[i] - 'a';
            k = next[k][t];
            int j = k;
            while(j){
                ans += end[j];
                //if(end[j]) printf("%d ",j);
                end[j] = 0;
                j = fail[j];
            }//puts("");
        }
        return ans;
    }
};
char buf[1000005];
Tire ac;
int main(){
    int t, n;
    scanf("%d", &t);
    while(t--){
        scanf("%d", &n);
        ac.init();
        //memset(ac.end, 0, sizeof(ac.end));
        for(int i = 0; i < n; i++){
            scanf("%s", buf);
            ac.insert(buf);
        }
        ac.build();
        scanf("%s", buf);
        printf("%d\n", ac.solve(buf));
    }
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/

HDU 3247

题目

给出 个资源, 个病毒,将资源串拼接成一个串,必须包含所有的资源串,可以重叠,但是不能包含病毒。

思路

ac自动机预处理广搜出所有文本串到文本串的最短安全距离(即不通过病毒串)之后用状压dp求出答案。

难点

难点在于繁琐了一些,bfs加ac自动机加状压dp,不过知道每步都细心一点其实也没什么问题。
关键点,要对自动机了解的深入一些才能做出来,ac自动机往往需要对模板进行改动,只会套模板没什么用。

#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 60000;
const int cha = 2;
int dp[10][1024];
int dis[10][maxa];
int point[10];
int n, m, k;
int leng[10];
struct Tire{
    int next[maxa][cha], fail[maxa], end[maxa];
    int root, L;
    int newnode(){
        for(int i = 0; i < cha; i++){
            next[L][i] = -1;
        }
        end[L++] = 0;
        return L-1;
    }
    void init(){
        L = 0;
        root = newnode();
    }
    int insert(char buf[], int ii){
        int len = strlen(buf);
        int now = root;
        for(int i = 0; i < len; i++){
            int x = buf[i] -'0';
            if(next[now][x] == -1)
                next[now][x] = newnode();
            now = next[now][x];
            //printf("%d ", now);
        }//puts("");
        end[now] = ii;
        return now;
    }
    void build(){//printf("build\n");
        queue<int>Q;
        fail[root] = root;
        for(int i = 0; i < cha; i++){
            if(next[root][i] == -1)
                next[root][i] = root;
            else{
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while(!Q.empty()){
            int now = Q.front();
            Q.pop();
            end[now] |= end[fail[now]];
            for(int i = 0; i < cha; i++){
                if(next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];
                else{
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                   // printf("**%d %d\n",next[now][i],next[fail[now]][i]);
                }
            }
        }
    }
    void bfs(int ii){//printf("bfs\n");
        queue<int> que;
        int vis[maxa];
        memset(vis, 0, sizeof(vis));
        que.push(point[ii]);
        vis[0] = 1;
        dis[ii][point[ii]] = 0;
        while(!que.empty()){
            int now = que.front(); que.pop();//printf("%d %d\n", dis[ii][now], now);
            for(int i =0; i < 2; i++){
                int Next = next[now][i];
                if(vis[Next] == 0 && end[Next] != -1){
                    dis[ii][Next] = dis[ii][now] +1;
                    vis[Next] = 1;
                    que.push(Next);
                }
                /*Next = fail[Next];
                while(Next){
                    if(vis[Next] == 0 && end[Next] != -1){
                        dis[ii][Next] = dis[ii][now] +1;
                        vis[Next] = 1;
                        que.push(Next);
                    }
                    Next = fail[Next];
                }*/
            }
        }
    }
    int solve(){
        memset(dis, -1, sizeof(dis));
        for(int i = 0;i < n; i++){
            bfs(i);
        }//printf("solve%d %d\n", dis[0][point[1]], dis[1][point[0]]);//printf("%d %d\n", point[0], point[1]);
        for(int i =0 ;i  < n; i++){
            for(int k = 0; k < (1<<n); k++){
                dp[i][k] = 10000000;
            }
        }
        for(int i = 0; i < n; i++){
            dp[i][(1<<i)] = leng[i];
        }
        for(int i =1 ;i < n; i++){
            for(int k = 0; k < n; k++){
                for(int j = 0; j < (1<<n); j++){
                    if(dp[k][j] < 10000000){
                        for(int h = 0; h < n; h++){
                            if(!(j&(1<<h)) && dp[k][j]!=-1){
                                dp[h][j|(1<<h)] = min(dp[h][j|(1<<h)], dp[k][j] + dis[k][point[h]]);
                            }
                        }
                    }
                }
            }
        }
            int ans = 10000000;
        for(int i = 0; i < n; i++){
            ans = min(ans, dp[i][(1<<n)-1]);
        }
        return ans;
    }
};
char buf[1000005];
Tire ac;
int main(){
    int m;
    while(scanf("%d%d", &n, &m), n+m){
        ac.init();
        for(int i =0 ;i  < n; i++){
            scanf("%s", buf);
            leng[i] = strlen(buf);
            point[i]=ac.insert(buf, 1+i);
        }
        for(int i =0; i < m; i++){
            scanf("%s", buf);
            ac.insert(buf, -1);
        }
        ac.build();
        printf("%d\n", ac.solve());
    }
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/

LightOJ 1427

题目大意

给一个长度小于 的字符串以及小于 500 个长度小于 500 的字符串,问每个字符串在大字符串中出现的次数。

分析

基础ac自动机问题,对模板做一些小修改就可以了。需要注意的是字符串有可能重复。
这是比较裸的思路,他的时间主要取决于主串的长度。

代码

#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 500000;
const int cha = 26;
int n, m, k;
int ans[505];
struct Tire{
    int next[maxa][cha], fail[maxa], end[maxa];
    int root, L;
    int newnode(){
        for(int i = 0; i < cha; i++){
            next[L][i] = -1;
        }
        end[L++] = 0;
        return L
    }
    void init(){
        L = 0;
        root = newnode();
    }
    void insert(char buf[], int ii){
        int len = strlen(buf);
        int now = root;
        for(int i = 0; i < len; i++){
            if(next[now][buf[i] - 'a'] == -1)
                next[now][buf[i]-'a'] = newnode();
            now = next[now][buf[i]-'a'];
            //printf("%d ", now);
        }//puts("");
        ans[ii] = now;
    }
    void build(){
        queue<int>Q;
        fail[root] = root;
        for(int i = 0; i < cha; i++){
            if(next[root][i] == -1)
                next[root][i] = root;
            else{
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while(!Q.empty()){
            int now = Q.front();
            Q.pop();
           // end[now] |= end[fail[now]];
            for(int i = 0; i < cha; i++){
                if(next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];
                else{
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                   // printf("**%d %d\n",next[now][i],next[fail[now]][i]);
                }
            }
        }
    }
    void solve(char *s){
        //memset(ans, 0, sizeof(ans));
        int k = 0;
        for(int i = 0; s[i]; i++){
            int t = s[i] - 'a';
            k = next[k][t];
            int j = k;
            while(j){
                end[j]++;
                j = fail[j];
            }//puts("");
        }
        return ;
    }
};
char buf[1000005], buf1[1000005];
Tire ac;
int main(){
    int t, n;
    scanf("%d", &t);
    for(int cas = 1; cas <= t; cas++){
        scanf("%d", &n);
        ac.init();
        //memset(ac.end, 0, sizeof(ac.end));
        scanf("%s", buf1);
        for(int i = 0; i < n; i++){
            scanf("%s", buf);
            ac.insert(buf, i+1);
        }
        ac.build();
        ac.solve(buf1);
        printf("Case %d:\n", cas);
        for(int i = 1; i <= n; i++){
            printf("%d\n", ac.end[ans[i]]);
        }
    }
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/

这是改进过的,时间取决于树的规模,只考虑solve的过场的话时间至少优化掉四倍,从最终跑的时间来看也是这样的。

思路是在build树的时候将树上的每个节点按bfs的顺序存至qq数组中,遍历到树上某点的时候仅将以当前点为结尾的字符串数量加一,然后将qq数组逆序边历,遍历到j点的时候end[fail[j]] += end[j];

#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 500000;
const int cha = 26;
int n, m, k;
int ans[505];
struct Tire{
    int next[maxa][cha], fail[maxa], end[maxa];
    int qq[maxa];
    int pos[maxa], tt;
    int root, L;
    int newnode(){
        for(int i = 0; i < cha; i++){
            next[L][i] = -1;
        }
        end[L++] = 0;
        return L-1;
    }
    void init(){
        tt = 0;
        L = 0;
        root = newnode();
    }
    void insert(char buf[], int ii){
        int len = strlen(buf);
        int now = root;
        for(int i = 0; i < len; i++){
            if(next[now][buf[i] - 'a'] == -1)
                next[now][buf[i]-'a'] = newnode();
            now = next[now][buf[i]-'a'];
            //printf("%d ", now);
        }//puts("");
        pos[ii] = now;
    }
    void build(){
        queue<int>Q;
        fail[root] = root;
        for(int i = 0; i < cha; i++){
            if(next[root][i] == -1)
                next[root][i] = root;
            else{
                qq[tt++] = next[root][i];
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while(!Q.empty()){
            int now = Q.front();
            Q.pop();
           // end[now] |= end[fail[now]];
            for(int i = 0; i < cha; i++){
                if(next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];
                else{
                    qq[tt++] = next[now][i];
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                   // printf("**%d %d\n",next[now][i],next[fail[now]][i]);
                }
            }
        }
    }
    void solve(char *s, int n){
        memset(ans, 0, sizeof(ans));
        int k = 0;
        for(int i = 0; s[i]; i++){
            int t = s[i] - 'a';
            k = next[k][t];
            end[k] ++;
        }
        for(int i = tt-1; i >= 0; i--){
            int j = qq[i];
                end[fail[j]] += end[j];
        }
        /*printf("*");
        for(int i = 0; i < L; i++){
            printf("%d ", end[i]);
        }puts("");*/
        for(int i = 1; i <= n; i++){
            printf("%d\n", end[pos[i]]);
        }
        return ;
    }
};
char buf[1000005], buf1[1000005];
Tire ac;
int main(){
    int t, n;
    scanf("%d", &t);
    for(int cas = 1; cas <= t; cas++){
        scanf("%d", &n);
        ac.init();
        //memset(ac.end, 0, sizeof(ac.end));
        scanf("%s", buf1);
        for(int i = 0; i < n; i++){
            scanf("%s", buf);
            ac.insert(buf, i+1);
        }
        ac.build();
        printf("Case %d:\n", cas);
        ac.solve(buf1, n);
    }
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/

results matching ""

    No results matching ""