AC自动机
算法简述
ac自动机是一类在字典树上进行kmp的算法。
学习ac自动机一般考点都在于对字典树上的点做处理,一般考的都是在自动机上的dp,而这类的dp中几乎必定有一维表示的是自动机上的点。
模板
#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 500000;
const int cha = 26;
int n, m, k;
struct Tire{
int next[maxa][cha], fail[maxa], end[maxa];
int root, L;
int newnode(){
for(int i = 0; i < cha; i++){
next[L][i] = -1;
}
end[L++] = 0;
return L-1;
}
void init(){
L = 0;
root = newnode();
}
void insert(char buf[]){
int len = strlen(buf);
int now = root;
for(int i = 0; i < len; i++){
if(next[now][buf[i] - 'a'] == -1)
next[now][buf[i]-'a'] = newnode();
now = next[now][buf[i]-'a'];
//printf("%d ", now);
}//puts("");
end[now] ++;
}
void build(){
queue<int>Q;
fail[root] = root;
for(int i = 0; i < cha; i++){
if(next[root][i] == -1)
next[root][i] = root;
else{
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
}
while(!Q.empty()){
int now = Q.front();
Q.pop();
// end[now] |= end[fail[now]];
for(int i = 0; i < cha; i++){
if(next[now][i] == -1)
next[now][i] = next[fail[now]][i];
else{
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
// printf("**%d %d\n",next[now][i],next[fail[now]][i]);
}
}
}
}
int solve(char *s){
int ans = 0, k = 0;
for(int i = 0; s[i]; i++){
int t = s[i] - 'a';
k = next[k][t];
int j = k;
while(j){
ans += end[j];
//if(end[j]) printf("%d ",j);
end[j] = 0;
j = fail[j];
}//puts("");
}
return ans;
}
};
char buf[1000005];
Tire ac;
int main(){
int t, n;
scanf("%d", &t);
while(t--){
scanf("%d", &n);
ac.init();
//memset(ac.end, 0, sizeof(ac.end));
for(int i = 0; i < n; i++){
scanf("%s", buf);
ac.insert(buf);
}
ac.build();
scanf("%s", buf);
printf("%d\n", ac.solve(buf));
}
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/
HDU 3247
题目
给出 个资源, 个病毒,将资源串拼接成一个串,必须包含所有的资源串,可以重叠,但是不能包含病毒。
思路
用ac自动机预处理广搜出所有文本串到文本串的最短安全距离(即不通过病毒串)之后用状压dp求出答案。
难点
难点在于繁琐了一些,bfs加ac自动机加状压dp,不过知道每步都细心一点其实也没什么问题。
关键点,要对自动机了解的深入一些才能做出来,ac自动机往往需要对模板进行改动,只会套模板没什么用。
#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 60000;
const int cha = 2;
int dp[10][1024];
int dis[10][maxa];
int point[10];
int n, m, k;
int leng[10];
struct Tire{
int next[maxa][cha], fail[maxa], end[maxa];
int root, L;
int newnode(){
for(int i = 0; i < cha; i++){
next[L][i] = -1;
}
end[L++] = 0;
return L-1;
}
void init(){
L = 0;
root = newnode();
}
int insert(char buf[], int ii){
int len = strlen(buf);
int now = root;
for(int i = 0; i < len; i++){
int x = buf[i] -'0';
if(next[now][x] == -1)
next[now][x] = newnode();
now = next[now][x];
//printf("%d ", now);
}//puts("");
end[now] = ii;
return now;
}
void build(){//printf("build\n");
queue<int>Q;
fail[root] = root;
for(int i = 0; i < cha; i++){
if(next[root][i] == -1)
next[root][i] = root;
else{
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
}
while(!Q.empty()){
int now = Q.front();
Q.pop();
end[now] |= end[fail[now]];
for(int i = 0; i < cha; i++){
if(next[now][i] == -1)
next[now][i] = next[fail[now]][i];
else{
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
// printf("**%d %d\n",next[now][i],next[fail[now]][i]);
}
}
}
}
void bfs(int ii){//printf("bfs\n");
queue<int> que;
int vis[maxa];
memset(vis, 0, sizeof(vis));
que.push(point[ii]);
vis[0] = 1;
dis[ii][point[ii]] = 0;
while(!que.empty()){
int now = que.front(); que.pop();//printf("%d %d\n", dis[ii][now], now);
for(int i =0; i < 2; i++){
int Next = next[now][i];
if(vis[Next] == 0 && end[Next] != -1){
dis[ii][Next] = dis[ii][now] +1;
vis[Next] = 1;
que.push(Next);
}
/*Next = fail[Next];
while(Next){
if(vis[Next] == 0 && end[Next] != -1){
dis[ii][Next] = dis[ii][now] +1;
vis[Next] = 1;
que.push(Next);
}
Next = fail[Next];
}*/
}
}
}
int solve(){
memset(dis, -1, sizeof(dis));
for(int i = 0;i < n; i++){
bfs(i);
}//printf("solve%d %d\n", dis[0][point[1]], dis[1][point[0]]);//printf("%d %d\n", point[0], point[1]);
for(int i =0 ;i < n; i++){
for(int k = 0; k < (1<<n); k++){
dp[i][k] = 10000000;
}
}
for(int i = 0; i < n; i++){
dp[i][(1<<i)] = leng[i];
}
for(int i =1 ;i < n; i++){
for(int k = 0; k < n; k++){
for(int j = 0; j < (1<<n); j++){
if(dp[k][j] < 10000000){
for(int h = 0; h < n; h++){
if(!(j&(1<<h)) && dp[k][j]!=-1){
dp[h][j|(1<<h)] = min(dp[h][j|(1<<h)], dp[k][j] + dis[k][point[h]]);
}
}
}
}
}
}
int ans = 10000000;
for(int i = 0; i < n; i++){
ans = min(ans, dp[i][(1<<n)-1]);
}
return ans;
}
};
char buf[1000005];
Tire ac;
int main(){
int m;
while(scanf("%d%d", &n, &m), n+m){
ac.init();
for(int i =0 ;i < n; i++){
scanf("%s", buf);
leng[i] = strlen(buf);
point[i]=ac.insert(buf, 1+i);
}
for(int i =0; i < m; i++){
scanf("%s", buf);
ac.insert(buf, -1);
}
ac.build();
printf("%d\n", ac.solve());
}
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/
LightOJ 1427
题目大意
给一个长度小于 的字符串以及小于 500 个长度小于 500 的字符串,问每个字符串在大字符串中出现的次数。
分析
基础ac自动机问题,对模板做一些小修改就可以了。需要注意的是字符串有可能重复。
这是比较裸的思路,他的时间主要取决于主串的长度。
代码
#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 500000;
const int cha = 26;
int n, m, k;
int ans[505];
struct Tire{
int next[maxa][cha], fail[maxa], end[maxa];
int root, L;
int newnode(){
for(int i = 0; i < cha; i++){
next[L][i] = -1;
}
end[L++] = 0;
return L
}
void init(){
L = 0;
root = newnode();
}
void insert(char buf[], int ii){
int len = strlen(buf);
int now = root;
for(int i = 0; i < len; i++){
if(next[now][buf[i] - 'a'] == -1)
next[now][buf[i]-'a'] = newnode();
now = next[now][buf[i]-'a'];
//printf("%d ", now);
}//puts("");
ans[ii] = now;
}
void build(){
queue<int>Q;
fail[root] = root;
for(int i = 0; i < cha; i++){
if(next[root][i] == -1)
next[root][i] = root;
else{
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
}
while(!Q.empty()){
int now = Q.front();
Q.pop();
// end[now] |= end[fail[now]];
for(int i = 0; i < cha; i++){
if(next[now][i] == -1)
next[now][i] = next[fail[now]][i];
else{
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
// printf("**%d %d\n",next[now][i],next[fail[now]][i]);
}
}
}
}
void solve(char *s){
//memset(ans, 0, sizeof(ans));
int k = 0;
for(int i = 0; s[i]; i++){
int t = s[i] - 'a';
k = next[k][t];
int j = k;
while(j){
end[j]++;
j = fail[j];
}//puts("");
}
return ;
}
};
char buf[1000005], buf1[1000005];
Tire ac;
int main(){
int t, n;
scanf("%d", &t);
for(int cas = 1; cas <= t; cas++){
scanf("%d", &n);
ac.init();
//memset(ac.end, 0, sizeof(ac.end));
scanf("%s", buf1);
for(int i = 0; i < n; i++){
scanf("%s", buf);
ac.insert(buf, i+1);
}
ac.build();
ac.solve(buf1);
printf("Case %d:\n", cas);
for(int i = 1; i <= n; i++){
printf("%d\n", ac.end[ans[i]]);
}
}
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/
这是改进过的,时间取决于树的规模,只考虑solve的过场的话时间至少优化掉四倍,从最终跑的时间来看也是这样的。
思路是在build树的时候将树上的每个节点按bfs的顺序存至qq数组中,遍历到树上某点的时候仅将以当前点为结尾的字符串数量加一,然后将qq数组逆序边历,遍历到j点的时候end[fail[j]] += end[j];
#include<iostream>
#include<string.h>
#include<stdio.h>
#include<algorithm>
#include<queue>
using namespace std;
const int maxa = 500000;
const int cha = 26;
int n, m, k;
int ans[505];
struct Tire{
int next[maxa][cha], fail[maxa], end[maxa];
int qq[maxa];
int pos[maxa], tt;
int root, L;
int newnode(){
for(int i = 0; i < cha; i++){
next[L][i] = -1;
}
end[L++] = 0;
return L-1;
}
void init(){
tt = 0;
L = 0;
root = newnode();
}
void insert(char buf[], int ii){
int len = strlen(buf);
int now = root;
for(int i = 0; i < len; i++){
if(next[now][buf[i] - 'a'] == -1)
next[now][buf[i]-'a'] = newnode();
now = next[now][buf[i]-'a'];
//printf("%d ", now);
}//puts("");
pos[ii] = now;
}
void build(){
queue<int>Q;
fail[root] = root;
for(int i = 0; i < cha; i++){
if(next[root][i] == -1)
next[root][i] = root;
else{
qq[tt++] = next[root][i];
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
}
while(!Q.empty()){
int now = Q.front();
Q.pop();
// end[now] |= end[fail[now]];
for(int i = 0; i < cha; i++){
if(next[now][i] == -1)
next[now][i] = next[fail[now]][i];
else{
qq[tt++] = next[now][i];
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
// printf("**%d %d\n",next[now][i],next[fail[now]][i]);
}
}
}
}
void solve(char *s, int n){
memset(ans, 0, sizeof(ans));
int k = 0;
for(int i = 0; s[i]; i++){
int t = s[i] - 'a';
k = next[k][t];
end[k] ++;
}
for(int i = tt-1; i >= 0; i--){
int j = qq[i];
end[fail[j]] += end[j];
}
/*printf("*");
for(int i = 0; i < L; i++){
printf("%d ", end[i]);
}puts("");*/
for(int i = 1; i <= n; i++){
printf("%d\n", end[pos[i]]);
}
return ;
}
};
char buf[1000005], buf1[1000005];
Tire ac;
int main(){
int t, n;
scanf("%d", &t);
for(int cas = 1; cas <= t; cas++){
scanf("%d", &n);
ac.init();
//memset(ac.end, 0, sizeof(ac.end));
scanf("%s", buf1);
for(int i = 0; i < n; i++){
scanf("%s", buf);
ac.insert(buf, i+1);
}
ac.build();
printf("Case %d:\n", cas);
ac.solve(buf1, n);
}
}
/*
abcdefg
bcdefg
cdef
de
e
ssaabcdefg
*/