后缀数组

后缀数组又被称为字符串处理神器;
http://blog.csdn.net/xymscau/article/details/8798046 这里讲的非常好
实现rank排名是用到了倍增法和一个比较神奇的计数排序,时间复杂度是

  • height[i]存放的是排名第i的后缀与排名第i-1的后缀的最长前缀
  • sa[i]存的是排名第i的后缀是第几位开头的
  • rk[i]存放第i个位置开头的后缀的字典序排名

做后缀数组的题基本上会套模板就行,但是要理解这几个数组的意义。

POJ 2774

题意:给出两串字符,要你找出在这两串字符中都出现过的最长子串。

思路:先用个分隔符将两个字符串连接起来,再用后缀数组求出height数组的值,找出sa[i]sa[i-1]分别在分割的两个字符串时最大的height值。

#include<iostream>
#include<string.h>
#include<stdio.h>
using namespace std;

#define rep(i,n) for(int i = 0;i < n; i++)
using namespace std;
const int size  = 200005,INF = 1<<30;
int rk[size],sa[size],height[size],w[size],wa[size],res[size];
void getSa (int len,int up) {
    int *k = rk,*id = height,*r = res, *cnt = wa;
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    rep(i,up) cnt[i+1] += cnt[i];
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    int d = 1,p = 0;
    while(p < len){
        for(int i = len - d; i < len; i++) id[p++] = i;
        rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        if(p >= len) return ;
        d *= 2,up = p, p = 0;
    }
}
void getHeight(int len) {
    rep(i,len) rk[sa[i]] = i;
    height[0] =  0;
    for(int i = 0,p = 0; i < len - 1; i++) {
        int j = sa[rk[i]-1];
        while(i+p < len&& j+p < len&& w[i+p] == w[j+p]) {
            p++;
        }
        height[rk[i]] = p;
        p = max(0,p - 1);
    }
}
int getSuffix(char s[]) {
    int len = strlen(s),up = 0;
    for(int i = 0; i < len; i++) {
        w[i] = s[i];
        up = max(up,w[i]);
    }
    w[len++] = 0;
    getSa(len,up+1);
    getHeight(len);
    return len;
}const int maxa = 100000*2+1;
char str[maxa];
int main(){
    while(scanf("%s", str)!=EOF){
        int l = strlen(str);
        str[l] = ' ';
        scanf("%s", str+l+1);
        getSuffix(str);
        int ans = 0;
        int L = strlen(str);
        for(int i = 1;i < L; i++){
            if((sa[i-1] < l && sa[i] > l) || (sa[i-1] > l && sa[i] < l)){
                ans = max(ans, height[i]);
            }
        }
        printf("%d\n", ans);
    }
}   
/*
abcde
bcde
*/

POJ 1743

题意:给一串数字,求变化相同,且不重叠的最长字符串。

思路:变化相同就是将字符串s[i]变成s[i]-s[i-1]

那么再求后缀数组的话height[i]代表的是两个长度是height[i]+1变化相等,而如果s[i]s[j]间距是n的话那么他们在实际字符串中的间距也是n,所以如果两个地方的height最小值是n的话他们的间距应该是n+1才行。

二分答案的方法这里讲的很好http://blog.sina.com.cn/s/blog_6635898a0102e0me.html

#include<iostream>
#include<string.h>
#include<stdio.h>
using namespace std;

#define rep(i,n) for(int i = 0;i < n; i++)
using namespace std;
const int size  = 200005,INF = 1<<30;
int rk[size],sa[size],height[size],w[size],wa[size],res[size];
void getSa (int len,int up) {
    int *k = rk,*id = height,*r = res, *cnt = wa;
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    rep(i,up) cnt[i+1] += cnt[i];
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    int d = 1,p = 0;
    while(p < len){
        for(int i = len - d; i < len; i++) id[p++] = i;
        rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        if(p >= len) return ;
        d *= 2,up = p, p = 0;
    }
}
void getHeight(int len) {
    rep(i,len) rk[sa[i]] = i;
    height[0] =  0;
    for(int i = 0,p = 0; i < len - 1; i++) {
        int j = sa[rk[i]-1];
        while(i+p < len&& j+p < len&& w[i+p] == w[j+p]) {
            p++;
        }
        height[rk[i]] = p;
        p = max(0,p - 1);
    }
}
int getSuffix(int s[], int n) {
    int len = n,up = 0;
    /*for(int i = 0;i  < len; i++){
        printf("%d ", s[i]);
    }puts("");*/
    for(int i = 0; i < len; i++) {
        w[i] = s[i];
        up = max(up,w[i]);
    }
    w[len++] = 0;
    getSa(len,up+1);
    getHeight(len);
    return len;
}const int maxa = 100000*2+1;
int str[maxa];
int a[maxa];
int judge(int ans, int n){
    int l = sa[0], r = sa[0];
    for(int i = 0;i <= n; i++){
        if(height[i] >= ans){
            l = min(l, sa[i]);
            r = max(r, sa[i]);
            if(r - l > ans)
                return 1;
        }
        else{
            l = r = sa[i];
        }
    }
    return 0;
}
int main(){
    int n;
    while(scanf("%d", &n)!=EOF){
        if(n == 0)return 0;
        for(int i = 0; i < n; i++){
            scanf("%d", &a[i]);
        }
        /*a[n] = a[n-1];
        n++;*/
        for(int i = 0; i < n-1; i++){
            str[i] = a[i+1] - a[i] + 100;
        }
        str[n-1] = 0;
        getSuffix(str, n-1);
        int l = 0, r = n-1;
        while(l < r){
            int mid = (l+r) / 2;
            if(judge(mid, n-1)) l = mid+1;
            else r = mid ;
        }
        //printf("%d\n" , l);
        if(l < 5){
            printf("0\n");
        }else{
            printf("%d\n", l);
        }
    }
}
/*
abcde
bcde
*/

results matching ""

    No results matching ""