Posted 2021-02-26Updated 2021-06-108 minutes read (About 1196 words)

KMP and Suffix Structure

感觉自己像个宝崽，不会的可真多

Prefix Function (Next or Fail or whatever the same name)

KMP里用到的z[i]（prefix function，z function和prefix function可以互相转换，名字借用一下）表示字符串0…i最长相同前后缀（看了很多grandmaster代码，所有人的习惯都不一样，天知道都是怎么想的=_+，我选择保持下标一致，z[i]表示最长长度)。

通过递推，用z[0]~z[i]算出$z[i+1]$，主要思想是第i+1个字母能不能和“之前已经匹配上的最长串的下一个字母”相匹配。如果不行，递归退回次长的串，再尝试匹配。这里有个很奇妙的性质，对于当前的串0~i，最长前后匹配串长度为$z[i]$，次长为$z[z[i] - 1]$…这个套娃背后的原理是：

我们已经知道当前串前后能匹配的最长长度，前后这一段是相同的，因此次长的匹配串应该是0~z[i]这串的前后匹配
而0~z[i]串最长的前后匹配，就是$z[z[i] - 1]$ （减掉的1是保持下标一致，这样很丑，但是选择去掉的写法，z[i]的含义就改变了，其他地方就会变丑，最好的办法应该是让string下标从1开始，那又是另外一个麻烦了=_=）

vector<int> calc_z(string s){
  vector<int> z(s.size());
	int j = 0;
  for(int i=1; i<n; i++){
    while(j>0 && s[i] != s[j]) j = z[j-1];
    if(s[i] == s[j]) j++;
    z[i] = j;
  }
  return z;
}

Exercise

CF536B

题意是给定的串p在原串s中出现的位置，求可能的s数量。

想法是从头到尾填空，如果前后有冲突，那答案是0，没冲突答案就是$26^{uncovered}$，uncovered是原串中放空的部分，可以随便填26个字母。直接做会超时，利用z function算出串p的最长匹配z[p], 次长匹配z[z[p] - 1]… 这样就可以在O(1)的时间内判断交叉k个字母有没有冲突：如果k等于 ${z[p], z[z[p] - 1], …}$ 的其中一个，那么没有冲突，前k个字母和后k个字母相同。

这个判断可以用set进行，也可以用bool array标记 vis[z[p]] = true。 bool array会快一点。

#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define rep(i,a,n) for(int i=(a);i<(n);i++)
#define per(i,n,a) for(int i=(n);i>=(a);i--)
const int MOD = 1e9+7;
const int INF = 1e9;
const int N = 1e6+5;

int nxt[N];
int y[N];
int main(){
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n,m;
    cin>>n>>m;
    string p;
    cin>>p;
    int l=p.length();

    if(m==0){
        ll z=1;
        rep(i,0,n)z=(z*26)%MOD;
        cout<<z;
        return 0;
    }

    rep(i,0,m)cin>>y[i],y[i]--;

    int j=0;
    rep(i,1,l){
        while(j>0&&p[i]!=p[j])j=nxt[j-1];
        if(p[i]==p[j])j++;
        nxt[i]=j;
    }

    set<int> s;
    while(j>0){
        s.insert(j);
        j=nxt[j-1];
    }
    
    int co=0, idx=0;
    rep(i,0,n){
        if(y[idx]==i){
            if(idx>0&&(y[idx-1]+l-1>=i)&&!s.count(y[idx-1]+l-i)){
                cout<<"0";
                return 0;
            }
            idx++;
        }else{
            if(idx==0&&y[idx]>i)co++;
            else if(y[idx-1]+l-1<i)co++;
        }
    }

    ll ans=1;
    rep(i,0,co){
        ans=(ans*26)%MOD;
    }
    cout<<ans;

    return 0;
}

CF126B

题意是找一个最长的子串，使其是prefix，suffix，还要出现在中间。

想法是这个子串如果存在，那么他得是s的最长前后匹配串，或者是次长串，或者是次次长串… 这样才能满足既是prefix，又是suffix。然后要验证这个串是否在中间出现过，就验证这个串的长度m存在于集合${z[1],z[2],..z[n-2]}$之中。可以用set做，也可以用数组标记，测试了下，set会慢很多（set是$O(logn)$毕竟）。

#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define rep(i,a,n) for(int i=a;i<n;i++)
#define per(i,n,a) for(int i=n;i>=a;i--)
const int MOD = 1e9+7;
const int INF = 1e9;
const int N = 1e6+5;

int p[N];
int vis[N];
int main(){
    ios::sync_with_stdio(false);
    cin.tie(0);

    string s;
    cin>>s;
    int n=s.length();
    if(n<=2)cout<<"Just a legend";
    else{
        int j=0;
        rep(i,1,n){
            while(j>0&&s[i]!=s[j])j=p[j-1];
            if(s[i]==s[j])j++;
            p[i]=j;
            if(i!=n-1)vis[j]=1;
        }

        int m=p[n-1];
        while(m!=0){
            if(vis[m]){
                rep(i,0,m)cout<<s[i];
                return 0;
            }
            m=p[m-1];
        }
        cout<<"Just a legend";
    }

    return 0;
}

CF1326D2

题意是从前缀+后缀中找一个最长的回文串，不重叠。

想法是前后能匹配的部分拿的越长越好，肯定是最优的（假设有另外一个最优的回文串左边拿的不是最长的，那么可以去掉后缀第一个字母，加上前缀后面一个字母，他还是最优的）。中间的部分就要从子串的前缀和后缀找一个最长的回文串。有manacher这种专门算法在，但是现在还不需要，因为只需要以前缀或者后缀为开头的回文串。可以计算$s=s+”@”+reverse(s)$的Z function，那么s.substr(0,z[s.length() - 1])就是最长回文串。

#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define rep(i,a,n) for(int i=(a);i<(n);i++)
#define per(i,n,a) for(int i=(n);i>=(a);i--)
const int MOD = 1e9+7;
const int INF = 1e9;
const int N = 100;

string calc(string rs){
    string s=rs;reverse(s.begin(),s.end());
    s=rs+"!"+s;

    int j=0;
    vector<int> z(s.length());
    rep(i,1,s.length()){
        while(j>0&&s[i]!=s[j])j=z[j-1];
        if(s[i]==s[j])j++;
        z[i]=j;
    }

    return s.substr(0,j);
}

int main(){
    ios::sync_with_stdio(false);
    cin.tie(0);

    int t;cin>>t;
    while(t--){
        string s;cin>>s;

        int l=s.length(),k=0;
        rep(i,0,l){
            if(s[i]!=s[l-1-i]||i>=l-1-i)break;
            k=i+1;
        }

        string sp=s.substr(k,l-2*k);
        string a=calc(sp);
        reverse(sp.begin(),sp.end());
        string b=calc(sp);

        cout<<s.substr(0,k)<<(a.length()>b.length()?a:b)<<s.substr(l-k)<<endl;
    }
    
    return 0;
}

CF471D

题意：在一串数字中找匹配的子串

将输入差分以后，KMP找匹配次数。匹配的办法是把text和pattern组合，$s=pattern+“！”+text$，然后算t的z function。因为“！”不会在pattern和text出现，所以z的中的最大值只能是pattern的长度，z[i]==text.length()就说明这个pattern出现了一次（这个方法找到的匹配是有重合的）。

#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define rep(i,a,n) for(int i=(a);i<(n);i++)
#define per(i,n,a) for(int i=(n);i>=(a);i--)
const int MOD = 1e9+7;
const int INF = 1e9;
const int N = 2e5+5;

int aa[N],bb[N];
int z[2*N];
int main(){
    ios::sync_with_stdio(false);
    cin.tie(0);

    int n,w;
    cin>>n>>w;

    vector<int> a,b,ab;
    int tp;
    rep(i,0,n)cin>>aa[i];
    rep(i,0,w)cin>>bb[i];
    rep(i,1,n)a.push_back(aa[i]-aa[i-1]);
    rep(i,1,w)b.push_back(bb[i]-bb[i-1]);

    ab.insert(ab.begin(),b.begin(),b.end());
    ab.push_back(MOD);
    ab.insert(ab.end(),a.begin(),a.end());

    int j=0;
    ll ans=0;
    rep(i,1,ab.size()){
        while(j>0&&ab[i]!=ab[j])j=z[j-1];
        if(ab[i]==ab[j])j++;
        z[i]=j;

        if(z[i]==w-1)ans++;
    }

    if(w==1)ans++;
    cout<<ans;
    return 0;
}

KMP and Suffix Structure

https://nyte-bk201.github.io/2021/02/25/KMP and Suffix Structure/

Author

nyte

Posted on

2021-02-26

Updated on

2021-06-10

Licensed under

#acm string matching

KMP and Suffix Structure

Prefix Function (Next or Fail or whatever the same name)

Exercise

Author

Posted on

Updated on

Licensed under

Catalogue

Tags

Recents