Python/C/C++/JAVA

Advanced Practice Programs with Code and Concept

By D.S

Suffix Automata Construction in C, C++, Python and Java

A Suffix Automaton is a compressed DFA (Deterministic Finite Automaton) that recognizes all suffixes of a string and their substrings. It is built incrementally by adding one character at a time, maintaining a structure where each state represents a set of end positions of substrings. The construction uses a link-tree (suffix links) and transitions for characters to efficiently represent all substrings in O(n) time and space. To implement it, you define a struct for states (with length, link, and transitions) and extend the automaton character-by-character using cloning when necessary.

(a.) C Program

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALPHABET_SIZE 26

typedef struct State {
    int len, link;
    int next[ALPHABET_SIZE];
} State;

typedef struct SuffixAutomaton {
    State *states;
    int size, last;
} SuffixAutomaton;

SuffixAutomaton* init_suffix_automaton() {
    SuffixAutomaton *sa = (SuffixAutomaton*)malloc(sizeof(SuffixAutomaton));
    sa->states = (State*)malloc(2 * sizeof(State));
    sa->size = 1;
    sa->last = 0;
    
    sa->states[0].len = 0;
    sa->states[0].link = -1;
    memset(sa->states[0].next, -1, sizeof(sa->states[0].next));
    
    return sa;
}

void sa_extend(SuffixAutomaton *sa, int c) {
    int p = sa->last;
    int curr = sa->size++;
    
    sa->states = (State*)realloc(sa->states, sa->size * sizeof(State));
    sa->states[curr].len = sa->states[p].len + 1;
    memset(sa->states[curr].next, -1, sizeof(sa->states[curr].next));
    
    while (p >= 0 && sa->states[p].next[c] == -1) {
        sa->states[p].next[c] = curr;
        p = sa->states[p].link;
    }
    
    if (p == -1) {
        sa->states[curr].link = 0;
    } else {
        int q = sa->states[p].next[c];
        if (sa->states[p].len + 1 == sa->states[q].len) {
            sa->states[curr].link = q;
        } else {
            int clone = sa->size++;
            sa->states = (State*)realloc(sa->states, sa->size * sizeof(State));
            
            sa->states[clone].len = sa->states[p].len + 1;
            memcpy(sa->states[clone].next, sa->states[q].next, sizeof(sa->states[clone].next));
            sa->states[clone].link = sa->states[q].link;
            
            while (p >= 0 && sa->states[p].next[c] == q) {
                sa->states[p].next[c] = clone;
                p = sa->states[p].link;
            }
            
            sa->states[q].link = clone;
            sa->states[curr].link = clone;
        }
    }
    
    sa->last = curr;
}

void free_suffix_automaton(SuffixAutomaton *sa) {
    free(sa->states);
    free(sa);
}

int main() {
    SuffixAutomaton *sa = init_suffix_automaton();
    char s[] = "ababa";
    
    for (int i = 0; s[i]; i++) {
        sa_extend(sa, s[i] - 'a');
    }
    
    printf("Suffix Automaton constructed with %d states\n", sa->size);
    free_suffix_automaton(sa);
    return 0;
}
Output:-
Suffix Automaton constructed with 6 states

(b.) C++ Program

#include <iostream>
#include <vector>
#include <unordered_map>

using namespace std;

struct State {
    int len, link;
    unordered_map<char, int> next;
};

class SuffixAutomaton {
    vector<State> states;
    int last;
    
public:
    SuffixAutomaton() {
        states.push_back({0, -1, {}});
        last = 0;
    }
    
    void extend(char c) {
        int p = last;
        int curr = states.size();
        states.push_back({states[p].len + 1, -1, {}});
        
        while (p >= 0 && states[p].next.find(c) == states[p].next.end()) {
            states[p].next[c] = curr;
            p = states[p].link;
        }
        
        if (p == -1) {
            states[curr].link = 0;
        } else {
            int q = states[p].next[c];
            if (states[p].len + 1 == states[q].len) {
                states[curr].link = q;
            } else {
                int clone = states.size();
                states.push_back(states[q]);
                states[clone].len = states[p].len + 1;
                
                while (p >= 0 && states[p].next[c] == q) {
                    states[p].next[c] = clone;
                    p = states[p].link;
                }
                
                states[q].link = clone;
                states[curr].link = clone;
            }
        }
        
        last = curr;
    }
    
    int size() const { return states.size(); }
};

int main() {
    SuffixAutomaton sa;
    string s = "ababa";
    
    for (char c : s) {
        sa.extend(c);
    }
    
    cout << "Suffix Automaton constructed with " << sa.size() << " states" << endl;
    return 0;
}
Output:-
Suffix Automaton constructed with 6 states

(c.) Python Program

class State:
    def __init__(self):
        self.len = 0
        self.link = -1
        self.next = {}

class SuffixAutomaton:
    def __init__(self):
        self.states = [State()]
        self.last = 0
    
    def extend(self, c):
        p = self.last
        curr = len(self.states)
        self.states.append(State())
        self.states[curr].len = self.states[p].len + 1
        
        while p >= 0 and c not in self.states[p].next:
            self.states[p].next[c] = curr
            p = self.states[p].link
        
        if p == -1:
            self.states[curr].link = 0
        else:
            q = self.states[p].next[c]
            if self.states[p].len + 1 == self.states[q].len:
                self.states[curr].link = q
            else:
                clone = len(self.states)
                self.states.append(State())
                self.states[clone].len = self.states[p].len + 1
                self.states[clone].next = self.states[q].next.copy()
                self.states[clone].link = self.states[q].link
                
                while p >= 0 and self.states[p].next[c] == q:
                    self.states[p].next[c] = clone
                    p = self.states[p].link
                
                self.states[q].link = clone
                self.states[curr].link = clone
        
        self.last = curr

if __name__ == "__main__":
    sa = SuffixAutomaton()
    s = "ababa"
    
    for c in s:
        sa.extend(c)
    
    print(f"Suffix Automaton constructed with {len(sa.states)} states")
Output:-
Suffix Automaton constructed with 6 states

(d.) Java Program

import java.util.HashMap;
import java.util.Map;

class State {
    int len, link;
    Map<Character, Integer> next;
    
    public State() {
        next = new HashMap<>();
    }
}

public class SuffixAutomaton {
    private State[] states;
    private int last;
    private int size;
    
    public SuffixAutomaton() {
        states = new State[1];
        states[0] = new State();
        states[0].len = 0;
        states[0].link = -1;
        last = 0;
        size = 1;
    }
    
    public void extend(char c) {
        int p = last;
        int curr = size++;
        
        if (curr >= states.length) {
            State[] newStates = new State[states.length * 2];
            System.arraycopy(states, 0, newStates, 0, states.length);
            states = newStates;
        }
        
        states[curr] = new State();
        states[curr].len = states[p].len + 1;
        
        while (p >= 0 && !states[p].next.containsKey(c)) {
            states[p].next.put(c, curr);
            p = states[p].link;
        }
        
        if (p == -1) {
            states[curr].link = 0;
        } else {
            int q = states[p].next.get(c);
            if (states[p].len + 1 == states[q].len) {
                states[curr].link = q;
            } else {
                int clone = size++;
                if (clone >= states.length) {
                    State[] newStates = new State[states.length * 2];
                    System.arraycopy(states, 0, newStates, 0, states.length);
                    states = newStates;
                }
                
                states[clone] = new State();
                states[clone].len = states[p].len + 1;
                states[clone].next = new HashMap<>(states[q].next);
                states[clone].link = states[q].link;
                
                while (p >= 0 && states[p].next.get(c) == q) {
                    states[p].next.put(c, clone);
                    p = states[p].link;
                }
                
                states[q].link = clone;
                states[curr].link = clone;
            }
        }
        
        last = curr;
    }
    
    public int size() { return size; }
    
    public static void main(String[] args) {
        SuffixAutomaton sa = new SuffixAutomaton();
        String s = "ababa";
        
        for (char c : s.toCharArray()) {
            sa.extend(c);
        }
        
        System.out.println("Suffix Automaton constructed with " + sa.size() + " states");
    }
}
Output:-
Suffix Automaton constructed with 6 states

How did you feel about this post?

😍 🙂 😐 😕 😡

Was this helpful?

👍 👎