library

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub maspypy/library

:heavy_check_mark: string/suffix_tree.hpp

Depends on

Verified with

Code

#include "string/suffix_array.hpp"
#include "seq/cartesian_tree.hpp"
#include "graph/base.hpp"

// https://twitter.com/maspy_stars/status/1565901414236205057?s=20&t=S2Tu6ayozHcakxai8dmh4g
// 各ノードは、suffix array での長方形領域と見なして、
// グラフおよび、領域データを作る。
// sample: test/my_test/suffix_tree.test.cpp
template <typename SUFFIX>
pair<Graph<int, 1>, vc<tuple<int, int, int, int>>> suffix_tree(SUFFIX& X) {
  auto SA = X.SA;
  auto ISA = X.ISA;
  auto LCP = X.LCP;

  vc<tuple<int, int, int, int>> dat;
  vc<pair<int, int>> edges;

  int N = len(SA);
  if (N == 1) {
    Graph<int, 1> G(2);
    G.add(0, 1);
    G.build();
    dat.eb(0, 1, 1, 1), dat.eb(0, 1, 1, 2);
    return {G, dat};
  }

  dat.eb(0, N, 0, 0);
  CartesianTree<int, true> CT(LCP);

  auto dfs = [&](auto& dfs, int p, int idx, int h) -> void {
    int L = CT.range[idx].fi;
    int R = CT.range[idx].se + 1;
    int hh = LCP[idx];
    if (h < hh) {
      edges.eb(p, len(dat));
      p = len(dat);
      dat.eb(L, R, h, hh);
    }
    if (CT.lch[idx] == -1) {
      if (hh < N - SA[idx]) {
        edges.eb(p, len(dat));
        dat.eb(idx, idx + 1, hh, N - SA[idx]);
      }
    } else {
      dfs(dfs, p, CT.lch[idx], hh);
    }
    if (CT.rch[idx] == -1) {
      if (hh < N - SA[idx + 1]) {
        edges.eb(p, len(dat));
        dat.eb(idx + 1, idx + 2, hh, N - SA[idx + 1]);
      }
    } else {
      dfs(dfs, p, CT.rch[idx], hh);
    }
  };
  int r = CT.root;
  if (LCP[r] > 0) {
    edges.eb(0, 1);
    dat.eb(0, N, 0, LCP[r]);
    dfs(dfs, 1, r, LCP[r]);
  } else {
    dfs(dfs, 0, r, 0);
  }
  for (auto& [a, b, c, d]: dat) ++c, ++d;

  Graph<int, 1> G(len(dat));
  for (auto&& [a, b]: edges) G.add(a, b);
  G.build();
  return {G, dat};
}
#line 1 "string/suffix_tree.hpp"

#line 2 "string/suffix_array.hpp"

#line 2 "alg/monoid/min.hpp"

template <typename E>
struct Monoid_Min {
  using X = E;
  using value_type = X;
  static constexpr X op(const X &x, const X &y) noexcept { return min(x, y); }
  static constexpr X unit() { return infty<E>; }
  static constexpr bool commute = true;
};
#line 2 "ds/sparse_table/sparse_table.hpp"

// 冪等なモノイドであることを仮定。disjoint sparse table より x 倍高速
template <class Monoid>
struct Sparse_Table {
  using MX = Monoid;
  using X = typename MX::value_type;
  int n, log;
  vvc<X> dat;

  Sparse_Table() {}
  Sparse_Table(int n) { build(n); }
  template <typename F>
  Sparse_Table(int n, F f) {
    build(n, f);
  }
  Sparse_Table(const vc<X>& v) { build(v); }

  void build(int m) {
    build(m, [](int i) -> X { return MX::unit(); });
  }
  void build(const vc<X>& v) {
    build(len(v), [&](int i) -> X { return v[i]; });
  }
  template <typename F>
  void build(int m, F f) {
    n = m, log = 1;
    while ((1 << log) < n) ++log;
    dat.resize(log);
    dat[0].resize(n);
    FOR(i, n) dat[0][i] = f(i);

    FOR(i, log - 1) {
      dat[i + 1].resize(len(dat[i]) - (1 << i));
      FOR(j, len(dat[i]) - (1 << i)) {
        dat[i + 1][j] = MX::op(dat[i][j], dat[i][j + (1 << i)]);
      }
    }
  }

  X prod(int L, int R) {
    if (L == R) return MX::unit();
    if (R == L + 1) return dat[0][L];
    int k = topbit(R - L - 1);
    return MX::op(dat[k][L], dat[k][R - (1 << k)]);
  }

  template <class F>
  int max_right(const F check, int L) {
    assert(0 <= L && L <= n && check(MX::unit()));
    if (L == n) return n;
    int ok = L, ng = n + 1;
    while (ok + 1 < ng) {
      int k = (ok + ng) / 2;
      bool bl = check(prod(L, k));
      if (bl) ok = k;
      if (!bl) ng = k;
    }
    return ok;
  }

  template <class F>
  int min_left(const F check, int R) {
    assert(0 <= R && R <= n && check(MX::unit()));
    if (R == 0) return 0;
    int ok = R, ng = -1;
    while (ng + 1 < ok) {
      int k = (ok + ng) / 2;
      bool bl = check(prod(k, R));
      if (bl) ok = k;
      if (!bl) ng = k;
    }
    return ok;
  }
};
#line 5 "string/suffix_array.hpp"

// 辞書順 i 番目の suffix が j 文字目始まりであるとき、
// SA[i] = j, ISA[j] = i
// |S|>0 を前提(そうでない場合 dummy 文字を追加して利用せよ)
struct Suffix_Array {
  vc<int> SA;
  vc<int> ISA;
  vc<int> LCP;
  Sparse_Table<Monoid_Min<int>> seg;
  bool build_seg;

  Suffix_Array(string& s) {
    build_seg = 0;
    assert(len(s) > 0);
    char first = 127, last = 0;
    for (auto&& c: s) {
      chmin(first, c);
      chmax(last, c);
    }
    SA = calc_suffix_array(s, first, last);
    calc_LCP(s);
  }

  Suffix_Array(vc<int>& s) {
    build_seg = 0;
    assert(len(s) > 0);
    SA = calc_suffix_array(s);
    calc_LCP(s);
  }

  // lcp(S[i:], S[j:])
  int lcp(int i, int j) {
    if (!build_seg) {
      build_seg = true;
      seg.build(LCP);
    }
    int n = len(SA);
    if (i == n || j == n) return 0;
    if (i == j) return n - i;
    i = ISA[i], j = ISA[j];
    if (i > j) swap(i, j);
    return seg.prod(i, j);
  }

  // S[i:] との lcp が n 以上であるような半開区間
  pair<int, int> lcp_range(int i, int n) {
    if (!build_seg) {
      build_seg = true;
      seg.build(LCP);
    }
    i = ISA[i];
    int a = seg.min_left([&](auto e) -> bool { return e >= n; }, i);
    int b = seg.max_right([&](auto e) -> bool { return e >= n; }, i);
    return {a, b + 1};
  }

  // -1: S[L1:R1) < S[L2, R2)
  //  0: S[L1:R1) = S[L2, R2)
  // +1: S[L1:R1) > S[L2, R2)
  int compare(int L1, int R1, int L2, int R2) {
    int n1 = R1 - L1, n2 = R2 - L2;
    int n = lcp(L1, L2);
    if (n == n1 && n == n2) return 0;
    if (n == n1) return -1;
    if (n == n2) return 1;
    return (ISA[L1 + n] > ISA[L2 + n] ? 1 : -1);
  }

private:
  void induced_sort(const vc<int>& vect, int val_range, vc<int>& SA,
                    const vc<bool>& sl, const vc<int>& lms_idx) {
    vc<int> l(val_range, 0), r(val_range, 0);
    for (int c: vect) {
      if (c + 1 < val_range) ++l[c + 1];
      ++r[c];
    }
    partial_sum(l.begin(), l.end(), l.begin());
    partial_sum(r.begin(), r.end(), r.begin());
    fill(SA.begin(), SA.end(), -1);
    for (int i = (int)lms_idx.size() - 1; i >= 0; --i)
      SA[--r[vect[lms_idx[i]]]] = lms_idx[i];
    for (int i: SA)
      if (i >= 1 && sl[i - 1]) SA[l[vect[i - 1]]++] = i - 1;
    fill(r.begin(), r.end(), 0);
    for (int c: vect) ++r[c];
    partial_sum(r.begin(), r.end(), r.begin());
    for (int k = (int)SA.size() - 1, i = SA[k]; k >= 1; --k, i = SA[k])
      if (i >= 1 && !sl[i - 1]) { SA[--r[vect[i - 1]]] = i - 1; }
  }

  vc<int> SA_IS(const vc<int>& vect, int val_range) {
    const int n = vect.size();
    vc<int> SA(n), lms_idx;
    vc<bool> sl(n);
    sl[n - 1] = false;
    for (int i = n - 2; i >= 0; --i) {
      sl[i] = (vect[i] > vect[i + 1] || (vect[i] == vect[i + 1] && sl[i + 1]));
      if (sl[i] && !sl[i + 1]) lms_idx.push_back(i + 1);
    }
    reverse(lms_idx.begin(), lms_idx.end());
    induced_sort(vect, val_range, SA, sl, lms_idx);
    vc<int> new_lms_idx(lms_idx.size()), lms_vec(lms_idx.size());
    for (int i = 0, k = 0; i < n; ++i)
      if (!sl[SA[i]] && SA[i] >= 1 && sl[SA[i] - 1]) {
        new_lms_idx[k++] = SA[i];
      }
    int cur = 0;
    SA[n - 1] = cur;
    for (size_t k = 1; k < new_lms_idx.size(); ++k) {
      int i = new_lms_idx[k - 1], j = new_lms_idx[k];
      if (vect[i] != vect[j]) {
        SA[j] = ++cur;
        continue;
      }
      bool flag = false;
      for (int a = i + 1, b = j + 1;; ++a, ++b) {
        if (vect[a] != vect[b]) {
          flag = true;
          break;
        }
        if ((!sl[a] && sl[a - 1]) || (!sl[b] && sl[b - 1])) {
          flag = !((!sl[a] && sl[a - 1]) && (!sl[b] && sl[b - 1]));
          break;
        }
      }
      SA[j] = (flag ? ++cur : cur);
    }
    for (size_t i = 0; i < lms_idx.size(); ++i) lms_vec[i] = SA[lms_idx[i]];
    if (cur + 1 < (int)lms_idx.size()) {
      auto lms_SA = SA_IS(lms_vec, cur + 1);
      for (size_t i = 0; i < lms_idx.size(); ++i) {
        new_lms_idx[i] = lms_idx[lms_SA[i]];
      }
    }
    induced_sort(vect, val_range, SA, sl, new_lms_idx);
    return SA;
  }

  vc<int> calc_suffix_array(const string& s, const char first = 'a',
                            const char last = 'z') {
    vc<int> vect(s.size() + 1);
    copy(begin(s), end(s), begin(vect));
    for (auto& x: vect) x -= (int)first - 1;
    vect.back() = 0;
    auto ret = SA_IS(vect, (int)last - (int)first + 2);
    ret.erase(ret.begin());
    return ret;
  }

  vc<int> calc_suffix_array(const vc<int>& s) {
    vc<int> ss = s;
    UNIQUE(ss);

    vc<int> vect(s.size() + 1);
    copy(all(s), vect.begin());
    for (auto& x: vect) x = LB(ss, x) + 1;
    vect.back() = 0;
    auto ret = SA_IS(vect, MAX(vect) + 2);
    ret.erase(ret.begin());
    return ret;
  }

  template <typename STRING>
  void calc_LCP(const STRING& s) {
    int n = s.size(), k = 0;
    ISA.resize(n);
    LCP.resize(n);
    for (int i = 0; i < n; i++) ISA[SA[i]] = i;
    for (int i = 0; i < n; i++, k ? k-- : 0) {
      if (ISA[i] == n - 1) {
        k = 0;
        continue;
      }
      int j = SA[ISA[i] + 1];
      while (i + k < n && j + k < n && s[i + k] == s[j + k]) k++;
      LCP[ISA[i]] = k;
    }
    LCP.resize(n - 1);
  }
};
#line 1 "seq/cartesian_tree.hpp"
/*
辞書順で高さを unique して、木にしている。
極大長方形アルゴリズムで線形時間構築。
*/
template <typename T, bool IS_MIN>
struct CartesianTree {
  int n;
  vc<T>& A;
  vc<pair<int, int>> range;
  vc<int> lch, rch, par;
  int root;

  CartesianTree(vc<T>& A) : n(len(A)), A(A) {
    range.assign(n, {-1, -1});
    lch.assign(n, -1);
    rch.assign(n, -1);
    par.assign(n, -1);
    if (n == 1) {
      range[0] = {0, 1};
      root = 0;
      return;
    }
    auto is_sm = [&](int i, int j) -> bool {
      if (IS_MIN) return (A[i] < A[j]) || (A[i] == A[j] && i < j);
      return (A[i] > A[j]) || (A[i] == A[j] && i < j);
    };
    vc<int> st;
    FOR(i, n) {
      while (!st.empty() && is_sm(i, st.back())) {
        lch[i] = st.back();
        st.pop_back();
      }
      range[i].fi = (st.empty() ? 0 : st.back() + 1);
      st.eb(i);
    }
    st.clear();
    FOR_R(i, n) {
      while (!st.empty() && is_sm(i, st.back())) {
        rch[i] = st.back();
        st.pop_back();
      }
      range[i].se = (st.empty() ? n : st.back());
      st.eb(i);
    }
    FOR(i, n) if (lch[i] != -1) par[lch[i]] = i;
    FOR(i, n) if (rch[i] != -1) par[rch[i]] = i;
    FOR(i, n) if (par[i] == -1) root = i;
  }

  // (l, r, h)

  tuple<int, int, T> maximum_rectangle(int i) {
    auto [l, r] = range[i];
    return {l, r, A[i]};
  }

  // (l, r, h)

  T max_rectangle_area() {
    assert(IS_MIN);
    T res = 0;
    FOR(i, n) {
      auto [l, r, h] = maximum_rectangle(i);
      chmax(res, (r - l) * h);
    }
    return res;
  }

  ll count_subrectangle(bool baseline) {
    assert(IS_MIN);
    ll res = 0;
    FOR(i, n) {
      auto [l, r, h] = maximum_rectangle(i);
      ll x = (baseline ? h : h * (h + 1) / 2);
      res += x * (i - l + 1) * (r - i);
    }
    return res;
  }
};
#line 2 "graph/base.hpp"

template <typename T>
struct Edge {
  int frm, to;
  T cost;
  int id;
};

template <typename T = int, bool directed = false>
struct Graph {
  static constexpr bool is_directed = directed;
  int N, M;
  using cost_type = T;
  using edge_type = Edge<T>;
  vector<edge_type> edges;
  vector<int> indptr;
  vector<edge_type> csr_edges;
  vc<int> vc_deg, vc_indeg, vc_outdeg;
  bool prepared;

  class OutgoingEdges {
  public:
    OutgoingEdges(const Graph* G, int l, int r) : G(G), l(l), r(r) {}

    const edge_type* begin() const {
      if (l == r) { return 0; }
      return &G->csr_edges[l];
    }

    const edge_type* end() const {
      if (l == r) { return 0; }
      return &G->csr_edges[r];
    }

  private:
    const Graph* G;
    int l, r;
  };

  bool is_prepared() { return prepared; }

  Graph() : N(0), M(0), prepared(0) {}
  Graph(int N) : N(N), M(0), prepared(0) {}

  void build(int n) {
    N = n, M = 0;
    prepared = 0;
    edges.clear();
    indptr.clear();
    csr_edges.clear();
    vc_deg.clear();
    vc_indeg.clear();
    vc_outdeg.clear();
  }

  void add(int frm, int to, T cost = 1, int i = -1) {
    assert(!prepared);
    assert(0 <= frm && 0 <= to && to < N);
    if (i == -1) i = M;
    auto e = edge_type({frm, to, cost, i});
    edges.eb(e);
    ++M;
  }

#ifdef FASTIO
  // wt, off
  void read_tree(bool wt = false, int off = 1) { read_graph(N - 1, wt, off); }

  void read_graph(int M, bool wt = false, int off = 1) {
    for (int m = 0; m < M; ++m) {
      INT(a, b);
      a -= off, b -= off;
      if (!wt) {
        add(a, b);
      } else {
        T c;
        read(c);
        add(a, b, c);
      }
    }
    build();
  }
#endif

  void build() {
    assert(!prepared);
    prepared = true;
    indptr.assign(N + 1, 0);
    for (auto&& e: edges) {
      indptr[e.frm + 1]++;
      if (!directed) indptr[e.to + 1]++;
    }
    for (int v = 0; v < N; ++v) { indptr[v + 1] += indptr[v]; }
    auto counter = indptr;
    csr_edges.resize(indptr.back() + 1);
    for (auto&& e: edges) {
      csr_edges[counter[e.frm]++] = e;
      if (!directed)
        csr_edges[counter[e.to]++] = edge_type({e.to, e.frm, e.cost, e.id});
    }
  }

  OutgoingEdges operator[](int v) const {
    assert(prepared);
    return {this, indptr[v], indptr[v + 1]};
  }

  vc<int> deg_array() {
    if (vc_deg.empty()) calc_deg();
    return vc_deg;
  }

  pair<vc<int>, vc<int>> deg_array_inout() {
    if (vc_indeg.empty()) calc_deg_inout();
    return {vc_indeg, vc_outdeg};
  }

  int deg(int v) {
    if (vc_deg.empty()) calc_deg();
    return vc_deg[v];
  }

  int in_deg(int v) {
    if (vc_indeg.empty()) calc_deg_inout();
    return vc_indeg[v];
  }

  int out_deg(int v) {
    if (vc_outdeg.empty()) calc_deg_inout();
    return vc_outdeg[v];
  }

#ifdef FASTIO
  void debug() {
    print("Graph");
    if (!prepared) {
      print("frm to cost id");
      for (auto&& e: edges) print(e.frm, e.to, e.cost, e.id);
    } else {
      print("indptr", indptr);
      print("frm to cost id");
      FOR(v, N) for (auto&& e: (*this)[v]) print(e.frm, e.to, e.cost, e.id);
    }
  }
#endif

  vc<int> new_idx;
  vc<bool> used_e;

  // G における頂点 V[i] が、新しいグラフで i になるようにする
  // {G, es}
  // sum(deg(v)) の計算量になっていて、
  // 新しいグラフの n+m より大きい可能性があるので注意
  Graph<T, directed> rearrange(vc<int> V, bool keep_eid = 0) {
    if (len(new_idx) != N) new_idx.assign(N, -1);
    int n = len(V);
    FOR(i, n) new_idx[V[i]] = i;
    Graph<T, directed> G(n);
    vc<int> history;
    FOR(i, n) {
      for (auto&& e: (*this)[V[i]]) {
        if (len(used_e) <= e.id) used_e.resize(e.id + 1);
        if (used_e[e.id]) continue;
        int a = e.frm, b = e.to;
        if (new_idx[a] != -1 && new_idx[b] != -1) {
          history.eb(e.id);
          used_e[e.id] = 1;
          int eid = (keep_eid ? e.id : -1);
          G.add(new_idx[a], new_idx[b], e.cost, eid);
        }
      }
    }
    FOR(i, n) new_idx[V[i]] = -1;
    for (auto&& eid: history) used_e[eid] = 0;
    G.build();
    return G;
  }

  Graph<T, true> to_directed_tree(int root = -1) {
    if (root == -1) root = 0;
    assert(!is_directed && prepared && M == N - 1);
    Graph<T, true> G1(N);
    vc<int> par(N, -1);
    auto dfs = [&](auto& dfs, int v) -> void {
      for (auto& e: (*this)[v]) {
        if (e.to == par[v]) continue;
        par[e.to] = v, dfs(dfs, e.to);
      }
    };
    dfs(dfs, root);
    for (auto& e: edges) {
      int a = e.frm, b = e.to;
      if (par[a] == b) swap(a, b);
      assert(par[b] == a);
      G1.add(a, b, e.cost);
    }
    G1.build();
    return G1;
  }

private:
  void calc_deg() {
    assert(vc_deg.empty());
    vc_deg.resize(N);
    for (auto&& e: edges) vc_deg[e.frm]++, vc_deg[e.to]++;
  }

  void calc_deg_inout() {
    assert(vc_indeg.empty());
    vc_indeg.resize(N);
    vc_outdeg.resize(N);
    for (auto&& e: edges) { vc_indeg[e.to]++, vc_outdeg[e.frm]++; }
  }
};
#line 5 "string/suffix_tree.hpp"

// https://twitter.com/maspy_stars/status/1565901414236205057?s=20&t=S2Tu6ayozHcakxai8dmh4g
// 各ノードは、suffix array での長方形領域と見なして、
// グラフおよび、領域データを作る。
// sample: test/my_test/suffix_tree.test.cpp
template <typename SUFFIX>
pair<Graph<int, 1>, vc<tuple<int, int, int, int>>> suffix_tree(SUFFIX& X) {
  auto SA = X.SA;
  auto ISA = X.ISA;
  auto LCP = X.LCP;

  vc<tuple<int, int, int, int>> dat;
  vc<pair<int, int>> edges;

  int N = len(SA);
  if (N == 1) {
    Graph<int, 1> G(2);
    G.add(0, 1);
    G.build();
    dat.eb(0, 1, 1, 1), dat.eb(0, 1, 1, 2);
    return {G, dat};
  }

  dat.eb(0, N, 0, 0);
  CartesianTree<int, true> CT(LCP);

  auto dfs = [&](auto& dfs, int p, int idx, int h) -> void {
    int L = CT.range[idx].fi;
    int R = CT.range[idx].se + 1;
    int hh = LCP[idx];
    if (h < hh) {
      edges.eb(p, len(dat));
      p = len(dat);
      dat.eb(L, R, h, hh);
    }
    if (CT.lch[idx] == -1) {
      if (hh < N - SA[idx]) {
        edges.eb(p, len(dat));
        dat.eb(idx, idx + 1, hh, N - SA[idx]);
      }
    } else {
      dfs(dfs, p, CT.lch[idx], hh);
    }
    if (CT.rch[idx] == -1) {
      if (hh < N - SA[idx + 1]) {
        edges.eb(p, len(dat));
        dat.eb(idx + 1, idx + 2, hh, N - SA[idx + 1]);
      }
    } else {
      dfs(dfs, p, CT.rch[idx], hh);
    }
  };
  int r = CT.root;
  if (LCP[r] > 0) {
    edges.eb(0, 1);
    dat.eb(0, N, 0, LCP[r]);
    dfs(dfs, 1, r, LCP[r]);
  } else {
    dfs(dfs, 0, r, 0);
  }
  for (auto& [a, b, c, d]: dat) ++c, ++d;

  Graph<int, 1> G(len(dat));
  for (auto&& [a, b]: edges) G.add(a, b);
  G.build();
  return {G, dat};
}
Back to top page