library

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub maspypy/library

:heavy_check_mark: knapsack/subset_sum.hpp

Depends on

Verified with

Code

#include "ds/my_bitset.hpp"
#include "enumerate/bits.hpp"

// O(N MAX(vals))
template <typename T>
vc<int> subset_sum_solution_1(vc<T>& vals, int target) {
  int n = len(vals);
  if (n == 0) return {};
  int mx = MAX(vals);
  int b = 0, sb = 0;
  while (b < n && sb + vals[b] <= target) { sb += vals[b++]; }
  if (b == n && sb != target) return {};

  int off = target - mx + 1;
  vc<int> dp(2 * mx, -1);
  vv(int, PAR, n, 2 * mx, -1);
  dp[sb - off] = b;
  FOR3(i, b, n) {
    auto newdp = dp;
    auto& par = PAR[i];
    int a = vals[i];
    FOR(j, mx) {
      if (chmax(newdp[j + a], dp[j])) { par[j + a] = -2; }
    }
    FOR3_R(j, mx, 2 * mx) {
      FOR3_R(k, max(dp[j], 0), newdp[j]) {
        if (chmax(newdp[j - vals[k]], k)) par[j - vals[k]] = k;
      }
    }
    swap(dp, newdp);
  }
  if (dp[mx - 1] == -1) return {};
  vc<bool> use(n);
  int i = n - 1, j = mx - 1;
  while (i >= b) {
    int p = PAR[i][j];
    if (p == -2) {
      use[i] = !use[i];
      j -= vals[i--];
    }
    elif (p == -1) { --i; }
    else {
      use[p] = !use[p];
      j += vals[p];
    }
  }
  while (i >= 0) {
    use[i] = !use[i];
    --i;
  }
  vc<int> I;
  FOR(i, n) if (use[i]) I.eb(i);

  ll sm = 0;
  for (auto&& i: I) sm += vals[i];
  assert(sm == target);

  return I;
}

// O(N target / w)
template <typename T>
vc<int> subset_sum_solution_2(vc<T>& vals, int target) {
  int n = len(vals);
  auto I = argsort(vals);
  My_Bitset dp(1, 1);
  vc<int> last(target + 1, -1);
  FOR(k, n) {
    int v = vals[I[k]];
    if (v > target) continue;
    My_Bitset newdp = dp;
    int new_size = len(dp) + v;
    newdp.resize(new_size);
    newdp.or_to_range(v, new_size, dp);
    if (len(newdp) > target + 1) newdp.resize(target + 1);
    // update したところをメモ
    FOR(i, len(newdp.dat)) {
      u64 upd = (i < len(dp.dat) ? dp.dat[i] : u64(0)) ^ newdp.dat[i];
      enumerate_bits_64(upd, [&](int p) -> void { last[(i << 6) | p] = I[k]; });
    }
    swap(dp, newdp);
  }
  if (target >= len(dp) || !dp[target]) return {};
  vc<int> ANS;
  while (target > 0) {
    int i = last[target];
    ANS.eb(i);
    target -= vals[i];
  }
  return ANS;
}

// O(sum^{1.5} / w)
// sum=10^6 で 150ms:https://codeforces.com/contest/755/problem/F
template <typename T>
vc<int> subset_sum_solution_3(vc<T>& vals, int target) {
  int SM = SUM<int>(vals);
  int N = len(vals);
  vvc<int> IDS(SM + 1);
  FOR(i, N) IDS[vals[i]].eb(i);
  vc<pair<int, int>> par(N, {-1, -1});
  vc<int> grp_vals;
  vc<int> raw_idx;
  FOR(x, 1, SM + 1) {
    auto& I = IDS[x];
    while (len(I) >= 3) {
      int a = POP(I), b = POP(I);
      int c = len(par);
      par.eb(a, b);
      IDS[2 * x].eb(c);
    }
    for (auto& i: I) {
      grp_vals.eb(x);
      raw_idx.eb(i);
    }
  }
  auto I = subset_sum_solution_2<int>(grp_vals, target);
  vc<int> ANS;
  for (auto& i: I) {
    vc<int> st = {raw_idx[i]};
    while (len(st)) {
      auto c = POP(st);
      if (c < N) {
        ANS.eb(c);
        continue;
      }
      auto [a, b] = par[c];
      st.eb(a), st.eb(b);
    }
  }
  return ANS;
}

template <typename T>
vc<int> subset_sum_solution_4(vc<T>& vals, T target) {
  if (target <= 0) return {};
  int N = len(vals);
  int M = N / 2;

  auto calc = [&](int L, int R) -> vc<T> {
    int n = R - L;
    vc<T> dp = {0};
    FOR(i, n) {
      T a = vals[L + i];
      vc<T> dp1(len(dp));
      FOR(k, len(dp)) dp1[k] = dp[k] + a;
      vc<T> newdp;
      merge(all(dp), all(dp1), back_inserter(newdp));
      swap(dp, newdp);
    }
    return dp;
  };

  auto restore = [&](int L, int R, T v) -> vc<int> {
    int n = R - L;
    vc<T> dp(1 << n);
    FOR(i, n) FOR(s, 1 << i) dp[s | 1 << i] = dp[s] + vals[L + i];
    FOR(s, 1 << n) {
      if (dp[s] == v) {
        vc<int> I;
        FOR(i, n) if (s >> i & 1) I.eb(L + i);
        return I;
      }
    }
    assert(0);
    return {};
  };

  auto dp1 = calc(0, M);
  auto dp2 = calc(M, N);
  int t = 0;
  FOR_R(s, len(dp1)) {
    while (t + 1 < len(dp2) && dp1[s] + dp2[t + 1] <= target) { ++t; }
    if (dp1[s] + dp2[t] == target) {
      vc<int> I1 = restore(0, M, dp1[s]);
      vc<int> I2 = restore(M, N, dp2[t]);
      I1.insert(I1.end(), all(I2));
      return I1;
    }
  }
  return {};
}

template <typename T>
vc<int> subset_sum(vc<T>& vals, T target) {
  if (target <= 0) return {};
  int n = len(vals);
  if (n == 0) return {};
  int mx = MAX(vals);

  // しきい値の調整をしていない
  // solution 1: O(N mx))
  // solution 2: O(N target / w)
  // solution 3: O(sum^1.5 / w)
  // solution 4: O(2^(N/2))
  double x1 = double(len(vals)) * mx;
  double x2 = double(len(vals)) * target / 500.0;
  double x3 = pow(SUM<double>(vals), 1.5) / 500.0;
  double x4 = pow(2.0, 0.5 * len(vals));
  double mi = min({x1, x2, x3, x4});
  if (x1 == mi) return subset_sum_solution_1(vals, target);
  if (x2 == mi) return subset_sum_solution_2(vals, target);
  if (x3 == mi) return subset_sum_solution_3(vals, target);
  return subset_sum_solution_4(vals, target);
}
#line 2 "ds/my_bitset.hpp"

// https://codeforces.com/contest/914/problem/F
// https://yukicoder.me/problems/no/142
// わずかに普通の bitset より遅いときもあるようだが,
// 固定長にしたくないときや slice 操作が必要なときに使う
struct My_Bitset {
  using T = My_Bitset;
  int N;
  vc<u64> dat;

  // x で埋める
  My_Bitset(int N = 0, int x = 0) : N(N) {
    assert(x == 0 || x == 1);
    u64 v = (x == 0 ? 0 : -1);
    dat.assign((N + 63) >> 6, v);
    if (N) dat.back() >>= (64 * len(dat) - N);
  }

  int size() { return N; }

  void resize(int size) {
    dat.resize((size + 63) >> 6);
    int remainingBits = size & 63;
    if (remainingBits != 0) {
      u64 mask = (u64(1) << remainingBits) - 1;
      dat.back() &= mask;
    }
    N = size;
  }

  // thanks to chatgpt!
  class Proxy {
  public:
    Proxy(vc<u64> &d, int i) : dat(d), index(i) {}
    operator bool() const { return (dat[index >> 6] >> (index & 63)) & 1; }

    Proxy &operator=(u64 value) {
      dat[index >> 6] &= ~(u64(1) << (index & 63));
      dat[index >> 6] |= (value & 1) << (index & 63);
      return *this;
    }
    void flip() {
      dat[index >> 6] ^= (u64(1) << (index & 63)); // XOR to flip the bit
    }

  private:
    vc<u64> &dat;
    int index;
  };

  Proxy operator[](int i) { return Proxy(dat, i); }

  bool operator==(const T &p) {
    assert(N == p.N);
    FOR(i, len(dat)) if (dat[i] != p.dat[i]) return false;
    return true;
  }

  T &operator&=(const T &p) {
    assert(N == p.N);
    FOR(i, len(dat)) dat[i] &= p.dat[i];
    return *this;
  }
  T &operator|=(const T &p) {
    assert(N == p.N);
    FOR(i, len(dat)) dat[i] |= p.dat[i];
    return *this;
  }
  T &operator^=(const T &p) {
    assert(N == p.N);
    FOR(i, len(dat)) dat[i] ^= p.dat[i];
    return *this;
  }
  T operator&(const T &p) const { return T(*this) &= p; }
  T operator|(const T &p) const { return T(*this) |= p; }
  T operator^(const T &p) const { return T(*this) ^= p; }
  T operator~() const {
    T p = (*this);
    p.flip_range(0, N);
    return p;
  }

  int count() {
    int ans = 0;
    for (u64 val: dat) ans += popcnt(val);
    return ans;
  }

  int next(int i) {
    chmax(i, 0);
    if (i >= N) return N;
    int k = i >> 6;
    {
      u64 x = dat[k];
      int s = i & 63;
      x = (x >> s) << s;
      if (x) return (k << 6) | lowbit(x);
    }
    FOR(idx, k + 1, len(dat)) {
      if (dat[idx] == 0) continue;
      return (idx << 6) | lowbit(dat[idx]);
    }
    return N;
  }

  int prev(int i) {
    chmin(i, N - 1);
    if (i <= -1) return -1;
    int k = i >> 6;
    if ((i & 63) < 63) {
      u64 x = dat[k];
      x &= (u64(1) << ((i & 63) + 1)) - 1;
      if (x) return (k << 6) | topbit(x);
      --k;
    }
    FOR_R(idx, k + 1) {
      if (dat[idx] == 0) continue;
      return (idx << 6) | topbit(dat[idx]);
    }
    return -1;
  }

  My_Bitset range(int L, int R) {
    assert(L <= R);
    My_Bitset p(R - L);
    int rm = (R - L) & 63;
    FOR(rm) {
      p[R - L - 1] = bool((*this)[R - 1]);
      --R;
    }
    int n = (R - L) >> 6;
    int hi = L & 63;
    int lo = 64 - hi;
    int s = L >> 6;
    if (hi == 0) {
      FOR(i, n) { p.dat[i] ^= dat[s + i]; }
    } else {
      FOR(i, n) { p.dat[i] ^= (dat[s + i] >> hi) ^ (dat[s + i + 1] << lo); }
    }
    return p;
  }

  int count_range(int L, int R) {
    assert(L <= R);
    int cnt = 0;
    while ((L < R) && (L & 63)) cnt += (*this)[L++];
    while ((L < R) && (R & 63)) cnt += (*this)[--R];
    int l = L >> 6, r = R >> 6;
    FOR(i, l, r) cnt += popcnt(dat[i]);
    return cnt;
  }

  // [L,R) に p を代入
  void assign_to_range(int L, int R, My_Bitset &p) {
    assert(p.N == R - L);
    int a = 0, b = p.N;
    while (L < R && (L & 63)) { (*this)[L++] = bool(p[a++]); }
    while (L < R && (R & 63)) { (*this)[--R] = bool(p[--b]); }
    // p[a:b] を [L:R] に
    int l = L >> 6, r = R >> 6;
    int s = a >> 6, t = b >> t;
    int n = r - l;
    if (!(a & 63)) {
      FOR(i, n) dat[l + i] = p.dat[s + i];
    } else {
      int hi = a & 63;
      int lo = 64 - hi;
      FOR(i, n) dat[l + i] = (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
    }
  }

  // [L,R) に p を xor
  void xor_to_range(int L, int R, My_Bitset &p) {
    assert(p.N == R - L);
    int a = 0, b = p.N;
    while (L < R && (L & 63)) {
      dat[L >> 6] ^= u64(p[a]) << (L & 63);
      ++a, ++L;
    }
    while (L < R && (R & 63)) {
      --b, --R;
      dat[R >> 6] ^= u64(p[b]) << (R & 63);
    }
    // p[a:b] を [L:R] に
    int l = L >> 6, r = R >> 6;
    int s = a >> 6, t = b >> t;
    int n = r - l;
    if (!(a & 63)) {
      FOR(i, n) dat[l + i] ^= p.dat[s + i];
    } else {
      int hi = a & 63;
      int lo = 64 - hi;
      FOR(i, n) dat[l + i] ^= (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
    }
  }

  // [L,R) に p を and
  void and_to_range(int L, int R, My_Bitset &p) {
    assert(p.N == R - L);
    int a = 0, b = p.N;
    while (L < R && (L & 63)) {
      if (!p[a++]) (*this)[L++] = 0;
    }
    while (L < R && (R & 63)) {
      if (!p[--b]) (*this)[--R] = 0;
    }
    // p[a:b] を [L:R] に
    int l = L >> 6, r = R >> 6;
    int s = a >> 6, t = b >> t;
    int n = r - l;
    if (!(a & 63)) {
      FOR(i, n) dat[l + i] &= p.dat[s + i];
    } else {
      int hi = a & 63;
      int lo = 64 - hi;
      FOR(i, n) dat[l + i] &= (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
    }
  }

  // [L,R) に p を or
  void or_to_range(int L, int R, My_Bitset &p) {
    assert(p.N == R - L);
    int a = 0, b = p.N;
    while (L < R && (L & 63)) {
      dat[L >> 6] |= u64(p[a]) << (L & 63);
      ++a, ++L;
    }
    while (L < R && (R & 63)) {
      --b, --R;
      dat[R >> 6] |= u64(p[b]) << (R & 63);
    }
    // p[a:b] を [L:R] に
    int l = L >> 6, r = R >> 6;
    int s = a >> 6, t = b >> t;
    int n = r - l;
    if (!(a & 63)) {
      FOR(i, n) dat[l + i] |= p.dat[s + i];
    } else {
      int hi = a & 63;
      int lo = 64 - hi;
      FOR(i, n) dat[l + i] |= (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
    }
  }

  // [L,R) を 1 に変更
  void set_range(int L, int R) {
    while (L < R && (L & 63)) { set(L++); }
    while (L < R && (R & 63)) { set(--R); }
    FOR(i, L >> 6, R >> 6) dat[i] = u64(-1);
  }

  // [L,R) を 1 に変更
  void reset_range(int L, int R) {
    while (L < R && (L & 63)) { reset(L++); }
    while (L < R && (R & 63)) { reset(--R); }
    FOR(i, L >> 6, R >> 6) dat[i] = u64(0);
  }

  // [L,R) を flip
  void flip_range(int L, int R) {
    while (L < R && (L & 63)) { flip(L++); }
    while (L < R && (R & 63)) { flip(--R); }
    FOR(i, L >> 6, R >> 6) dat[i] ^= u64(-1);
  }

  // bitset に仕様を合わせる
  void set(int i) { (*this)[i] = 1; }
  void reset(int i) { (*this)[i] = 0; }
  void flip(int i) { (*this)[i].flip(); }
  void set() {
    fill(all(dat), u64(-1));
    resize(N);
  }
  void reset() { fill(all(dat), 0); }
  void flip() {
    FOR(i, len(dat) - 1) { dat[i] = u64(-1) ^ dat[i]; }
    int i = len(dat) - 1;
    FOR(k, 64) {
      if (64 * i + k >= size()) break;
      flip(64 * i + k);
    }
  }
  bool any() {
    FOR(i, len(dat)) {
      if (dat[i]) return true;
    }
    return false;
  }

  int _Find_first() { return next(0); }
  int _Find_next(int p) { return next(p + 1); }

  static string TO_STR[256];
  string to_string() const {
    if (TO_STR[0].empty()) precompute();
    string S;
    for (auto &x: dat) { FOR(i, 8) S += TO_STR[(x >> (8 * i) & 255)]; }
    S.resize(N);
    return S;
  }

  static void precompute() {
    FOR(s, 256) {
      string x;
      FOR(i, 8) x += '0' + (s >> i & 1);
      TO_STR[s] = x;
    }
  }
};
string My_Bitset::TO_STR[256];
#line 1 "enumerate/bits.hpp"
template <typename F>
void enumerate_bits_32(u32 s, F f) {
  while (s) {
    int i = __builtin_ctz(s);
    f(i);
    s ^= 1 << i;
  }
}

template <typename F>
void enumerate_bits_64(u64 s, F f) {
  while (s) {
    int i = __builtin_ctzll(s);
    f(i);
    s ^= u64(1) << i;
  }
}

template <typename BS, typename F>
void enumerate_bits_bitset(BS& b, int L, int R, F f) {
  int p = (b[L] ? L : b._Find_next(L));
  while (p < R) {
    f(p);
    p = b._Find_next(p);
  }
}
#line 3 "knapsack/subset_sum.hpp"

// O(N MAX(vals))
template <typename T>
vc<int> subset_sum_solution_1(vc<T>& vals, int target) {
  int n = len(vals);
  if (n == 0) return {};
  int mx = MAX(vals);
  int b = 0, sb = 0;
  while (b < n && sb + vals[b] <= target) { sb += vals[b++]; }
  if (b == n && sb != target) return {};

  int off = target - mx + 1;
  vc<int> dp(2 * mx, -1);
  vv(int, PAR, n, 2 * mx, -1);
  dp[sb - off] = b;
  FOR3(i, b, n) {
    auto newdp = dp;
    auto& par = PAR[i];
    int a = vals[i];
    FOR(j, mx) {
      if (chmax(newdp[j + a], dp[j])) { par[j + a] = -2; }
    }
    FOR3_R(j, mx, 2 * mx) {
      FOR3_R(k, max(dp[j], 0), newdp[j]) {
        if (chmax(newdp[j - vals[k]], k)) par[j - vals[k]] = k;
      }
    }
    swap(dp, newdp);
  }
  if (dp[mx - 1] == -1) return {};
  vc<bool> use(n);
  int i = n - 1, j = mx - 1;
  while (i >= b) {
    int p = PAR[i][j];
    if (p == -2) {
      use[i] = !use[i];
      j -= vals[i--];
    }
    elif (p == -1) { --i; }
    else {
      use[p] = !use[p];
      j += vals[p];
    }
  }
  while (i >= 0) {
    use[i] = !use[i];
    --i;
  }
  vc<int> I;
  FOR(i, n) if (use[i]) I.eb(i);

  ll sm = 0;
  for (auto&& i: I) sm += vals[i];
  assert(sm == target);

  return I;
}

// O(N target / w)
template <typename T>
vc<int> subset_sum_solution_2(vc<T>& vals, int target) {
  int n = len(vals);
  auto I = argsort(vals);
  My_Bitset dp(1, 1);
  vc<int> last(target + 1, -1);
  FOR(k, n) {
    int v = vals[I[k]];
    if (v > target) continue;
    My_Bitset newdp = dp;
    int new_size = len(dp) + v;
    newdp.resize(new_size);
    newdp.or_to_range(v, new_size, dp);
    if (len(newdp) > target + 1) newdp.resize(target + 1);
    // update したところをメモ
    FOR(i, len(newdp.dat)) {
      u64 upd = (i < len(dp.dat) ? dp.dat[i] : u64(0)) ^ newdp.dat[i];
      enumerate_bits_64(upd, [&](int p) -> void { last[(i << 6) | p] = I[k]; });
    }
    swap(dp, newdp);
  }
  if (target >= len(dp) || !dp[target]) return {};
  vc<int> ANS;
  while (target > 0) {
    int i = last[target];
    ANS.eb(i);
    target -= vals[i];
  }
  return ANS;
}

// O(sum^{1.5} / w)
// sum=10^6 で 150ms:https://codeforces.com/contest/755/problem/F
template <typename T>
vc<int> subset_sum_solution_3(vc<T>& vals, int target) {
  int SM = SUM<int>(vals);
  int N = len(vals);
  vvc<int> IDS(SM + 1);
  FOR(i, N) IDS[vals[i]].eb(i);
  vc<pair<int, int>> par(N, {-1, -1});
  vc<int> grp_vals;
  vc<int> raw_idx;
  FOR(x, 1, SM + 1) {
    auto& I = IDS[x];
    while (len(I) >= 3) {
      int a = POP(I), b = POP(I);
      int c = len(par);
      par.eb(a, b);
      IDS[2 * x].eb(c);
    }
    for (auto& i: I) {
      grp_vals.eb(x);
      raw_idx.eb(i);
    }
  }
  auto I = subset_sum_solution_2<int>(grp_vals, target);
  vc<int> ANS;
  for (auto& i: I) {
    vc<int> st = {raw_idx[i]};
    while (len(st)) {
      auto c = POP(st);
      if (c < N) {
        ANS.eb(c);
        continue;
      }
      auto [a, b] = par[c];
      st.eb(a), st.eb(b);
    }
  }
  return ANS;
}

template <typename T>
vc<int> subset_sum_solution_4(vc<T>& vals, T target) {
  if (target <= 0) return {};
  int N = len(vals);
  int M = N / 2;

  auto calc = [&](int L, int R) -> vc<T> {
    int n = R - L;
    vc<T> dp = {0};
    FOR(i, n) {
      T a = vals[L + i];
      vc<T> dp1(len(dp));
      FOR(k, len(dp)) dp1[k] = dp[k] + a;
      vc<T> newdp;
      merge(all(dp), all(dp1), back_inserter(newdp));
      swap(dp, newdp);
    }
    return dp;
  };

  auto restore = [&](int L, int R, T v) -> vc<int> {
    int n = R - L;
    vc<T> dp(1 << n);
    FOR(i, n) FOR(s, 1 << i) dp[s | 1 << i] = dp[s] + vals[L + i];
    FOR(s, 1 << n) {
      if (dp[s] == v) {
        vc<int> I;
        FOR(i, n) if (s >> i & 1) I.eb(L + i);
        return I;
      }
    }
    assert(0);
    return {};
  };

  auto dp1 = calc(0, M);
  auto dp2 = calc(M, N);
  int t = 0;
  FOR_R(s, len(dp1)) {
    while (t + 1 < len(dp2) && dp1[s] + dp2[t + 1] <= target) { ++t; }
    if (dp1[s] + dp2[t] == target) {
      vc<int> I1 = restore(0, M, dp1[s]);
      vc<int> I2 = restore(M, N, dp2[t]);
      I1.insert(I1.end(), all(I2));
      return I1;
    }
  }
  return {};
}

template <typename T>
vc<int> subset_sum(vc<T>& vals, T target) {
  if (target <= 0) return {};
  int n = len(vals);
  if (n == 0) return {};
  int mx = MAX(vals);

  // しきい値の調整をしていない
  // solution 1: O(N mx))
  // solution 2: O(N target / w)
  // solution 3: O(sum^1.5 / w)
  // solution 4: O(2^(N/2))
  double x1 = double(len(vals)) * mx;
  double x2 = double(len(vals)) * target / 500.0;
  double x3 = pow(SUM<double>(vals), 1.5) / 500.0;
  double x4 = pow(2.0, 0.5 * len(vals));
  double mi = min({x1, x2, x3, x4});
  if (x1 == mi) return subset_sum_solution_1(vals, target);
  if (x2 == mi) return subset_sum_solution_2(vals, target);
  if (x3 == mi) return subset_sum_solution_3(vals, target);
  return subset_sum_solution_4(vals, target);
}
Back to top page