This documentation is automatically generated by online-judge-tools/verification-helper
#include "knapsack/subset_sum.hpp"
#include "ds/my_bitset.hpp"
#include "enumerate/bits.hpp"
// O(N MAX(vals))
template <typename T>
vc<int> subset_sum_solution_1(vc<T>& vals, int target) {
int n = len(vals);
if (n == 0) return {};
int mx = MAX(vals);
int b = 0, sb = 0;
while (b < n && sb + vals[b] <= target) { sb += vals[b++]; }
if (b == n && sb != target) return {};
int off = target - mx + 1;
vc<int> dp(2 * mx, -1);
vv(int, PAR, n, 2 * mx, -1);
dp[sb - off] = b;
FOR3(i, b, n) {
auto newdp = dp;
auto& par = PAR[i];
int a = vals[i];
FOR(j, mx) {
if (chmax(newdp[j + a], dp[j])) { par[j + a] = -2; }
}
FOR3_R(j, mx, 2 * mx) {
FOR3_R(k, max(dp[j], 0), newdp[j]) {
if (chmax(newdp[j - vals[k]], k)) par[j - vals[k]] = k;
}
}
swap(dp, newdp);
}
if (dp[mx - 1] == -1) return {};
vc<bool> use(n);
int i = n - 1, j = mx - 1;
while (i >= b) {
int p = PAR[i][j];
if (p == -2) {
use[i] = !use[i];
j -= vals[i--];
}
elif (p == -1) { --i; }
else {
use[p] = !use[p];
j += vals[p];
}
}
while (i >= 0) {
use[i] = !use[i];
--i;
}
vc<int> I;
FOR(i, n) if (use[i]) I.eb(i);
ll sm = 0;
for (auto&& i: I) sm += vals[i];
assert(sm == target);
return I;
}
// O(N target / w)
template <typename T>
vc<int> subset_sum_solution_2(vc<T>& vals, int target) {
int n = len(vals);
auto I = argsort(vals);
My_Bitset dp(1, 1);
vc<int> last(target + 1, -1);
FOR(k, n) {
int v = vals[I[k]];
if (v > target) continue;
My_Bitset newdp = dp;
int new_size = len(dp) + v;
newdp.resize(new_size);
newdp.or_to_range(v, new_size, dp);
if (len(newdp) > target + 1) newdp.resize(target + 1);
// update したところをメモ
FOR(i, len(newdp.dat)) {
u64 upd = (i < len(dp.dat) ? dp.dat[i] : u64(0)) ^ newdp.dat[i];
enumerate_bits_64(upd, [&](int p) -> void { last[(i << 6) | p] = I[k]; });
}
swap(dp, newdp);
}
if (target >= len(dp) || !dp[target]) return {};
vc<int> ANS;
while (target > 0) {
int i = last[target];
ANS.eb(i);
target -= vals[i];
}
return ANS;
}
// O(sum^{1.5} / w)
// sum=10^6 で 150ms:https://codeforces.com/contest/755/problem/F
template <typename T>
vc<int> subset_sum_solution_3(vc<T>& vals, int target) {
int SM = SUM<int>(vals);
int N = len(vals);
vvc<int> IDS(SM + 1);
FOR(i, N) IDS[vals[i]].eb(i);
vc<pair<int, int>> par(N, {-1, -1});
vc<int> grp_vals;
vc<int> raw_idx;
FOR(x, 1, SM + 1) {
auto& I = IDS[x];
while (len(I) >= 3) {
int a = POP(I), b = POP(I);
int c = len(par);
par.eb(a, b);
IDS[2 * x].eb(c);
}
for (auto& i: I) {
grp_vals.eb(x);
raw_idx.eb(i);
}
}
auto I = subset_sum_solution_2<int>(grp_vals, target);
vc<int> ANS;
for (auto& i: I) {
vc<int> st = {raw_idx[i]};
while (len(st)) {
auto c = POP(st);
if (c < N) {
ANS.eb(c);
continue;
}
auto [a, b] = par[c];
st.eb(a), st.eb(b);
}
}
return ANS;
}
template <typename T>
vc<int> subset_sum_solution_4(vc<T>& vals, T target) {
if (target <= 0) return {};
int N = len(vals);
int M = N / 2;
auto calc = [&](int L, int R) -> vc<T> {
int n = R - L;
vc<T> dp = {0};
FOR(i, n) {
T a = vals[L + i];
vc<T> dp1(len(dp));
FOR(k, len(dp)) dp1[k] = dp[k] + a;
vc<T> newdp;
merge(all(dp), all(dp1), back_inserter(newdp));
swap(dp, newdp);
}
return dp;
};
auto restore = [&](int L, int R, T v) -> vc<int> {
int n = R - L;
vc<T> dp(1 << n);
FOR(i, n) FOR(s, 1 << i) dp[s | 1 << i] = dp[s] + vals[L + i];
FOR(s, 1 << n) {
if (dp[s] == v) {
vc<int> I;
FOR(i, n) if (s >> i & 1) I.eb(L + i);
return I;
}
}
assert(0);
return {};
};
auto dp1 = calc(0, M);
auto dp2 = calc(M, N);
int t = 0;
FOR_R(s, len(dp1)) {
while (t + 1 < len(dp2) && dp1[s] + dp2[t + 1] <= target) { ++t; }
if (dp1[s] + dp2[t] == target) {
vc<int> I1 = restore(0, M, dp1[s]);
vc<int> I2 = restore(M, N, dp2[t]);
I1.insert(I1.end(), all(I2));
return I1;
}
}
return {};
}
template <typename T>
vc<int> subset_sum(vc<T>& vals, T target) {
if (target <= 0) return {};
int n = len(vals);
if (n == 0) return {};
int mx = MAX(vals);
// しきい値の調整をしていない
// solution 1: O(N mx))
// solution 2: O(N target / w)
// solution 3: O(sum^1.5 / w)
// solution 4: O(2^(N/2))
double x1 = double(len(vals)) * mx;
double x2 = double(len(vals)) * target / 500.0;
double x3 = pow(SUM<double>(vals), 1.5) / 500.0;
double x4 = pow(2.0, 0.5 * len(vals));
double mi = min({x1, x2, x3, x4});
if (x1 == mi) return subset_sum_solution_1(vals, target);
if (x2 == mi) return subset_sum_solution_2(vals, target);
if (x3 == mi) return subset_sum_solution_3(vals, target);
return subset_sum_solution_4(vals, target);
}
#line 2 "ds/my_bitset.hpp"
// https://codeforces.com/contest/914/problem/F
// https://yukicoder.me/problems/no/142
// わずかに普通の bitset より遅いときもあるようだが,
// 固定長にしたくないときや slice 操作が必要なときに使う
struct My_Bitset {
using T = My_Bitset;
int N;
vc<u64> dat;
// x で埋める
My_Bitset(int N = 0, int x = 0) : N(N) {
assert(x == 0 || x == 1);
u64 v = (x == 0 ? 0 : -1);
dat.assign((N + 63) >> 6, v);
if (N) dat.back() >>= (64 * len(dat) - N);
}
int size() { return N; }
void resize(int size) {
dat.resize((size + 63) >> 6);
int remainingBits = size & 63;
if (remainingBits != 0) {
u64 mask = (u64(1) << remainingBits) - 1;
dat.back() &= mask;
}
N = size;
}
// thanks to chatgpt!
class Proxy {
public:
Proxy(vc<u64> &d, int i) : dat(d), index(i) {}
operator bool() const { return (dat[index >> 6] >> (index & 63)) & 1; }
Proxy &operator=(u64 value) {
dat[index >> 6] &= ~(u64(1) << (index & 63));
dat[index >> 6] |= (value & 1) << (index & 63);
return *this;
}
void flip() {
dat[index >> 6] ^= (u64(1) << (index & 63)); // XOR to flip the bit
}
private:
vc<u64> &dat;
int index;
};
Proxy operator[](int i) { return Proxy(dat, i); }
T &operator&=(const T &p) {
assert(N == p.N);
FOR(i, len(dat)) dat[i] &= p.dat[i];
return *this;
}
T &operator|=(const T &p) {
assert(N == p.N);
FOR(i, len(dat)) dat[i] |= p.dat[i];
return *this;
}
T &operator^=(const T &p) {
assert(N == p.N);
FOR(i, len(dat)) dat[i] ^= p.dat[i];
return *this;
}
T operator&(const T &p) const { return T(*this) &= p; }
T operator|(const T &p) const { return T(*this) |= p; }
T operator^(const T &p) const { return T(*this) ^= p; }
int count() {
int ans = 0;
for (u64 val: dat) ans += popcnt(val);
return ans;
}
int next(int i) {
chmax(i, 0);
if (i >= N) return N;
int k = i >> 6;
{
u64 x = dat[k];
int s = i & 63;
x = (x >> s) << s;
if (x) return (k << 6) | lowbit(x);
}
FOR(idx, k + 1, len(dat)) {
if (dat[idx] == 0) continue;
return (idx << 6) | lowbit(dat[idx]);
}
return N;
}
int prev(int i) {
chmin(i, N - 1);
if (i <= -1) return -1;
int k = i >> 6;
if ((i & 63) < 63) {
u64 x = dat[k];
x &= (u64(1) << ((i & 63) + 1)) - 1;
if (x) return (k << 6) | topbit(x);
--k;
}
FOR_R(idx, k + 1) {
if (dat[idx] == 0) continue;
return (idx << 6) | topbit(dat[idx]);
}
return -1;
}
My_Bitset range(int L, int R) {
assert(L <= R);
My_Bitset p(R - L);
int rm = (R - L) & 63;
FOR(rm) {
p[R - L - 1] = bool((*this)[R - 1]);
--R;
}
int n = (R - L) >> 6;
int hi = L & 63;
int lo = 64 - hi;
int s = L >> 6;
if (hi == 0) {
FOR(i, n) { p.dat[i] ^= dat[s + i]; }
} else {
FOR(i, n) { p.dat[i] ^= (dat[s + i] >> hi) ^ (dat[s + i + 1] << lo); }
}
return p;
}
int count_range(int L, int R) {
assert(L <= R);
int cnt = 0;
while ((L < R) && (L & 63)) cnt += (*this)[L++];
while ((L < R) && (R & 63)) cnt += (*this)[--R];
int l = L >> 6, r = R >> 6;
FOR(i, l, r) cnt += popcnt(dat[i]);
return cnt;
}
// [L,R) に p を代入
void assign_to_range(int L, int R, My_Bitset &p) {
assert(p.N == R - L);
int a = 0, b = p.N;
while (L < R && (L & 63)) { (*this)[L++] = bool(p[a++]); }
while (L < R && (R & 63)) { (*this)[--R] = bool(p[--b]); }
// p[a:b] を [L:R] に
int l = L >> 6, r = R >> 6;
int s = a >> 6, t = b >> t;
int n = r - l;
if (!(a & 63)) {
FOR(i, n) dat[l + i] = p.dat[s + i];
} else {
int hi = a & 63;
int lo = 64 - hi;
FOR(i, n) dat[l + i] = (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
}
}
// [L,R) に p を xor
void xor_to_range(int L, int R, My_Bitset &p) {
assert(p.N == R - L);
int a = 0, b = p.N;
while (L < R && (L & 63)) {
dat[L >> 6] ^= u64(p[a]) << (L & 63);
++a, ++L;
}
while (L < R && (R & 63)) {
--b, --R;
dat[R >> 6] ^= u64(p[b]) << (R & 63);
}
// p[a:b] を [L:R] に
int l = L >> 6, r = R >> 6;
int s = a >> 6, t = b >> t;
int n = r - l;
if (!(a & 63)) {
FOR(i, n) dat[l + i] ^= p.dat[s + i];
} else {
int hi = a & 63;
int lo = 64 - hi;
FOR(i, n) dat[l + i] ^= (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
}
}
// [L,R) に p を and
void and_to_range(int L, int R, My_Bitset &p) {
assert(p.N == R - L);
int a = 0, b = p.N;
while (L < R && (L & 63)) {
if (!p[a++]) (*this)[L++] = 0;
}
while (L < R && (R & 63)) {
if (!p[--b]) (*this)[--R] = 0;
}
// p[a:b] を [L:R] に
int l = L >> 6, r = R >> 6;
int s = a >> 6, t = b >> t;
int n = r - l;
if (!(a & 63)) {
FOR(i, n) dat[l + i] &= p.dat[s + i];
} else {
int hi = a & 63;
int lo = 64 - hi;
FOR(i, n) dat[l + i] &= (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
}
}
// [L,R) に p を or
void or_to_range(int L, int R, My_Bitset &p) {
assert(p.N == R - L);
int a = 0, b = p.N;
while (L < R && (L & 63)) {
dat[L >> 6] |= u64(p[a]) << (L & 63);
++a, ++L;
}
while (L < R && (R & 63)) {
--b, --R;
dat[R >> 6] |= u64(p[b]) << (R & 63);
}
// p[a:b] を [L:R] に
int l = L >> 6, r = R >> 6;
int s = a >> 6, t = b >> t;
int n = r - l;
if (!(a & 63)) {
FOR(i, n) dat[l + i] |= p.dat[s + i];
} else {
int hi = a & 63;
int lo = 64 - hi;
FOR(i, n) dat[l + i] |= (p.dat[s + i] >> hi) | (p.dat[1 + s + i] << lo);
}
}
string to_string() const {
string S;
FOR(i, N) S += '0' + (dat[i >> 6] >> (i & 63) & 1);
return S;
}
// bitset に仕様を合わせる
void set(int i) { (*this)[i] = 1; }
void reset(int i) { (*this)[i] = 0; }
void flip(int i) { (*this)[i].flip(); }
void set() {
fill(all(dat), u64(-1));
resize(N);
}
void reset() { fill(all(dat), 0); }
int _Find_first() { return next(0); }
int _Find_next(int p) { return next(p + 1); }
};
#line 1 "enumerate/bits.hpp"
template <typename F>
void enumerate_bits_32(u32 s, F f) {
while (s) {
int i = __builtin_ctz(s);
f(i);
s ^= 1 << i;
}
}
template <typename F>
void enumerate_bits_64(u64 s, F f) {
while (s) {
int i = __builtin_ctzll(s);
f(i);
s ^= u64(1) << i;
}
}
template <typename BS, typename F>
void enumerate_bits_bitset(BS& b, int L, int R, F f) {
int p = (b[L] ? L : b._Find_next(L));
while (p < R) {
f(p);
p = b._Find_next(p);
}
}
#line 3 "knapsack/subset_sum.hpp"
// O(N MAX(vals))
template <typename T>
vc<int> subset_sum_solution_1(vc<T>& vals, int target) {
int n = len(vals);
if (n == 0) return {};
int mx = MAX(vals);
int b = 0, sb = 0;
while (b < n && sb + vals[b] <= target) { sb += vals[b++]; }
if (b == n && sb != target) return {};
int off = target - mx + 1;
vc<int> dp(2 * mx, -1);
vv(int, PAR, n, 2 * mx, -1);
dp[sb - off] = b;
FOR3(i, b, n) {
auto newdp = dp;
auto& par = PAR[i];
int a = vals[i];
FOR(j, mx) {
if (chmax(newdp[j + a], dp[j])) { par[j + a] = -2; }
}
FOR3_R(j, mx, 2 * mx) {
FOR3_R(k, max(dp[j], 0), newdp[j]) {
if (chmax(newdp[j - vals[k]], k)) par[j - vals[k]] = k;
}
}
swap(dp, newdp);
}
if (dp[mx - 1] == -1) return {};
vc<bool> use(n);
int i = n - 1, j = mx - 1;
while (i >= b) {
int p = PAR[i][j];
if (p == -2) {
use[i] = !use[i];
j -= vals[i--];
}
elif (p == -1) { --i; }
else {
use[p] = !use[p];
j += vals[p];
}
}
while (i >= 0) {
use[i] = !use[i];
--i;
}
vc<int> I;
FOR(i, n) if (use[i]) I.eb(i);
ll sm = 0;
for (auto&& i: I) sm += vals[i];
assert(sm == target);
return I;
}
// O(N target / w)
template <typename T>
vc<int> subset_sum_solution_2(vc<T>& vals, int target) {
int n = len(vals);
auto I = argsort(vals);
My_Bitset dp(1, 1);
vc<int> last(target + 1, -1);
FOR(k, n) {
int v = vals[I[k]];
if (v > target) continue;
My_Bitset newdp = dp;
int new_size = len(dp) + v;
newdp.resize(new_size);
newdp.or_to_range(v, new_size, dp);
if (len(newdp) > target + 1) newdp.resize(target + 1);
// update したところをメモ
FOR(i, len(newdp.dat)) {
u64 upd = (i < len(dp.dat) ? dp.dat[i] : u64(0)) ^ newdp.dat[i];
enumerate_bits_64(upd, [&](int p) -> void { last[(i << 6) | p] = I[k]; });
}
swap(dp, newdp);
}
if (target >= len(dp) || !dp[target]) return {};
vc<int> ANS;
while (target > 0) {
int i = last[target];
ANS.eb(i);
target -= vals[i];
}
return ANS;
}
// O(sum^{1.5} / w)
// sum=10^6 で 150ms:https://codeforces.com/contest/755/problem/F
template <typename T>
vc<int> subset_sum_solution_3(vc<T>& vals, int target) {
int SM = SUM<int>(vals);
int N = len(vals);
vvc<int> IDS(SM + 1);
FOR(i, N) IDS[vals[i]].eb(i);
vc<pair<int, int>> par(N, {-1, -1});
vc<int> grp_vals;
vc<int> raw_idx;
FOR(x, 1, SM + 1) {
auto& I = IDS[x];
while (len(I) >= 3) {
int a = POP(I), b = POP(I);
int c = len(par);
par.eb(a, b);
IDS[2 * x].eb(c);
}
for (auto& i: I) {
grp_vals.eb(x);
raw_idx.eb(i);
}
}
auto I = subset_sum_solution_2<int>(grp_vals, target);
vc<int> ANS;
for (auto& i: I) {
vc<int> st = {raw_idx[i]};
while (len(st)) {
auto c = POP(st);
if (c < N) {
ANS.eb(c);
continue;
}
auto [a, b] = par[c];
st.eb(a), st.eb(b);
}
}
return ANS;
}
template <typename T>
vc<int> subset_sum_solution_4(vc<T>& vals, T target) {
if (target <= 0) return {};
int N = len(vals);
int M = N / 2;
auto calc = [&](int L, int R) -> vc<T> {
int n = R - L;
vc<T> dp = {0};
FOR(i, n) {
T a = vals[L + i];
vc<T> dp1(len(dp));
FOR(k, len(dp)) dp1[k] = dp[k] + a;
vc<T> newdp;
merge(all(dp), all(dp1), back_inserter(newdp));
swap(dp, newdp);
}
return dp;
};
auto restore = [&](int L, int R, T v) -> vc<int> {
int n = R - L;
vc<T> dp(1 << n);
FOR(i, n) FOR(s, 1 << i) dp[s | 1 << i] = dp[s] + vals[L + i];
FOR(s, 1 << n) {
if (dp[s] == v) {
vc<int> I;
FOR(i, n) if (s >> i & 1) I.eb(L + i);
return I;
}
}
assert(0);
return {};
};
auto dp1 = calc(0, M);
auto dp2 = calc(M, N);
int t = 0;
FOR_R(s, len(dp1)) {
while (t + 1 < len(dp2) && dp1[s] + dp2[t + 1] <= target) { ++t; }
if (dp1[s] + dp2[t] == target) {
vc<int> I1 = restore(0, M, dp1[s]);
vc<int> I2 = restore(M, N, dp2[t]);
I1.insert(I1.end(), all(I2));
return I1;
}
}
return {};
}
template <typename T>
vc<int> subset_sum(vc<T>& vals, T target) {
if (target <= 0) return {};
int n = len(vals);
if (n == 0) return {};
int mx = MAX(vals);
// しきい値の調整をしていない
// solution 1: O(N mx))
// solution 2: O(N target / w)
// solution 3: O(sum^1.5 / w)
// solution 4: O(2^(N/2))
double x1 = double(len(vals)) * mx;
double x2 = double(len(vals)) * target / 500.0;
double x3 = pow(SUM<double>(vals), 1.5) / 500.0;
double x4 = pow(2.0, 0.5 * len(vals));
double mi = min({x1, x2, x3, x4});
if (x1 == mi) return subset_sum_solution_1(vals, target);
if (x2 == mi) return subset_sum_solution_2(vals, target);
if (x3 == mi) return subset_sum_solution_3(vals, target);
return subset_sum_solution_4(vals, target);
}