This documentation is automatically generated by online-judge-tools/verification-helper
#include "poly/middle_product.hpp"
#pragma once
#include "poly/ntt.hpp"
// n, m 次多項式 (n>=m) a, b → n-m 次多項式 c
// c[i] = sum_j b[j]a[i+j]
template <typename mint>
vc<mint> middle_product(vc<mint>& a, vc<mint>& b) {
assert(len(a) >= len(b));
if (b.empty()) return vc<mint>(len(a) - len(b) + 1);
if (min(len(b), len(a) - len(b) + 1) <= 60) {
return middle_product_naive(a, b);
}
if (!(mint::can_ntt())) {
return middle_product_garner(a, b);
} else {
int n = 1 << __lg(2 * len(a) - 1);
vc<mint> fa(n), fb(n);
copy(a.begin(), a.end(), fa.begin());
copy(b.rbegin(), b.rend(), fb.begin());
ntt(fa, 0), ntt(fb, 0);
FOR(i, n) fa[i] *= fb[i];
ntt(fa, 1);
fa.resize(len(a));
fa.erase(fa.begin(), fa.begin() + len(b) - 1);
return fa;
}
}
template <typename mint>
vc<mint> middle_product_garner(vc<mint>& a, vc<mint> b) {
int n = len(a), m = len(b);
if (!n || !m) return {};
static const long long nttprimes[] = {754974721, 167772161, 469762049};
using mint0 = modint<754974721>;
using mint1 = modint<167772161>;
using mint2 = modint<469762049>;
vc<mint0> a0(n), b0(m);
vc<mint1> a1(n), b1(m);
vc<mint2> a2(n), b2(m);
FOR(i, n) a0[i] = a[i].val, a1[i] = a[i].val, a2[i] = a[i].val;
FOR(i, m) b0[i] = b[i].val, b1[i] = b[i].val, b2[i] = b[i].val;
auto c0 = middle_product<mint0>(a0, b0);
auto c1 = middle_product<mint1>(a1, b1);
auto c2 = middle_product<mint2>(a2, b2);
const long long m01 = 1LL * nttprimes[0] * nttprimes[1];
const long long m0_inv_m1 = mint1(nttprimes[0]).inverse().val;
const long long m01_inv_m2 = mint2(m01).inverse().val;
const int mod = mint::get_mod();
auto garner = [&](mint0 x0, mint1 x1, mint2 x2) -> mint {
int r0 = x0.val, r1 = x1.val, r2 = x2.val;
int v1 = (m0_inv_m1 * (r1 + nttprimes[1] - r0)) % nttprimes[1];
auto v2 = (mint2(r2) - r0 - mint2(nttprimes[0]) * v1) * mint2(m01_inv_m2);
return mint(r0 + 1LL * nttprimes[0] * v1 + m01 % mod * v2.val);
};
vc<mint> c(len(c0));
FOR(i, len(c)) c[i] = garner(c0[i], c1[i], c2[i]);
return c;
}
template <typename mint>
vc<mint> middle_product_naive(vc<mint>& a, vc<mint>& b) {
vc<mint> res(len(a) - len(b) + 1);
FOR(i, len(res)) FOR(j, len(b)) res[i] += b[j] * a[i + j];
return res;
}
#line 2 "poly/middle_product.hpp"
#line 2 "poly/ntt.hpp"
template <class mint>
void ntt(vector<mint>& a, bool inverse) {
assert(mint::can_ntt());
const int rank2 = mint::ntt_info().fi;
const int mod = mint::get_mod();
static array<mint, 30> root, iroot;
static array<mint, 30> rate2, irate2;
static array<mint, 30> rate3, irate3;
static bool prepared = 0;
if (!prepared) {
prepared = 1;
root[rank2] = mint::ntt_info().se;
iroot[rank2] = mint(1) / root[rank2];
FOR_R(i, rank2) {
root[i] = root[i + 1] * root[i + 1];
iroot[i] = iroot[i + 1] * iroot[i + 1];
}
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 2; i++) {
rate2[i] = root[i + 2] * prod;
irate2[i] = iroot[i + 2] * iprod;
prod *= iroot[i + 2];
iprod *= root[i + 2];
}
prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 3; i++) {
rate3[i] = root[i + 3] * prod;
irate3[i] = iroot[i + 3] * iprod;
prod *= iroot[i + 3];
iprod *= root[i + 3];
}
}
int n = int(a.size());
int h = topbit(n);
assert(n == 1 << h);
if (!inverse) {
int len = 0;
while (len < h) {
if (h - len == 1) {
int p = 1 << (h - len - 1);
mint rot = 1;
FOR(s, 1 << len) {
int offset = s << (h - len);
FOR(i, p) {
auto l = a[i + offset];
auto r = a[i + offset + p] * rot;
a[i + offset] = l + r;
a[i + offset + p] = l - r;
}
rot *= rate2[topbit(~s & -~s)];
}
len++;
} else {
int p = 1 << (h - len - 2);
mint rot = 1, imag = root[2];
for (int s = 0; s < (1 << len); s++) {
mint rot2 = rot * rot;
mint rot3 = rot2 * rot;
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
u64 mod2 = u64(mod) * mod;
u64 a0 = a[i + offset].val;
u64 a1 = u64(a[i + offset + p].val) * rot.val;
u64 a2 = u64(a[i + offset + 2 * p].val) * rot2.val;
u64 a3 = u64(a[i + offset + 3 * p].val) * rot3.val;
u64 a1na3imag = (a1 + mod2 - a3) % mod * imag.val;
u64 na2 = mod2 - a2;
a[i + offset] = a0 + a2 + a1 + a3;
a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
}
rot *= rate3[topbit(~s & -~s)];
}
len += 2;
}
}
} else {
mint coef = mint(1) / mint(len(a));
FOR(i, len(a)) a[i] *= coef;
int len = h;
while (len) {
if (len == 1) {
int p = 1 << (h - len);
mint irot = 1;
FOR(s, 1 << (len - 1)) {
int offset = s << (h - len + 1);
FOR(i, p) {
u64 l = a[i + offset].val;
u64 r = a[i + offset + p].val;
a[i + offset] = l + r;
a[i + offset + p] = (mod + l - r) * irot.val;
}
irot *= irate2[topbit(~s & -~s)];
}
len--;
} else {
int p = 1 << (h - len);
mint irot = 1, iimag = iroot[2];
FOR(s, (1 << (len - 2))) {
mint irot2 = irot * irot;
mint irot3 = irot2 * irot;
int offset = s << (h - len + 2);
for (int i = 0; i < p; i++) {
u64 a0 = a[i + offset + 0 * p].val;
u64 a1 = a[i + offset + 1 * p].val;
u64 a2 = a[i + offset + 2 * p].val;
u64 a3 = a[i + offset + 3 * p].val;
u64 x = (mod + a2 - a3) * iimag.val % mod;
a[i + offset] = a0 + a1 + a2 + a3;
a[i + offset + 1 * p] = (a0 + mod - a1 + x) * irot.val;
a[i + offset + 2 * p] = (a0 + a1 + 2 * mod - a2 - a3) * irot2.val;
a[i + offset + 3 * p] = (a0 + 2 * mod - a1 - x) * irot3.val;
}
irot *= irate3[topbit(~s & -~s)];
}
len -= 2;
}
}
}
}
#line 4 "poly/middle_product.hpp"
// n, m 次多項式 (n>=m) a, b → n-m 次多項式 c
// c[i] = sum_j b[j]a[i+j]
template <typename mint>
vc<mint> middle_product(vc<mint>& a, vc<mint>& b) {
assert(len(a) >= len(b));
if (b.empty()) return vc<mint>(len(a) - len(b) + 1);
if (min(len(b), len(a) - len(b) + 1) <= 60) {
return middle_product_naive(a, b);
}
if (!(mint::can_ntt())) {
return middle_product_garner(a, b);
} else {
int n = 1 << __lg(2 * len(a) - 1);
vc<mint> fa(n), fb(n);
copy(a.begin(), a.end(), fa.begin());
copy(b.rbegin(), b.rend(), fb.begin());
ntt(fa, 0), ntt(fb, 0);
FOR(i, n) fa[i] *= fb[i];
ntt(fa, 1);
fa.resize(len(a));
fa.erase(fa.begin(), fa.begin() + len(b) - 1);
return fa;
}
}
template <typename mint>
vc<mint> middle_product_garner(vc<mint>& a, vc<mint> b) {
int n = len(a), m = len(b);
if (!n || !m) return {};
static const long long nttprimes[] = {754974721, 167772161, 469762049};
using mint0 = modint<754974721>;
using mint1 = modint<167772161>;
using mint2 = modint<469762049>;
vc<mint0> a0(n), b0(m);
vc<mint1> a1(n), b1(m);
vc<mint2> a2(n), b2(m);
FOR(i, n) a0[i] = a[i].val, a1[i] = a[i].val, a2[i] = a[i].val;
FOR(i, m) b0[i] = b[i].val, b1[i] = b[i].val, b2[i] = b[i].val;
auto c0 = middle_product<mint0>(a0, b0);
auto c1 = middle_product<mint1>(a1, b1);
auto c2 = middle_product<mint2>(a2, b2);
const long long m01 = 1LL * nttprimes[0] * nttprimes[1];
const long long m0_inv_m1 = mint1(nttprimes[0]).inverse().val;
const long long m01_inv_m2 = mint2(m01).inverse().val;
const int mod = mint::get_mod();
auto garner = [&](mint0 x0, mint1 x1, mint2 x2) -> mint {
int r0 = x0.val, r1 = x1.val, r2 = x2.val;
int v1 = (m0_inv_m1 * (r1 + nttprimes[1] - r0)) % nttprimes[1];
auto v2 = (mint2(r2) - r0 - mint2(nttprimes[0]) * v1) * mint2(m01_inv_m2);
return mint(r0 + 1LL * nttprimes[0] * v1 + m01 % mod * v2.val);
};
vc<mint> c(len(c0));
FOR(i, len(c)) c[i] = garner(c0[i], c1[i], c2[i]);
return c;
}
template <typename mint>
vc<mint> middle_product_naive(vc<mint>& a, vc<mint>& b) {
vc<mint> res(len(a) - len(b) + 1);
FOR(i, len(res)) FOR(j, len(b)) res[i] += b[j] * a[i + j];
return res;
}