This documentation is automatically generated by online-judge-tools/verification-helper
#include "linalg/hafnian.hpp"
#include "setfunc/sps_exp.hpp"
#include "enumerate/bits.hpp"
// 隣接行列に対して完全マッチングを数える。
template <typename mint, int LIM = 20>
mint Hufnian(vc<vc<mint>>& mat) {
int N = len(mat);
int n = N / 2;
assert(n <= LIM);
vc<mint> cyc(1 << n);
FOR(i, N / 2) {
int A = 2 * i + 0, B = 2 * i + 1;
int K = 2 * i;
cyc[1 << i] += mat[A][B];
vc<mint> dp(K << i);
for (int j = 0; j < i; ++j) {
int j0 = 2 * j + 0, j1 = 2 * j + 1;
dp[(K << j) + j0] += mat[A][j1], dp[(K << j) + j1] += mat[A][j0];
}
for (int s = 0; s < (1 << i); ++s) {
for (int j = 0; j < i; ++j) {
int j0 = 2 * j + 0, j1 = 2 * j + 1;
cyc[s | 1 << i] += dp[K * s + j0] * mat[B][j0];
cyc[s | 1 << i] += dp[K * s + j1] * mat[B][j1];
for (int k: all_bit<u32>((1 << i) - 1 - s)) {
int k0 = 2 * k + 0, k1 = 2 * k + 1;
int t = s | 1 << k;
dp[K * t + k0] += dp[K * s + j0] * mat[j0][k1];
dp[K * t + k0] += dp[K * s + j1] * mat[j1][k1];
dp[K * t + k1] += dp[K * s + j0] * mat[j0][k0];
dp[K * t + k1] += dp[K * s + j1] * mat[j1][k0];
}
}
}
}
return sps_exp<mint, LIM>(cyc).back();
}
#line 2 "setfunc/subset_convolution.hpp"
#line 2 "setfunc/ranked_zeta.hpp"
template <typename T, int LIM>
vc<array<T, LIM + 1>> ranked_zeta(const vc<T>& f) {
int n = topbit(len(f));
assert(n <= LIM);
assert(len(f) == 1 << n);
vc<array<T, LIM + 1>> Rf(1 << n);
for (int s = 0; s < (1 << n); ++s) Rf[s][popcnt(s)] = f[s];
for (int i = 0; i < n; ++i) {
int w = 1 << i;
for (int p = 0; p < (1 << n); p += 2 * w) {
for (int s = p; s < p + w; ++s) {
int t = s | 1 << i;
for (int d = 0; d <= n; ++d) Rf[t][d] += Rf[s][d];
}
}
}
return Rf;
}
template <typename T, int LIM>
vc<T> ranked_mobius(vc<array<T, LIM + 1>>& Rf) {
int n = topbit(len(Rf));
assert(len(Rf) == 1 << n);
for (int i = 0; i < n; ++i) {
int w = 1 << i;
for (int p = 0; p < (1 << n); p += 2 * w) {
for (int s = p; s < p + w; ++s) {
int t = s | 1 << i;
for (int d = 0; d <= n; ++d) Rf[t][d] -= Rf[s][d];
}
}
}
vc<T> f(1 << n);
for (int s = 0; s < (1 << n); ++s) f[s] = Rf[s][popcnt(s)];
return f;
}
#line 4 "setfunc/subset_convolution.hpp"
template <typename T, int LIM = 20>
vc<T> subset_convolution_square(const vc<T>& A) {
auto RA = ranked_zeta<T, LIM>(A);
int n = topbit(len(RA));
FOR(s, len(RA)) {
auto& f = RA[s];
FOR_R(d, n + 1) {
T x = 0;
FOR(i, d + 1) x += f[i] * f[d - i];
f[d] = x;
}
}
return ranked_mobius<T, LIM>(RA);
}
template <typename T, int LIM = 20>
vc<T> subset_convolution(const vc<T>& A, const vc<T>& B) {
if (A == B) return subset_convolution_square(A);
auto RA = ranked_zeta<T, LIM>(A);
auto RB = ranked_zeta<T, LIM>(B);
int n = topbit(len(RA));
FOR(s, len(RA)) {
auto &f = RA[s], &g = RB[s];
FOR_R(d, n + 1) {
T x = 0;
FOR(i, d + 1) x += f[i] * g[d - i];
f[d] = x;
}
}
return ranked_mobius<T, LIM>(RA);
}
#line 2 "setfunc/sps_exp.hpp"
// sum_i 1/i! s^i, s^i is subset-convolution
template <typename mint, int LIM>
vc<mint> sps_exp(vc<mint>& s) {
const int N = topbit(len(s));
assert(len(s) == (1 << N) && s[0] == mint(0));
vc<mint> dp(1 << N);
dp[0] = mint(1);
FOR(i, N) {
vc<mint> a = {s.begin() + (1 << i), s.begin() + (2 << i)};
vc<mint> b = {dp.begin(), dp.begin() + (1 << i)};
a = subset_convolution<mint, LIM>(a, b);
copy(all(a), dp.begin() + (1 << i));
}
return dp;
}
#line 1 "enumerate/bits.hpp"
template <typename BS, typename F>
void enumerate_bits_bitset(BS& b, int L, int R, F f) {
if (L >= len(b)) return;
int p = (b[L] ? L : b._Find_next(L));
while (p < R) {
f(p);
p = b._Find_next(p);
}
}
#line 3 "linalg/hafnian.hpp"
// 隣接行列に対して完全マッチングを数える。
template <typename mint, int LIM = 20>
mint Hufnian(vc<vc<mint>>& mat) {
int N = len(mat);
int n = N / 2;
assert(n <= LIM);
vc<mint> cyc(1 << n);
FOR(i, N / 2) {
int A = 2 * i + 0, B = 2 * i + 1;
int K = 2 * i;
cyc[1 << i] += mat[A][B];
vc<mint> dp(K << i);
for (int j = 0; j < i; ++j) {
int j0 = 2 * j + 0, j1 = 2 * j + 1;
dp[(K << j) + j0] += mat[A][j1], dp[(K << j) + j1] += mat[A][j0];
}
for (int s = 0; s < (1 << i); ++s) {
for (int j = 0; j < i; ++j) {
int j0 = 2 * j + 0, j1 = 2 * j + 1;
cyc[s | 1 << i] += dp[K * s + j0] * mat[B][j0];
cyc[s | 1 << i] += dp[K * s + j1] * mat[B][j1];
for (int k: all_bit<u32>((1 << i) - 1 - s)) {
int k0 = 2 * k + 0, k1 = 2 * k + 1;
int t = s | 1 << k;
dp[K * t + k0] += dp[K * s + j0] * mat[j0][k1];
dp[K * t + k0] += dp[K * s + j1] * mat[j1][k1];
dp[K * t + k1] += dp[K * s + j0] * mat[j0][k0];
dp[K * t + k1] += dp[K * s + j1] * mat[j1][k0];
}
}
}
}
return sps_exp<mint, LIM>(cyc).back();
}