This documentation is automatically generated by online-judge-tools/verification-helper
#include "nt/multiplicative_sum.hpp"
#include "nt/primetable.hpp"
// f_pe:T(int p,int e), f(p^e)
// f_psum:[1, x] での f(p) の和
template <typename T, typename F1, typename F2>
T multiplicative_sum(ll N, F1 f_pe, F2 f_psum) {
ll sqN = sqrtl(N);
auto P = primetable<int>(sqN);
T ANS = T(1) + f_psum(N); // 1 and prime
// t = up_i^k のときに、(t, i, k, f(t), f(u)) を持たせる
auto dfs = [&](auto self, ll t, ll i, ll k, T ft, T fu) -> void {
T f_nxt = fu * f_pe(P[i], k + 1);
// 子ノードを全部加算
ANS += f_nxt;
ANS += ft * (f_psum(double(N) / t) - f_psum(P[i]));
ll lim = sqrtl(double(N) / t);
if (P[i] <= lim) { self(self, t * P[i], i, k + 1, f_nxt, fu); }
FOR3(j, i + 1, len(P)) {
if (P[j] > lim) break;
self(self, t * P[j], j, 1, ft * f_pe(P[j], 1), ft);
}
};
FOR(i, len(P)) if (P[i] <= sqN) dfs(dfs, P[i], i, 1, f_pe(P[i], 1), 1);
return ANS;
}
#line 2 "nt/primetable.hpp"
template <typename T = int>
vc<T> primetable(int LIM) {
++LIM;
const int S = 32768;
static int done = 2;
static vc<T> primes = {2}, sieve(S + 1);
if (done < LIM) {
done = LIM;
primes = {2}, sieve.assign(S + 1, 0);
const int R = LIM / 2;
primes.reserve(int(LIM / log(LIM) * 1.1));
vc<pair<int, int>> cp;
for (int i = 3; i <= S; i += 2) {
if (!sieve[i]) {
cp.eb(i, i * i / 2);
for (int j = i * i; j <= S; j += 2 * i) sieve[j] = 1;
}
}
for (int L = 1; L <= R; L += S) {
array<bool, S> block{};
for (auto& [p, idx]: cp)
for (int i = idx; i < S + L; idx = (i += p)) block[i - L] = 1;
FOR(i, min(S, R - L)) if (!block[i]) primes.eb((L + i) * 2 + 1);
}
}
int k = LB(primes, LIM + 1);
return {primes.begin(), primes.begin() + k};
}
#line 2 "nt/multiplicative_sum.hpp"
// f_pe:T(int p,int e), f(p^e)
// f_psum:[1, x] での f(p) の和
template <typename T, typename F1, typename F2>
T multiplicative_sum(ll N, F1 f_pe, F2 f_psum) {
ll sqN = sqrtl(N);
auto P = primetable<int>(sqN);
T ANS = T(1) + f_psum(N); // 1 and prime
// t = up_i^k のときに、(t, i, k, f(t), f(u)) を持たせる
auto dfs = [&](auto self, ll t, ll i, ll k, T ft, T fu) -> void {
T f_nxt = fu * f_pe(P[i], k + 1);
// 子ノードを全部加算
ANS += f_nxt;
ANS += ft * (f_psum(double(N) / t) - f_psum(P[i]));
ll lim = sqrtl(double(N) / t);
if (P[i] <= lim) { self(self, t * P[i], i, k + 1, f_nxt, fu); }
FOR3(j, i + 1, len(P)) {
if (P[j] > lim) break;
self(self, t * P[j], j, 1, ft * f_pe(P[j], 1), ft);
}
};
FOR(i, len(P)) if (P[i] <= sqN) dfs(dfs, P[i], i, 1, f_pe(P[i], 1), 1);
return ANS;
}