orig_post.cpp – Efficient division-free randomness extraction

#include <bit>
#include <cstdint>
#include <span>
#include <tuple>
#include <numeric>

/* Given a sequence of N events (integers in range 0..M-1) which are all independent and
 * identically distributed, we want to produce a single integer V that is uniformly distributed in
 * a range 0..R-1. Such uniform integers can then be arbitrarily composed to produce larger uniform
 * integers, and bits can be extracted from it. This approach is a generalization of a
 * Von Neumann debiaser.
 *
 * To do so, observe that while no statement can be made about the actual event probabilities,
 * given the multiset of events observed, note that every permutation of those events is
 * equally likely. Thus, it suffices to set R equal to the number of such permutations, and V to
 * a unique integer for each specific permutation within 0..R-1.
 *
 * The algorithms below include several variations that accomplish this. Notation:
 * - N: number of events
 * - M: range of event values
 * - K: (only for M=2) number of non-zero events
 * - VAR: variable-time algorithm (which leaks information about the uniform value through timing).
 * - DIV: uses division instructions
 * - CTZ: uses count-trailing-zeros instructions (std::countr_zero in C++20).
 * - LIM: if an internal overflow occurs inside the algorithm, no result (R=1,V=0) is produced.
 *        if an algorithm is not LIM, then as long as the number of permutations of the seen events
 *        is less than 2^64, a unique encoding in that range is produced for the permutation. If
 *        the number of permutations exceeds 2^64-1, then a uniformly distributed V within a
 *        reduced range (possibly R=1) is returned.
 * - TAB(S): a compile-time table with S values is needed for this algorith.
 *
 * - BinomialCoderSimple: O(N), M=2, VAR, DIV, LIM
 * - MultinomialCoderSimple: O(N*M), VAR, DIV, LIM
 * - BinomialCoderModinv: O(N), M=2, CTZ
 * - MultinomialCoderModinv: O(N*M), CTZ
 * - BinomialCoderLowK: O(K^2), M=2, VAR, CTZ
 * - BinomialCoderTable: O(K), M=2, VAR, TAB(3*N)
 *
 * The utility functions Merge and ExtractBit implement the composition of uniform integers, and
 * extracting uniform bits from them, respectively.
 */

struct UniformRange {
    /** How many values value can take (public). range=0 is interpreted as 2^64. */
    uint64_t range{1};
    /** A uniformly distributed value in [0,value). (private). */
    uint64_t value{0};
};

/** Simple O(N) binomial coder, operating on a list of bits.
 *
 * Leaks information about the returned UniformRange::value through timing.
 *
 * Complexity: N mul, N div (N = number of bits).
 *
 * The returned value contains the exact (n choose k, encoding) when n * (n choose k) < 2^64.
 */
UniformRange BinomialCoderSimple(std::span<const bool> bits) {
    /* Components of the return value. */
    uint64_t range{1}, value{0};
    /** Number of true bits seen. */
    unsigned num_1{0};
    /** Total number of bits seed. */
    unsigned total{0};

    for (bool bit : bits) {
        uint64_t mask = -uint64_t{bit};
        // Update frequency information.
        num_1 += bit;
        ++total;
        /** How many times we have seen bits equal to bit. */
        unsigned self = (mask & num_1) + ((~mask) & (total - num_1));
        // Compute new range.
        uint64_t tmp = range * total;
        if (tmp < range) return {1, 0}; // Detect overflow.
        uint64_t new_range = tmp / self;
        // Compute new value.
        value += mask & (new_range - range);
        // Update range.
        range = new_range;
    }
    return {range, value};
}

/** Simple O(N*M) multinomial coder, operating on a list of events.
 *
 * Leaks information about the returned UniformRange::value through timing.
 *
 * Complexity: 2N mul, 2N div (N = number of bits).
 *
 * The returned value contains the exact (range, encoding) when n * range < 2^64.
 */
template<unsigned M>
UniformRange MultinomialCoderSimple(std::span<const unsigned> events) {
    /* Components of the return value. */
    uint64_t range{1}, value{0};
    /** Frequency table. */
    unsigned freq[M] = {0};
    /** Total number of events. */
    unsigned total{0};

    for (unsigned event : events) {
        // Update frequency information.
        uint64_t self = ++freq[event];
        ++total;
        // Compute new range.
        uint64_t tmp = range * total;
        if (tmp < range) return {1, 0}; // Detect overflow
        uint64_t new_range = tmp / self;
        // Compute new value.
        unsigned add = std::accumulate(freq, freq + event, (unsigned)0);
        value += (range * add) / self;
        // Update range.
        range = new_range;
    }

    return {range, value};
}

/** Compute the modular inverse of an odd x, mod 2^64.
 *
 * Complexity: 8 mul.
 *
 * Constant-time.
 */
uint64_t ModInv64(uint64_t x) {
    // Compute an initial i which has its 4 bottom bits correct.
    uint64_t i = x ^ (((x + 1) & 4) << 1);
    // Perform 4 iterations of Newton-Rhapson, solving f(i)=0 in the 2-adics, where f(i)=1/(i*x)-1.
    // The number of correct bits doubles every step.
    i *= 2 - i * x;
    i *= 2 - i * x;
    i *= 2 - i * x;
    i *= 2 - i * x;
    return i;
}

/** Given a non-zero x, return (o,s) where o is odd and such that x = (o << s).
 *
 * Complexity: 1 ctz.
 *
 * Constant-time.
 */
std::pair<unsigned, unsigned> SplitOdd(unsigned x)
{
    unsigned shift = std::countr_zero(x);
    return {x >> shift, shift};
}

/** If R is an integer >0, V a uniformly random integer in [0,R), then when given the reduction of
 *  R and V modulo 2^64, construct a UniformRange. */
UniformRange MakeRange(uint64_t range, uint64_t value)
{
    if (value < range) {
        // Even when range and/or value have overflowed, every value<range corresponds to the same
        // amount of V values.
        return {range, value};
    } else {
        // Even when range and/or value have overflowed, every value>=range corresponds to the same
        // amount of V values. This also covers the case where range=0 (and thus R is a nonzero
        // multiple of 2^64).
        return {(uint64_t)-range, value - range};
    }
}

/** O(N) modular-inverse based binomial coder, operating on a list of bits.
 *
 * Complexity:
 * - As written: 10+3N mul, 2N ctz (N = number of bits).
 * - If SplitOdd(total) is replaced with an N-sized table lookup: 10+3N mul, N ctz.
 *
 * Does not leak information about the returned UniformRange::value through timing.
 *
 * The returned value contains the exact (n choose k, encoding) when (n choose k) < 2^64.
 */
UniformRange BinomialCoderModinv(std::span<const bool> bits)
{
    /** How many 1 bits we have seen. */
    unsigned num_1{0};
    /** The total number of bits we have seen. */
    unsigned total{0};
    // The range is represented as ((range_num << range_shift) / denom).
    // The value is represented as (value_num / denom).
    uint64_t range_num{1}, value_num{0}, denom{1};
    unsigned range_shift{0};

    for (bool bit : bits) {
        uint64_t mask = -uint64_t{bit};
        num_1 += bit;
        total += 1;

        /** How many times have we seen bits equal to bit, in split form. */
        auto [self_odd, self_shift] = SplitOdd((mask & num_1) + ((~mask) & (total - num_1)));
        /** How many bits have we seen in total, in split form (this could be a table lookup, as
         *  total is not secret). */
        auto [tot_odd, tot_shift] = SplitOdd(total);

        // Compute new range values (new_range = range * total / self).
        uint64_t new_range_num = range_num * tot_odd;
        uint64_t new_denom = denom * self_odd;
        unsigned new_range_shift = range_shift + tot_shift - self_shift;

        // Compute new value numerator (new_value = value + (mask & (new_range - range))).
        value_num = self_odd * (value_num - (mask & (range_num << range_shift))) +
                    (mask & (new_range_num << new_range_shift));

        // Update range.
        range_num = new_range_num;
        range_shift = new_range_shift;
        denom = new_denom;
    }

    // Compute real range and value.
    uint64_t denom_inv = ModInv64(denom);
    uint64_t range = (range_num * denom_inv) << range_shift;
    uint64_t value = value_num * denom_inv;

    // Produce output.
    return MakeRange(range, value);
}

/** O(N*M) modular-inverse based multinomial coder, operating on a list of events.
 *
 * Complexity:
 * - As written: 10+4N mul, 3N ctz (N = number of bits).
 * - If SplitOdd(total) is replaced with an N-sized table lookup: 10+4N mul, 2N ctz.
 *
 * Does not leak information about the returned UniformRange::value through timing.
 *
 * The returned value contains the exact (range, encoding) when range < 2^64.
 */
template<unsigned M>
UniformRange MultinomialCoderModinv(std::span<const unsigned> events)
{
    /** How many times we have seen each event. */
    unsigned freq[M] = {0};
    /** How many events we have seen in total. */
    unsigned total{0};
    // The range is represented as ((range_num << range_shift) / denom).
    // The value is represented as (value_num / denom).
    uint64_t range_num{1}, value_num{0}, denom{1};
    unsigned range_shift{0};

    for (unsigned event : events) {
        // Update frequency information.
        ++total;
        unsigned add{0}, self{0};
        for (unsigned i = 0; i < M; ++i) {
            unsigned mask_below = -unsigned{i < event};
            unsigned mask_self = -unsigned{i == event};
            freq[i] += (i == event);
            add += mask_below & freq[i];
            self += mask_self & freq[i];
        }
        /** How many times have we seen events lower than event, in split form. */
        auto [add_odd, add_shift] = SplitOdd(add);
        /** How many times have we seen events equal to event, in split form. */
        auto [self_odd, self_shift] = SplitOdd(self);
        /** How many events have we seen in total, in split form (this could be a table lookup). */
        auto [tot_odd, tot_shift] = SplitOdd(total);

        // Compute new range values (new_range = range * total / self).
        uint64_t new_range_num = range_num * tot_odd;
        uint64_t new_denom = denom * self_odd;
        unsigned new_range_shift = range_shift + tot_shift - self_shift;

        // Compute new value numerator (new_value = value + add * range / self).
        value_num = self_odd * value_num + ((add_odd * range_num) << (add_shift + range_shift - self_shift));

        // Update range.
        range_num = new_range_num;
        range_shift = new_range_shift;
        denom = new_denom;
    }

    // Compute real range and value.
    uint64_t denom_inv = ModInv64(denom);
    uint64_t range = (range_num * denom_inv) << range_shift;
    uint64_t value = value_num * denom_inv;

    // Produce output.
    return MakeRange(range, value);
}

/** Compute a fraction {n,d} such that n * ModInv64(d) = (n choose k) (mod 2^64).
 *
 * Complexity: 2k mul, 2k ctz
 *
 * Leaks both n and k through timing.
 */
std::pair<uint64_t, uint64_t> BinomialFraction(unsigned n, unsigned k)
{
    if (k > n) return {0, 1};
    uint64_t num{1}, denom{1};
    unsigned shift{0};
    for (unsigned i = 0; i < k; ++i) {
        // The SplitOdd calls below could be replaced with lookups in an O(k)-sized table.
        auto [mul_odd, mul_shift] = SplitOdd(n - i);
        auto [div_odd, div_shift] = SplitOdd(i + 1);
        num *= mul_odd;
        denom *= div_odd;
        shift += mul_shift - div_shift;
    }
    return {num << shift, denom};
}

/** Compute (n choose k) (mod 2^64) using a precomputed table.
 *
 * Complexity: 2 mul.
 *
 * Needs a table of size 3n.
 *
 * Leaks information about n and k through timing.
 */
uint64_t BinomialTable(unsigned n, unsigned k)
{
    /** Precomputed table: TABLE[i] = {o, ModInv64(o), s} where (o, s) = SplitOdd(i!). */
    static constexpr std::tuple<uint64_t, uint64_t, unsigned> TABLE[100] = {
        {0x1, 0x1, 0}, {0x1, 0x1, 0}, {0x1, 0x1, 1}, {0x3, 0xaaaaaaaaaaaaaaab, 1}, {0x3, 0xaaaaaaaaaaaaaaab, 3}, {0xf, 0xeeeeeeeeeeeeeeef, 3}, {0x2d, 0x4fa4fa4fa4fa4fa5, 4}, {0x13b, 0x2ff2ff2ff2ff2ff3, 4}, {0x13b, 0x2ff2ff2ff2ff2ff3, 7}, {0xb13, 0x938cc70553e3771b, 7}, {0x375f, 0xb71c27cddd93e49f, 8}, {0x26115, 0xb38e3229fcdee63d, 8}, {0x7233f, 0xe684bb63544a4cbf, 10}, {0x5cca33, 0xc2f684917ca340fb, 10}, {0x2898765, 0xf747c9cba417526d, 11}, {0x260eeeeb, 0xbb26eb51d7bd49c3, 11}, {0x260eeeeb, 0xbb26eb51d7bd49c3, 15}, {0x286fddd9b, 0xb0a7efb985294093, 15}, {0x16beecca73, 0xbe4b8c69f259eabb, 16}, {0x1b02b930689, 0x6854d17ed6dc4fb9, 16}, {0x870d9df20ad, 0xe1aa904c915f4325, 18}, {0xb141df4dae31, 0x3b8206df131cead1, 18}, {0x79dd498567c1b, 0x79c6009fea76fe13, 19}, {0xaf2e19afc5266d, 0xd8c5d381633cd365, 19}, {0x20d8a4d0f4f7347, 0x4841f12b21144677, 22}, {0x335281867ec241ef, 0x4a91ff68200b0d0f, 22}, {0x9b3093d46fdd5923, 0x8f9513a58c4f9e8b, 23}, {0x5e1f9767cc5866b1, 0x2b3e690621a42251, 23},
        {0x92dd23d6966aced7, 0x4f520f00e03c04e7, 25}, {0xa30d0f4f0a196e5b, 0x2edf84ee600211d3, 25}, {0x8dc3e5a1977d7755, 0xadcaa2764aaacdfd, 26}, {0x2ab8ce915831734b, 0x161f4f9033f4fe63, 26}, {0x2ab8ce915831734b, 0x161f4f9033f4fe63, 31}, {0x81d2a0bc5e5fdcab, 0xbada2932ea4d3e03, 31}, {0x9efcac82445da75b, 0xcec189f3efaa30d3, 32}, {0xbc8b95cf58cde171, 0xf7475bb68330bf91, 32}, {0xa0e8444a1f3cecf9, 0x37eb7bf7d5b01549, 34}, {0x4191deb683ce3ffd, 0x46b35660a4e91555, 34}, {0xddd3878bc84ebfc7, 0xa567c12d81f151f7, 35}, {0xcb39a64b83ff3751, 0x4c724007bb2071b1, 35}, {0xf8203f7993fc1495, 0xf4a0cce58a016bd, 38}, {0xbd2a2a78b35f4bdd, 0xfa21068e66106475, 38}, {0x84757be6b6d13921, 0x244ab72b5a318ae1, 39}, {0x3fbbcfc0b524988b, 0x366ce67e080d0f23, 39}, {0xbd11ed47c8928df9, 0xd666fdae5dd2a449, 41}, {0x3c26b59e41c2f4c5, 0xd740ddd0acc06a0d, 41}, {0x677a5137e883fdb3, 0xb050bbbb28e6f97b, 42}, {0xff74e943b03b93dd, 0x70b003fe890a5c75, 42}, {0xfe5ebbcb10b2bb97, 0xd03aabff83037427, 46}, {0xb021f1de3235e7e7, 0x13ec4ca72c783bd7, 46},
        {0x33509eb2e743a58f, 0x90282c06afdbd96f, 47}, {0x390f9da41279fb7d, 0x4414ddb9db4a95d5, 47}, {0xe5cb0154f031c559, 0xa2c68735ae6832e9, 49}, {0x93074695ba4ddb6d, 0xbf72d71455676665, 49}, {0x81c471caa636247f, 0xa8469fab6b759b7f, 50}, {0xe1347289b5a1d749, 0xc1e55b56e606caf9, 50}, {0x286f21c3f76ce2ff, 0x40455630fc4a1cff, 53}, {0xbe84a2173e8ac7, 0x120a7b0046d16f7, 53}, {0x1595065ca215b88b, 0xa7c3553b08faef23, 54}, {0xf95877595b018809, 0x9f0bfd1b08d48639, 54}, {0x9c2efe3c5516f887, 0xa433ffce9a304d37, 56}, {0x373294604679382b, 0xa22ad1d53915c683, 56}, {0xaf1ff7a888adcd35, 0xcb6cbc723ba5dd1d, 57}, {0x18ddf279a2c5800b, 0x547fb1b8ab9d0ba3, 57}, {0x18ddf279a2c5800b, 0x547fb1b8ab9d0ba3, 63}, {0x505a90e2542582cb, 0x8f15a826498852e3, 63}, {0x5bacad2cd8d5dc2b, 0x32e1a03f38880283, 64}, {0xfe3152bcbff89f41, 0x3de4cce63283f0c1, 64}, {0xe1467e88bf829351, 0x5dfe6667e4da95b1, 66}, {0xb8001adb9e31b4d5, 0xfda6eeeef479e47d, 66}, {0x2803ac06a0cbb91f, 0xf14de991cc7882df, 67}, {0x1904b5d698805799, 0xe68db79247630ca9, 67},
        {0xe12a648b5c831461, 0xa7d6db8207ee8fa1, 70}, {0x3516abbd6160cfa9, 0x255e1f0fcf034499, 70}, {0xac46d25f12fe036d, 0xc9a8990e43dd7e65, 71}, {0x78bfa1da906b00ef, 0x3279b6f289702e0f, 71}, {0xf6390338b7f111bd, 0xe7b5905d9b71b195, 73}, {0xf25f80f538255d9, 0x3025ba41ff0da69, 73}, {0x4ec8ca55b8db140f, 0xb7df3d6d3be55aef, 74}, {0x4ff670740b9b30a1, 0xf89b212ebff2b361, 74}, {0x8fd032443a07f325, 0xfe856d095996f0ad, 78}, {0x80dfe7965c83eeb5, 0xd6e533e9fdf20f9d, 78}, {0xa3dc1714d1213afd, 0xf8c0e84a63da3255, 79}, {0x205b7bbfcdc62007, 0xa677876cd91b4db7, 79}, {0xa78126bbe140a093, 0x7ed4f97780d7d9b, 81}, {0x9de1dc61ca7550cf, 0x90a8705f258db62f, 81}, {0x84f0046d01b492c5, 0xa41bbb2be31b1c0d, 82}, {0x2d91810b945de0f3, 0x6ec28690b038383b, 82}, {0xf5408b7f6008aa71, 0xdb860c3bb2edd691, 85}, {0x43707f4863034149, 0x838286838a980f9, 85}, {0xdac65fb9679279d5, 0x558417a74b36f77d, 86}, {0xc48406e7d1114eb7, 0x71779afc3646ef07, 86}, {0xa7dc9ed3c88e1271, 0x743cda377ccb6e91, 88}, {0xfb25b2efdb9cb30d, 0x7fdf9f3fe89153c5, 88},
        {0x1bebda0951c4df63, 0xdc97d25df49b9a4b, 89}, {0x5c85e975580ee5bd, 0x76321a778eb37d95, 89}, {0x1591bc60082cb137, 0x7cbb5e27da3bd487, 94}, {0x2c38606318ef25d7, 0x9cff4ade1a009de7, 94}, {0x76ca72f7c5c63e27, 0x70eb166d05c15197, 95}, {0xf04a75d17baa0915, 0xdcf0460b71d5fe3d, 95}
    };

    if (k > n) return 0;
    auto [n_odd, _n, n_shift] = TABLE[n];
    auto [_k, k_inv_odd, k_shift] = TABLE[k];
    auto [_nmk, nmk_inv_odd, nmk_shift] = TABLE[n - k];
    return (n_odd * k_inv_odd * nmk_inv_odd) << (n_shift - k_shift - nmk_shift);
}

/** O(K^2) binomial coder, operating on a list of true bit positions.
 *
 * Complexity: K(K+3)+13 mul, K(K+3) ctz (K = number of true bits, = positions.size()).
 -
 * Leaks information about the returned UniformRange::value through timing.
 *
 * The returned value contains the exact (n choose k, encoding) when (n choose k) < 2^64.
 */
UniformRange BinomialCoderLowK(std::span<const unsigned> positions, unsigned count)
{
    uint64_t value_num{0};
    uint64_t value_denom{1};
    unsigned num_1{0};

    // Compute value in fraction form.
    for (unsigned position : positions) {
        num_1 += 1;

        auto [add_num, add_denom] = BinomialFraction(position, num_1);
        std::tie(value_num, value_denom) = std::pair{value_num * add_denom + add_num * value_denom, value_denom * add_denom};
    }

    // Compute range in fraction form.
    auto [range_num, range_denom] = BinomialFraction(count, num_1);

    // Compute actual value and range (using batch inversion).
    uint64_t inv = ModInv64(value_denom * range_denom);
    uint64_t range = range_num * inv * value_denom;
    uint64_t value = value_num * inv * range_denom;

    // Produce output.
    return MakeRange(range, value);
}

/** O(K) binomial coder using an O(N) table, operating on a list of true bit positions.
 *
 * Complexity: 2K+2 mul (K = number of true bits, = positions.size())
 *
 * Needs a table of size 3N (N = number of bits, = count).
 *
 * Leaks information about the returned UniformRange::value through timing.
 *
 * The returned value contains the exact (n choose k, encoding) when (n choose k) < 2^64.
 */
UniformRange BinomialCoderTable(std::span<const unsigned> positions, unsigned count)
{
    uint64_t value{0};
    unsigned num_1{0};

    // Compute value.
    for (unsigned position : positions) {
        num_1 += 1;
        value += BinomialTable(position, num_1);
    }

    // Compute range.
    uint64_t range = BinomialTable(count, num_1);

    // Produce output.
    return MakeRange(range, value);
}

/** Combine two UniformRange objects. */
UniformRange Merge(const UniformRange& a, const UniformRange& b)
{
    return {a.range * b.range, a.range * b.value + a.value};
}

/** Extract a bit from a UniformRange object. Even though this is a variable-time algorithm, the
 *  timing information does not leak any information about the return UniformRange::value, or
 *  the updated a.value. */
UniformRange ExtractBit(UniformRange& a)
{
    if (a.range != 0) {
        if (a.range & 1) {
            // If the range is odd, map the last value to nothing, to make the range even.
            if (a.value == a.range - 1) [[unlikely]] {
                a = {1, 0};
                return {1, 0};
            }
        }
        a.range >>= 1;
    } else [[unlikely]] {
        // Range is 2^64.
        a.range = uint64_t{1} << 63;
    }
    bool bit = a.value & 1;
    a.value >>= 1;
    return {2, bit};
}

#include <assert.h>
#include <optional>
#include <map>
#include <iostream>

int main(void) {
    // Try all lengths (= number of bits) between 0 and 23.
    for (size_t len = 0; len < 24; ++len) {
        std::cerr << "Testing length " << len << "...\n";
        // Test binomial coders.
        std::optional<UniformRange> table[25];
        // Iterate over all combinations of len bits.
        for (uint64_t comb = 0; (comb >> len) == 0; ++comb) {
            // Encode those bits in bit array bits, and position array pos.
            bool bits[24];
            unsigned pos[24];
            size_t sum = 0;
            for (unsigned i = 0; i < len; ++i) {
                bool bit = (comb >> i) & 1;
                bits[i] = bit;
                if (bit) {
                    pos[sum] = i;
                    ++sum;
                }
            }
            // Invoke all the binomial coders.
            auto ret_simple = BinomialCoderSimple({bits, len});
            auto ret_modinv = BinomialCoderModinv({bits, len});
            auto ret_lowk = BinomialCoderLowK({pos, sum}, len);
            auto ret_table = BinomialCoderTable({pos, sum}, len);
            // Compare them.
            assert(ret_simple.range == ret_modinv.range);
            assert(ret_simple.value == ret_modinv.value);
            assert(ret_simple.range == ret_lowk.range);
            assert(ret_simple.value == ret_lowk.value);
            assert(ret_simple.range == ret_table.range);
            assert(ret_simple.value == ret_table.value);
            // Sanity check.
            if (!table[sum]) {
                // If we have never seen this amount of 1 bits, the value must be 0.
                assert(ret_simple.value == 0);
            } else {
                // If we have seen it, the range must match, and the value must be one higher.
                assert(ret_simple.range == table[sum]->range);
                assert(ret_simple.value == table[sum]->value + 1);
            }
            // Remember what we saw last for this amount of 1 bits.
            table[sum] = ret_simple;
        }
        for (size_t i = 0; i < 25; ++i) {
            if (table[i].has_value()) {
                assert(table[i]->range == table[i]->value + 1);
            }
        }

        // Test multinomial coders.
        if (len > 10) continue;
        std::map<std::array<unsigned, 4>, UniformRange> mtable;
        // Iterate over all combinations of up to 10 events.
        for (uint64_t comb = 0; (comb >> (2 * len)) == 0; ++comb) {
            // Construct events list and frequency table.
            unsigned events[10];
            std::array<unsigned, 4> freq = {0, 0, 0, 0};
            for (unsigned i = 0; i < len; ++i) {
                unsigned event = (comb >> (2 * i)) & 3;
                events[i] = event;
                ++freq[event];
            }
            // Invoke multinomial coders.
            auto ret_simple = MultinomialCoderSimple<4>({events, len});
            auto ret_modinv = MultinomialCoderModinv<4>({events, len});
            // Compare them.
            assert(ret_simple.range == ret_modinv.range);
            assert(ret_simple.value == ret_modinv.value);
            // Sanity check.
            auto it = mtable.find(freq);
            if (it == mtable.end()) {
                assert(ret_simple.value == 0);
            } else {
                assert(ret_simple.range == it->second.range);
                assert(ret_simple.value == it->second.value + 1);
            }
            mtable[freq] = ret_simple;
        }
        for (const auto& [freq, res] : mtable) {
            assert(res.range == res.value + 1);
        }
    }
}