#include #include #include #include #include /* Given a sequence of N events (integers in range 0..M-1) which are all independent and * identically distributed, we want to produce a single integer V that is uniformly distributed in * a range 0..R-1. Such uniform integers can then be arbitrarily composed to produce larger uniform * integers, and bits can be extracted from it. This approach is a generalization of a * Von Neumann debiaser. * * To do so, observe that while no statement can be made about the actual event probabilities, * given the multiset of events observed, note that every permutation of those events is * equally likely. Thus, it suffices to set R equal to the number of such permutations, and V to * a unique integer for each specific permutation within 0..R-1. * * The algorithms below include several variations that accomplish this. Notation: * - N: number of events * - M: range of event values * - K: (only for M=2) number of non-zero events * - VAR: variable-time algorithm (which leaks information about the uniform value through timing). * - DIV: uses division instructions * - CTZ: uses count-trailing-zeros instructions (std::countr_zero in C++20). * - LIM: if an internal overflow occurs inside the algorithm, no result (R=1,V=0) is produced. * if an algorithm is not LIM, then as long as the number of permutations of the seen events * is less than 2^64, a unique encoding in that range is produced for the permutation. If * the number of permutations exceeds 2^64-1, then a uniformly distributed V within a * reduced range (possibly R=1) is returned. * - TAB(S): a compile-time table with S values is needed for this algorith. * * - BinomialCoderSimple: O(N), M=2, VAR, DIV, LIM * - MultinomialCoderSimple: O(N*M), VAR, DIV, LIM * - BinomialCoderModinv: O(N), M=2, CTZ * - MultinomialCoderModinv: O(N*M), CTZ * - BinomialCoderLowK: O(K^2), M=2, VAR, CTZ * - BinomialCoderTable: O(K), M=2, VAR, TAB(3*N) * * The utility functions Merge and ExtractBit implement the composition of uniform integers, and * extracting uniform bits from them, respectively. */ struct UniformRange { /** How many values value can take (public). range=0 is interpreted as 2^64. */ uint64_t range{1}; /** A uniformly distributed value in [0,value). (private). */ uint64_t value{0}; }; /** Simple O(N) binomial coder, operating on a list of bits. * * Leaks information about the returned UniformRange::value through timing. * * Complexity: N mul, N div (N = number of bits). * * The returned value contains the exact (n choose k, encoding) when n * (n choose k) < 2^64. */ UniformRange BinomialCoderSimple(std::span bits) { /* Components of the return value. */ uint64_t range{1}, value{0}; /** Number of true bits seen. */ unsigned num_1{0}; /** Total number of bits seed. */ unsigned total{0}; for (bool bit : bits) { uint64_t mask = -uint64_t{bit}; // Update frequency information. num_1 += bit; ++total; /** How many times we have seen bits equal to bit. */ unsigned self = (mask & num_1) + ((~mask) & (total - num_1)); // Compute new range. uint64_t tmp = range * total; if (tmp < range) return {1, 0}; // Detect overflow. uint64_t new_range = tmp / self; // Compute new value. value += mask & (new_range - range); // Update range. range = new_range; } return {range, value}; } /** Simple O(N*M) multinomial coder, operating on a list of events. * * Leaks information about the returned UniformRange::value through timing. * * Complexity: 2N mul, 2N div (N = number of bits). * * The returned value contains the exact (range, encoding) when n * range < 2^64. */ template UniformRange MultinomialCoderSimple(std::span events) { /* Components of the return value. */ uint64_t range{1}, value{0}; /** Frequency table. */ unsigned freq[M] = {0}; /** Total number of events. */ unsigned total{0}; for (unsigned event : events) { // Update frequency information. uint64_t self = ++freq[event]; ++total; // Compute new range. uint64_t tmp = range * total; if (tmp < range) return {1, 0}; // Detect overflow uint64_t new_range = tmp / self; // Compute new value. unsigned add = std::accumulate(freq, freq + event, (unsigned)0); value += (range * add) / self; // Update range. range = new_range; } return {range, value}; } /** Compute the modular inverse of an odd x, mod 2^64. * * Complexity: 8 mul. * * Constant-time. */ uint64_t ModInv64(uint64_t x) { // Compute an initial i which has its 4 bottom bits correct. uint64_t i = x ^ (((x + 1) & 4) << 1); // Perform 4 iterations of Newton-Rhapson, solving f(i)=0 in the 2-adics, where f(i)=1/(i*x)-1. // The number of correct bits doubles every step. i *= 2 - i * x; i *= 2 - i * x; i *= 2 - i * x; i *= 2 - i * x; return i; } /** Given a non-zero x, return (o,s) where o is odd and such that x = (o << s). * * Complexity: 1 ctz. * * Constant-time. */ std::pair SplitOdd(unsigned x) { unsigned shift = std::countr_zero(x); return {x >> shift, shift}; } /** If R is an integer >0, V a uniformly random integer in [0,R), then when given the reduction of * R and V modulo 2^64, construct a UniformRange. */ UniformRange MakeRange(uint64_t range, uint64_t value) { if (value < range) { // Even when range and/or value have overflowed, every value=range corresponds to the same // amount of V values. This also covers the case where range=0 (and thus R is a nonzero // multiple of 2^64). return {(uint64_t)-range, value - range}; } } /** O(N) modular-inverse based binomial coder, operating on a list of bits. * * Complexity: * - As written: 10+3N mul, 2N ctz (N = number of bits). * - If SplitOdd(total) is replaced with an N-sized table lookup: 10+3N mul, N ctz. * * Does not leak information about the returned UniformRange::value through timing. * * The returned value contains the exact (n choose k, encoding) when (n choose k) < 2^64. */ UniformRange BinomialCoderModinv(std::span bits) { /** How many 1 bits we have seen. */ unsigned num_1{0}; /** The total number of bits we have seen. */ unsigned total{0}; // The range is represented as ((range_num << range_shift) / denom). // The value is represented as (value_num / denom). uint64_t range_num{1}, value_num{0}, denom{1}; unsigned range_shift{0}; for (bool bit : bits) { uint64_t mask = -uint64_t{bit}; num_1 += bit; total += 1; /** How many times have we seen bits equal to bit, in split form. */ auto [self_odd, self_shift] = SplitOdd((mask & num_1) + ((~mask) & (total - num_1))); /** How many bits have we seen in total, in split form (this could be a table lookup, as * total is not secret). */ auto [tot_odd, tot_shift] = SplitOdd(total); // Compute new range values (new_range = range * total / self). uint64_t new_range_num = range_num * tot_odd; uint64_t new_denom = denom * self_odd; unsigned new_range_shift = range_shift + tot_shift - self_shift; // Compute new value numerator (new_value = value + (mask & (new_range - range))). value_num = self_odd * (value_num - (mask & (range_num << range_shift))) + (mask & (new_range_num << new_range_shift)); // Update range. range_num = new_range_num; range_shift = new_range_shift; denom = new_denom; } // Compute real range and value. uint64_t denom_inv = ModInv64(denom); uint64_t range = (range_num * denom_inv) << range_shift; uint64_t value = value_num * denom_inv; // Produce output. return MakeRange(range, value); } /** O(N*M) modular-inverse based multinomial coder, operating on a list of events. * * Complexity: * - As written: 10+4N mul, 3N ctz (N = number of bits). * - If SplitOdd(total) is replaced with an N-sized table lookup: 10+4N mul, 2N ctz. * * Does not leak information about the returned UniformRange::value through timing. * * The returned value contains the exact (range, encoding) when range < 2^64. */ template UniformRange MultinomialCoderModinv(std::span events) { /** How many times we have seen each event. */ unsigned freq[M] = {0}; /** How many events we have seen in total. */ unsigned total{0}; // The range is represented as ((range_num << range_shift) / denom). // The value is represented as (value_num / denom). uint64_t range_num{1}, value_num{0}, denom{1}; unsigned range_shift{0}; for (unsigned event : events) { // Update frequency information. ++total; unsigned add{0}, self{0}; for (unsigned i = 0; i < M; ++i) { unsigned mask_below = -unsigned{i < event}; unsigned mask_self = -unsigned{i == event}; freq[i] += (i == event); add += mask_below & freq[i]; self += mask_self & freq[i]; } /** How many times have we seen events lower than event, in split form. */ auto [add_odd, add_shift] = SplitOdd(add); /** How many times have we seen events equal to event, in split form. */ auto [self_odd, self_shift] = SplitOdd(self); /** How many events have we seen in total, in split form (this could be a table lookup). */ auto [tot_odd, tot_shift] = SplitOdd(total); // Compute new range values (new_range = range * total / self). uint64_t new_range_num = range_num * tot_odd; uint64_t new_denom = denom * self_odd; unsigned new_range_shift = range_shift + tot_shift - self_shift; // Compute new value numerator (new_value = value + add * range / self). value_num = self_odd * value_num + ((add_odd * range_num) << (add_shift + range_shift - self_shift)); // Update range. range_num = new_range_num; range_shift = new_range_shift; denom = new_denom; } // Compute real range and value. uint64_t denom_inv = ModInv64(denom); uint64_t range = (range_num * denom_inv) << range_shift; uint64_t value = value_num * denom_inv; // Produce output. return MakeRange(range, value); } /** Compute a fraction {n,d} such that n * ModInv64(d) = (n choose k) (mod 2^64). * * Complexity: 2k mul, 2k ctz * * Leaks both n and k through timing. */ std::pair BinomialFraction(unsigned n, unsigned k) { if (k > n) return {0, 1}; uint64_t num{1}, denom{1}; unsigned shift{0}; for (unsigned i = 0; i < k; ++i) { // The SplitOdd calls below could be replaced with lookups in an O(k)-sized table. auto [mul_odd, mul_shift] = SplitOdd(n - i); auto [div_odd, div_shift] = SplitOdd(i + 1); num *= mul_odd; denom *= div_odd; shift += mul_shift - div_shift; } return {num << shift, denom}; } /** Compute (n choose k) (mod 2^64) using a precomputed table. * * Complexity: 2 mul. * * Needs a table of size 3n. * * Leaks information about n and k through timing. */ uint64_t BinomialTable(unsigned n, unsigned k) { /** Precomputed table: TABLE[i] = {o, ModInv64(o), s} where (o, s) = SplitOdd(i!). */ static constexpr std::tuple TABLE[100] = { {0x1, 0x1, 0}, {0x1, 0x1, 0}, {0x1, 0x1, 1}, {0x3, 0xaaaaaaaaaaaaaaab, 1}, {0x3, 0xaaaaaaaaaaaaaaab, 3}, {0xf, 0xeeeeeeeeeeeeeeef, 3}, {0x2d, 0x4fa4fa4fa4fa4fa5, 4}, {0x13b, 0x2ff2ff2ff2ff2ff3, 4}, {0x13b, 0x2ff2ff2ff2ff2ff3, 7}, {0xb13, 0x938cc70553e3771b, 7}, {0x375f, 0xb71c27cddd93e49f, 8}, {0x26115, 0xb38e3229fcdee63d, 8}, {0x7233f, 0xe684bb63544a4cbf, 10}, {0x5cca33, 0xc2f684917ca340fb, 10}, {0x2898765, 0xf747c9cba417526d, 11}, {0x260eeeeb, 0xbb26eb51d7bd49c3, 11}, {0x260eeeeb, 0xbb26eb51d7bd49c3, 15}, {0x286fddd9b, 0xb0a7efb985294093, 15}, {0x16beecca73, 0xbe4b8c69f259eabb, 16}, {0x1b02b930689, 0x6854d17ed6dc4fb9, 16}, {0x870d9df20ad, 0xe1aa904c915f4325, 18}, {0xb141df4dae31, 0x3b8206df131cead1, 18}, {0x79dd498567c1b, 0x79c6009fea76fe13, 19}, {0xaf2e19afc5266d, 0xd8c5d381633cd365, 19}, {0x20d8a4d0f4f7347, 0x4841f12b21144677, 22}, {0x335281867ec241ef, 0x4a91ff68200b0d0f, 22}, {0x9b3093d46fdd5923, 0x8f9513a58c4f9e8b, 23}, {0x5e1f9767cc5866b1, 0x2b3e690621a42251, 23}, {0x92dd23d6966aced7, 0x4f520f00e03c04e7, 25}, {0xa30d0f4f0a196e5b, 0x2edf84ee600211d3, 25}, {0x8dc3e5a1977d7755, 0xadcaa2764aaacdfd, 26}, {0x2ab8ce915831734b, 0x161f4f9033f4fe63, 26}, {0x2ab8ce915831734b, 0x161f4f9033f4fe63, 31}, {0x81d2a0bc5e5fdcab, 0xbada2932ea4d3e03, 31}, {0x9efcac82445da75b, 0xcec189f3efaa30d3, 32}, {0xbc8b95cf58cde171, 0xf7475bb68330bf91, 32}, {0xa0e8444a1f3cecf9, 0x37eb7bf7d5b01549, 34}, {0x4191deb683ce3ffd, 0x46b35660a4e91555, 34}, {0xddd3878bc84ebfc7, 0xa567c12d81f151f7, 35}, {0xcb39a64b83ff3751, 0x4c724007bb2071b1, 35}, {0xf8203f7993fc1495, 0xf4a0cce58a016bd, 38}, {0xbd2a2a78b35f4bdd, 0xfa21068e66106475, 38}, {0x84757be6b6d13921, 0x244ab72b5a318ae1, 39}, {0x3fbbcfc0b524988b, 0x366ce67e080d0f23, 39}, {0xbd11ed47c8928df9, 0xd666fdae5dd2a449, 41}, {0x3c26b59e41c2f4c5, 0xd740ddd0acc06a0d, 41}, {0x677a5137e883fdb3, 0xb050bbbb28e6f97b, 42}, {0xff74e943b03b93dd, 0x70b003fe890a5c75, 42}, {0xfe5ebbcb10b2bb97, 0xd03aabff83037427, 46}, {0xb021f1de3235e7e7, 0x13ec4ca72c783bd7, 46}, {0x33509eb2e743a58f, 0x90282c06afdbd96f, 47}, {0x390f9da41279fb7d, 0x4414ddb9db4a95d5, 47}, {0xe5cb0154f031c559, 0xa2c68735ae6832e9, 49}, {0x93074695ba4ddb6d, 0xbf72d71455676665, 49}, {0x81c471caa636247f, 0xa8469fab6b759b7f, 50}, {0xe1347289b5a1d749, 0xc1e55b56e606caf9, 50}, {0x286f21c3f76ce2ff, 0x40455630fc4a1cff, 53}, {0xbe84a2173e8ac7, 0x120a7b0046d16f7, 53}, {0x1595065ca215b88b, 0xa7c3553b08faef23, 54}, {0xf95877595b018809, 0x9f0bfd1b08d48639, 54}, {0x9c2efe3c5516f887, 0xa433ffce9a304d37, 56}, {0x373294604679382b, 0xa22ad1d53915c683, 56}, {0xaf1ff7a888adcd35, 0xcb6cbc723ba5dd1d, 57}, {0x18ddf279a2c5800b, 0x547fb1b8ab9d0ba3, 57}, {0x18ddf279a2c5800b, 0x547fb1b8ab9d0ba3, 63}, {0x505a90e2542582cb, 0x8f15a826498852e3, 63}, {0x5bacad2cd8d5dc2b, 0x32e1a03f38880283, 64}, {0xfe3152bcbff89f41, 0x3de4cce63283f0c1, 64}, {0xe1467e88bf829351, 0x5dfe6667e4da95b1, 66}, {0xb8001adb9e31b4d5, 0xfda6eeeef479e47d, 66}, {0x2803ac06a0cbb91f, 0xf14de991cc7882df, 67}, {0x1904b5d698805799, 0xe68db79247630ca9, 67}, {0xe12a648b5c831461, 0xa7d6db8207ee8fa1, 70}, {0x3516abbd6160cfa9, 0x255e1f0fcf034499, 70}, {0xac46d25f12fe036d, 0xc9a8990e43dd7e65, 71}, {0x78bfa1da906b00ef, 0x3279b6f289702e0f, 71}, {0xf6390338b7f111bd, 0xe7b5905d9b71b195, 73}, {0xf25f80f538255d9, 0x3025ba41ff0da69, 73}, {0x4ec8ca55b8db140f, 0xb7df3d6d3be55aef, 74}, {0x4ff670740b9b30a1, 0xf89b212ebff2b361, 74}, {0x8fd032443a07f325, 0xfe856d095996f0ad, 78}, {0x80dfe7965c83eeb5, 0xd6e533e9fdf20f9d, 78}, {0xa3dc1714d1213afd, 0xf8c0e84a63da3255, 79}, {0x205b7bbfcdc62007, 0xa677876cd91b4db7, 79}, {0xa78126bbe140a093, 0x7ed4f97780d7d9b, 81}, {0x9de1dc61ca7550cf, 0x90a8705f258db62f, 81}, {0x84f0046d01b492c5, 0xa41bbb2be31b1c0d, 82}, {0x2d91810b945de0f3, 0x6ec28690b038383b, 82}, {0xf5408b7f6008aa71, 0xdb860c3bb2edd691, 85}, {0x43707f4863034149, 0x838286838a980f9, 85}, {0xdac65fb9679279d5, 0x558417a74b36f77d, 86}, {0xc48406e7d1114eb7, 0x71779afc3646ef07, 86}, {0xa7dc9ed3c88e1271, 0x743cda377ccb6e91, 88}, {0xfb25b2efdb9cb30d, 0x7fdf9f3fe89153c5, 88}, {0x1bebda0951c4df63, 0xdc97d25df49b9a4b, 89}, {0x5c85e975580ee5bd, 0x76321a778eb37d95, 89}, {0x1591bc60082cb137, 0x7cbb5e27da3bd487, 94}, {0x2c38606318ef25d7, 0x9cff4ade1a009de7, 94}, {0x76ca72f7c5c63e27, 0x70eb166d05c15197, 95}, {0xf04a75d17baa0915, 0xdcf0460b71d5fe3d, 95} }; if (k > n) return 0; auto [n_odd, _n, n_shift] = TABLE[n]; auto [_k, k_inv_odd, k_shift] = TABLE[k]; auto [_nmk, nmk_inv_odd, nmk_shift] = TABLE[n - k]; return (n_odd * k_inv_odd * nmk_inv_odd) << (n_shift - k_shift - nmk_shift); } /** O(K^2) binomial coder, operating on a list of true bit positions. * * Complexity: K(K+3)+13 mul, K(K+3) ctz (K = number of true bits, = positions.size()). - * Leaks information about the returned UniformRange::value through timing. * * The returned value contains the exact (n choose k, encoding) when (n choose k) < 2^64. */ UniformRange BinomialCoderLowK(std::span positions, unsigned count) { uint64_t value_num{0}; uint64_t value_denom{1}; unsigned num_1{0}; // Compute value in fraction form. for (unsigned position : positions) { num_1 += 1; auto [add_num, add_denom] = BinomialFraction(position, num_1); std::tie(value_num, value_denom) = std::pair{value_num * add_denom + add_num * value_denom, value_denom * add_denom}; } // Compute range in fraction form. auto [range_num, range_denom] = BinomialFraction(count, num_1); // Compute actual value and range (using batch inversion). uint64_t inv = ModInv64(value_denom * range_denom); uint64_t range = range_num * inv * value_denom; uint64_t value = value_num * inv * range_denom; // Produce output. return MakeRange(range, value); } /** O(K) binomial coder using an O(N) table, operating on a list of true bit positions. * * Complexity: 2K+2 mul (K = number of true bits, = positions.size()) * * Needs a table of size 3N (N = number of bits, = count). * * Leaks information about the returned UniformRange::value through timing. * * The returned value contains the exact (n choose k, encoding) when (n choose k) < 2^64. */ UniformRange BinomialCoderTable(std::span positions, unsigned count) { uint64_t value{0}; unsigned num_1{0}; // Compute value. for (unsigned position : positions) { num_1 += 1; value += BinomialTable(position, num_1); } // Compute range. uint64_t range = BinomialTable(count, num_1); // Produce output. return MakeRange(range, value); } /** Combine two UniformRange objects. */ UniformRange Merge(const UniformRange& a, const UniformRange& b) { return {a.range * b.range, a.range * b.value + a.value}; } /** Extract a bit from a UniformRange object. Even though this is a variable-time algorithm, the * timing information does not leak any information about the return UniformRange::value, or * the updated a.value. */ UniformRange ExtractBit(UniformRange& a) { if (a.range != 0) { if (a.range & 1) { // If the range is odd, map the last value to nothing, to make the range even. if (a.value == a.range - 1) [[unlikely]] { a = {1, 0}; return {1, 0}; } } a.range >>= 1; } else [[unlikely]] { // Range is 2^64. a.range = uint64_t{1} << 63; } bool bit = a.value & 1; a.value >>= 1; return {2, bit}; } #include #include #include #include int main(void) { // Try all lengths (= number of bits) between 0 and 23. for (size_t len = 0; len < 24; ++len) { std::cerr << "Testing length " << len << "...\n"; // Test binomial coders. std::optional table[25]; // Iterate over all combinations of len bits. for (uint64_t comb = 0; (comb >> len) == 0; ++comb) { // Encode those bits in bit array bits, and position array pos. bool bits[24]; unsigned pos[24]; size_t sum = 0; for (unsigned i = 0; i < len; ++i) { bool bit = (comb >> i) & 1; bits[i] = bit; if (bit) { pos[sum] = i; ++sum; } } // Invoke all the binomial coders. auto ret_simple = BinomialCoderSimple({bits, len}); auto ret_modinv = BinomialCoderModinv({bits, len}); auto ret_lowk = BinomialCoderLowK({pos, sum}, len); auto ret_table = BinomialCoderTable({pos, sum}, len); // Compare them. assert(ret_simple.range == ret_modinv.range); assert(ret_simple.value == ret_modinv.value); assert(ret_simple.range == ret_lowk.range); assert(ret_simple.value == ret_lowk.value); assert(ret_simple.range == ret_table.range); assert(ret_simple.value == ret_table.value); // Sanity check. if (!table[sum]) { // If we have never seen this amount of 1 bits, the value must be 0. assert(ret_simple.value == 0); } else { // If we have seen it, the range must match, and the value must be one higher. assert(ret_simple.range == table[sum]->range); assert(ret_simple.value == table[sum]->value + 1); } // Remember what we saw last for this amount of 1 bits. table[sum] = ret_simple; } for (size_t i = 0; i < 25; ++i) { if (table[i].has_value()) { assert(table[i]->range == table[i]->value + 1); } } // Test multinomial coders. if (len > 10) continue; std::map, UniformRange> mtable; // Iterate over all combinations of up to 10 events. for (uint64_t comb = 0; (comb >> (2 * len)) == 0; ++comb) { // Construct events list and frequency table. unsigned events[10]; std::array freq = {0, 0, 0, 0}; for (unsigned i = 0; i < len; ++i) { unsigned event = (comb >> (2 * i)) & 3; events[i] = event; ++freq[event]; } // Invoke multinomial coders. auto ret_simple = MultinomialCoderSimple<4>({events, len}); auto ret_modinv = MultinomialCoderModinv<4>({events, len}); // Compare them. assert(ret_simple.range == ret_modinv.range); assert(ret_simple.value == ret_modinv.value); // Sanity check. auto it = mtable.find(freq); if (it == mtable.end()) { assert(ret_simple.value == 0); } else { assert(ret_simple.range == it->second.range); assert(ret_simple.value == it->second.value + 1); } mtable[freq] = ret_simple; } for (const auto& [freq, res] : mtable) { assert(res.range == res.value + 1); } } }