barretenberg
Loading...
Searching...
No Matches
permutation_library.hpp
1#pragma once
2#include "barretenberg/common/ref_vector.hpp"
3#include "barretenberg/plonk/proof_system/proving_key/proving_key.hpp"
4#include "barretenberg/polynomials/polynomial.hpp"
5#include <typeinfo>
6
7namespace proof_system::honk::permutation_library {
8
45template <typename Flavor, typename GrandProdRelation>
46void compute_permutation_grand_product(const size_t circuit_size,
47 auto& full_polynomials,
49{
50 using FF = typename Flavor::FF;
51 using Polynomial = typename Flavor::Polynomial;
52 using Accumulator = std::tuple_element_t<0, typename GrandProdRelation::SumcheckArrayOfValuesOverSubrelations>;
53
54 // Allocate numerator/denominator polynomials that will serve as scratch space
55 // TODO(zac) we can re-use the permutation polynomial as the numerator polynomial.
56 // Reduces readability (issue #2215)
57 Polynomial numerator = Polynomial{ circuit_size };
58 Polynomial denominator = Polynomial{ circuit_size };
59
60 // Step (1)
61 // Populate `numerator` and `denominator` with the algebra described by GrandProdRelation
62 static constexpr size_t MIN_CIRCUIT_SIZE_TO_MULTITHREAD = 64;
63 const size_t num_threads = circuit_size >= MIN_CIRCUIT_SIZE_TO_MULTITHREAD
64 ? (circuit_size >= get_num_cpus_pow2() ? get_num_cpus_pow2() : 1)
65 : 1;
66 const size_t block_size = circuit_size / num_threads;
67 parallel_for(num_threads, [&](size_t thread_idx) {
68 const size_t start = thread_idx * block_size;
69 const size_t end = (thread_idx + 1) * block_size;
70 for (size_t i = start; i < end; ++i) {
71
72 typename Flavor::AllValues evaluations;
73 for (auto [eval, poly] : zip_view(evaluations.get_all(), full_polynomials.get_all())) {
74 eval = poly.size() > i ? poly[i] : 0;
75 }
76 numerator[i] = GrandProdRelation::template compute_permutation_numerator<Accumulator>(evaluations,
77 relation_parameters);
78 denominator[i] = GrandProdRelation::template compute_permutation_denominator<Accumulator>(
79 evaluations, relation_parameters);
80 }
81 });
82
83 // Step (2)
84 // Compute the accumulating product of the numerator and denominator terms.
85 // This step is split into three parts for efficient multithreading:
86 // (i) compute ∏ A(j), ∏ B(j) subproducts for each thread
87 // (ii) compute scaling factor required to convert each subproduct into a single running product
88 // (ii) combine subproducts into a single running product
89 //
90 // For example, consider 4 threads and a size-8 numerator { a0, a1, a2, a3, a4, a5, a6, a7 }
91 // (i) Each thread computes 1 element of N = {{ a0, a0a1 }, { a2, a2a3 }, { a4, a4a5 }, { a6, a6a7 }}
92 // (ii) Take partial products P = { 1, a0a1, a2a3, a4a5 }
93 // (iii) Each thread j computes N[i][j]*P[j]=
94 // {{a0,a0a1},{a0a1a2,a0a1a2a3},{a0a1a2a3a4,a0a1a2a3a4a5},{a0a1a2a3a4a5a6,a0a1a2a3a4a5a6a7}}
95 std::vector<FF> partial_numerators(num_threads);
96 std::vector<FF> partial_denominators(num_threads);
97
98 parallel_for(num_threads, [&](size_t thread_idx) {
99 const size_t start = thread_idx * block_size;
100 const size_t end = (thread_idx + 1) * block_size;
101 for (size_t i = start; i < end - 1; ++i) {
102 numerator[i + 1] *= numerator[i];
103 denominator[i + 1] *= denominator[i];
104 }
105 partial_numerators[thread_idx] = numerator[end - 1];
106 partial_denominators[thread_idx] = denominator[end - 1];
107 });
108
109 parallel_for(num_threads, [&](size_t thread_idx) {
110 const size_t start = thread_idx * block_size;
111 const size_t end = (thread_idx + 1) * block_size;
112 if (thread_idx > 0) {
113 FF numerator_scaling = 1;
114 FF denominator_scaling = 1;
115
116 for (size_t j = 0; j < thread_idx; ++j) {
117 numerator_scaling *= partial_numerators[j];
118 denominator_scaling *= partial_denominators[j];
119 }
120 for (size_t i = start; i < end; ++i) {
121 numerator[i] *= numerator_scaling;
122 denominator[i] *= denominator_scaling;
123 }
124 }
125
126 // Final step: invert denominator
127 FF::batch_invert(std::span{ &denominator[start], block_size });
128 });
129
130 // Step (3) Compute z_perm[i] = numerator[i] / denominator[i]
131 auto& grand_product_polynomial = GrandProdRelation::get_grand_product_polynomial(full_polynomials);
132 grand_product_polynomial[0] = 0;
133 parallel_for(num_threads, [&](size_t thread_idx) {
134 const size_t start = thread_idx * block_size;
135 const size_t end = (thread_idx == num_threads - 1) ? circuit_size - 1 : (thread_idx + 1) * block_size;
136 for (size_t i = start; i < end; ++i) {
137 grand_product_polynomial[i + 1] = numerator[i] * denominator[i];
138 }
139 });
140}
141
142template <typename Flavor>
143void compute_permutation_grand_products(std::shared_ptr<typename Flavor::ProvingKey>& key,
144 typename Flavor::ProverPolynomials& full_polynomials,
146{
147 using GrandProductRelations = typename Flavor::GrandProductRelations;
148 using FF = typename Flavor::FF;
149
150 constexpr size_t NUM_RELATIONS = std::tuple_size<GrandProductRelations>{};
151 barretenberg::constexpr_for<0, NUM_RELATIONS, 1>([&]<size_t i>() {
152 using PermutationRelation = typename std::tuple_element<i, GrandProductRelations>::type;
153
154 // Assign the grand product polynomial to the relevant std::span member of `full_polynomials` (and its shift)
155 // For example, for UltraPermutationRelation, this will be `full_polynomials.z_perm`
156 // For example, for LookupRelation, this will be `full_polynomials.z_lookup`
157 barretenberg::Polynomial<FF>& full_polynomial =
158 PermutationRelation::get_grand_product_polynomial(full_polynomials);
159 barretenberg::Polynomial<FF>& key_polynomial = PermutationRelation::get_grand_product_polynomial(*key);
160 full_polynomial = key_polynomial.share();
161
162 compute_permutation_grand_product<Flavor, PermutationRelation>(
163 key->circuit_size, full_polynomials, relation_parameters);
164 barretenberg::Polynomial<FF>& full_polynomial_shift =
165 PermutationRelation::get_shifted_grand_product_polynomial(full_polynomials);
166 full_polynomial_shift = key_polynomial.shifted();
167 });
168}
169
187template <typename Flavor, typename StorageHandle> void compute_concatenated_polynomials(StorageHandle* proving_key)
188{
189 // Concatenation groups are vectors of polynomials that are concatenated together
190 auto concatenation_groups = proving_key->get_concatenation_groups();
191
192 // Resulting concatenated polynomials
193 auto targets = proving_key->get_concatenated_constraints();
194
195 // A function that produces 1 concatenated polynomial
196 // TODO(#756): This can be rewritten to use more cores. Currently uses at maximum the number of concatenated
197 // polynomials (4 in Goblin Translator)
198 auto ordering_function = [&](size_t i) {
199 auto my_group = concatenation_groups[i];
200 auto& current_target = targets[i];
201
202 // For each polynomial in group
203 for (size_t j = 0; j < my_group.size(); j++) {
204 auto starting_write_offset = current_target.begin();
205 auto finishing_read_offset = my_group[j].begin();
206 std::advance(starting_write_offset, j * Flavor::MINI_CIRCUIT_SIZE);
207 std::advance(finishing_read_offset, Flavor::MINI_CIRCUIT_SIZE);
208 // Copy into appropriate position in the concatenated polynomial
209 std::copy(my_group[j].begin(), finishing_read_offset, starting_write_offset);
210 }
211 };
212 parallel_for(concatenation_groups.size(), ordering_function);
213}
214
237template <typename Flavor, typename StorageHandle>
238void compute_goblin_translator_range_constraint_ordered_polynomials(StorageHandle* proving_key)
239{
240
241 using FF = typename Flavor::FF;
242
243 // Get constants
244 constexpr auto sort_step = Flavor::SORT_STEP;
245 constexpr auto num_concatenated_wires = Flavor::NUM_CONCATENATED_WIRES;
246 constexpr auto full_circuit_size = Flavor::FULL_CIRCUIT_SIZE;
247 constexpr auto mini_circuit_size = Flavor::MINI_CIRCUIT_SIZE;
248
249 // The value we have to end polynomials with
250 constexpr uint32_t max_value = (1 << Flavor::MICRO_LIMB_BITS) - 1;
251
252 // Number of elements needed to go from 0 to MAX_VALUE with our step
253 constexpr size_t sorted_elements_count = (max_value / sort_step) + 1 + (max_value % sort_step == 0 ? 0 : 1);
254
255 // Check if we can construct these polynomials
256 static_assert((num_concatenated_wires + 1) * sorted_elements_count < full_circuit_size);
257
258 // First use integers (easier to sort)
259 std::vector<size_t> sorted_elements(sorted_elements_count);
260
261 // Fill with necessary steps
262 sorted_elements[0] = max_value;
263 for (size_t i = 1; i < sorted_elements_count; i++) {
264 sorted_elements[i] = (sorted_elements_count - 1 - i) * sort_step;
265 }
266
267 std::vector<std::vector<uint32_t>> ordered_vectors_uint(num_concatenated_wires);
268 auto ordered_constraint_polynomials = std::vector{ &proving_key->ordered_range_constraints_0,
269 &proving_key->ordered_range_constraints_1,
270 &proving_key->ordered_range_constraints_2,
271 &proving_key->ordered_range_constraints_3 };
272 std::vector<size_t> extra_denominator_uint(full_circuit_size);
273
274 // Get information which polynomials need to be concatenated
275 auto concatenation_groups = proving_key->get_concatenation_groups();
276
277 // A function that transfers elements from each of the polynomials in the chosen concatenation group in the uint
278 // ordered polynomials
279 auto ordering_function = [&](size_t i) {
280 // Get the group and the main target vector
281 auto my_group = concatenation_groups[i];
282 auto& current_vector = ordered_vectors_uint[i];
283 current_vector.resize(Flavor::FULL_CIRCUIT_SIZE);
284
285 // Calculate how much space there is for values from the original polynomials
286 auto free_space_before_runway = full_circuit_size - sorted_elements_count;
287
288 // Calculate the offset of this group's overflowing elements in the extra denominator polynomial
289 size_t extra_denominator_offset = i * sorted_elements_count;
290
291 // Go through each polynomial in the concatenation group
292 for (size_t j = 0; j < Flavor::CONCATENATION_INDEX; j++) {
293
294 // Calculate the offset in the target vector
295 auto current_offset = j * mini_circuit_size;
296 // For each element in the polynomial
297 for (size_t k = 0; k < mini_circuit_size; k++) {
298
299 // Put it it the target polynomial
300 if ((current_offset + k) < free_space_before_runway) {
301 current_vector[current_offset + k] = static_cast<uint32_t>(uint256_t(my_group[j][k]).data[0]);
302
303 // Or in the extra one if there is no space left
304 } else {
305 extra_denominator_uint[extra_denominator_offset] =
306 static_cast<uint32_t>(uint256_t(my_group[j][k]).data[0]);
307 extra_denominator_offset++;
308 }
309 }
310 }
311 // Copy the steps into the target polynomial
312 auto starting_write_offset = current_vector.begin();
313 std::advance(starting_write_offset, free_space_before_runway);
314 std::copy(sorted_elements.cbegin(), sorted_elements.cend(), starting_write_offset);
315
316 // Sort the polynomial in nondescending order. We sort using vector with size_t elements for 2 reasons:
317 // 1. It is faster to sort size_t
318 // 2. Comparison operators for finite fields are operating on internal form, so we'd have to convert them from
319 // Montgomery
320 std::sort(current_vector.begin(), current_vector.end());
321
322 // Copy the values into the actual polynomial
323 std::transform(current_vector.cbegin(),
324 current_vector.cend(),
325 (*ordered_constraint_polynomials[i]).begin(),
326 [](uint32_t in) { return FF(in); });
327 };
328
329 // Construct the first 4 polynomials
330 parallel_for(num_concatenated_wires, ordering_function);
331 ordered_vectors_uint.clear();
332
333 auto sorted_element_insertion_offset = extra_denominator_uint.begin();
334 std::advance(sorted_element_insertion_offset, num_concatenated_wires * sorted_elements_count);
335
336 // Add steps to the extra denominator polynomial
337 std::copy(sorted_elements.cbegin(), sorted_elements.cend(), sorted_element_insertion_offset);
338
339 // Sort it
340#ifdef NO_TBB
341 std::sort(extra_denominator_uint.begin(), extra_denominator_uint.end());
342#else
343 std::sort(std::execution::par_unseq, extra_denominator_uint.begin(), extra_denominator.end());
344#endif
345
346 // And copy it to the actual polynomial
347 std::transform(extra_denominator_uint.cbegin(),
348 extra_denominator_uint.cend(),
349 proving_key->ordered_range_constraints_4.begin(),
350 [](uint32_t in) { return FF(in); });
351}
352
368template <typename Flavor> inline void compute_extra_range_constraint_numerator(auto proving_key)
369{
370
371 // Get the full goblin circuits size (this is the length of concatenated range constraint polynomials)
372 auto full_circuit_size = Flavor::FULL_CIRCUIT_SIZE;
373 auto sort_step = Flavor::SORT_STEP;
374 auto num_concatenated_wires = Flavor::NUM_CONCATENATED_WIRES;
375
376 auto& extra_range_constraint_numerator = proving_key->ordered_extra_range_constraints_numerator;
377
378 uint32_t MAX_VALUE = (1 << Flavor::MICRO_LIMB_BITS) - 1;
379
380 // Calculate how many elements there are in the sequence MAX_VALUE, MAX_VALUE - 3,...,0
381 size_t sorted_elements_count = (MAX_VALUE / sort_step) + 1 + (MAX_VALUE % sort_step == 0 ? 0 : 1);
382
383 // Check that we can fit every element in the polynomial
384 ASSERT((num_concatenated_wires + 1) * sorted_elements_count < full_circuit_size);
385
386 std::vector<size_t> sorted_elements(sorted_elements_count);
387
388 // Calculate the sequence in integers
389 sorted_elements[0] = MAX_VALUE;
390 for (size_t i = 1; i < sorted_elements_count; i++) {
391 sorted_elements[i] = (sorted_elements_count - 1 - i) * sort_step;
392 }
393
394 // TODO(#756): can be parallelized further. This will use at most 5 threads
395 auto fill_with_shift = [&](size_t shift) {
396 for (size_t i = 0; i < sorted_elements_count; i++) {
397 extra_range_constraint_numerator[shift + i * (num_concatenated_wires + 1)] = sorted_elements[i];
398 }
399 };
400 // Fill polynomials with a sequence, where each element is repeated num_concatenated_wires+1 times
401 parallel_for(num_concatenated_wires + 1, fill_with_shift);
402}
403
409template <typename Flavor> inline void compute_lagrange_polynomials_for_goblin_translator(auto proving_key)
410
411{
412 const size_t n = proving_key->circuit_size;
413 typename Flavor::Polynomial lagrange_polynomial_odd_in_minicircuit(n);
414 typename Flavor::Polynomial lagrange_polynomial_even_in_minicircut(n);
415 typename Flavor::Polynomial lagrange_polynomial_second(n);
416 typename Flavor::Polynomial lagrange_polynomial_second_to_last_in_minicircuit(n);
417
418 for (size_t i = 1; i < Flavor::MINI_CIRCUIT_SIZE - 1; i += 2) {
419 lagrange_polynomial_odd_in_minicircuit[i] = 1;
420 lagrange_polynomial_even_in_minicircut[i + 1] = 1;
421 }
422 proving_key->lagrange_odd_in_minicircuit = lagrange_polynomial_odd_in_minicircuit.share();
423
424 proving_key->lagrange_even_in_minicircuit = lagrange_polynomial_even_in_minicircut.share();
425 lagrange_polynomial_second[1] = 1;
426 lagrange_polynomial_second_to_last_in_minicircuit[Flavor::MINI_CIRCUIT_SIZE - 2] = 1;
427 proving_key->lagrange_second_to_last_in_minicircuit = lagrange_polynomial_second_to_last_in_minicircuit.share();
428 proving_key->lagrange_second = lagrange_polynomial_second.share();
429}
430
431} // namespace proof_system::honk::permutation_library
Definition: polynomial.hpp:12
Polynomial shifted() const
Returns an std::span of the left-shift of self.
Definition: polynomial.cpp:323
Polynomial share() const
Definition: polynomial.cpp:141
Definition: uint256.hpp:25
A container for the prover polynomials handles.
Definition: AvmMini_flavor.hpp:263
Definition: zip_view.hpp:159
Container for parameters used by the grand product (permutation, lookup) Honk relations.
Definition: relation_parameters.hpp:12