barretenberg
Loading...
Searching...
No Matches
field_impl_x64.hpp
1#pragma once
2
3#if (BBERG_NO_ASM == 0)
4#include "./field_impl.hpp"
5#include "asm_macros.hpp"
6namespace barretenberg {
7
8template <class T> field<T> field<T>::asm_mul_with_coarse_reduction(const field& a, const field& b) noexcept
9{
10 field r;
11 constexpr uint64_t r_inv = T::r_inv;
12 constexpr uint64_t modulus_0 = modulus.data[0];
13 constexpr uint64_t modulus_1 = modulus.data[1];
14 constexpr uint64_t modulus_2 = modulus.data[2];
15 constexpr uint64_t modulus_3 = modulus.data[3];
16 constexpr uint64_t zero_ref = 0;
17
27 __asm__(MUL("0(%0)", "8(%0)", "16(%0)", "24(%0)", "%1")
28 STORE_FIELD_ELEMENT("%2", "%%r12", "%%r13", "%%r14", "%%r15")
29 :
30 : "%r"(&a),
31 "%r"(&b),
32 "r"(&r),
33 [modulus_0] "m"(modulus_0),
34 [modulus_1] "m"(modulus_1),
35 [modulus_2] "m"(modulus_2),
36 [modulus_3] "m"(modulus_3),
37 [r_inv] "m"(r_inv),
38 [zero_reference] "m"(zero_ref)
39 : "%rdx", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
40 return r;
41}
42
/**
 * @brief In-place variant of the assembly multiplication: computes a * b and stores the result over `a`.
 *
 * The limbs of `a` are read at byte offsets 0, 8, 16 and 24 from `&a`, `b` is passed as a base pointer, and
 * r12..r15 are stored back through the same pointer that `a` was read from (operand %0).
 *
 * Although `a` is declared `const field&`, the asm writes through `&a`; the "memory" clobber is what tells
 * the compiler that memory changes. NOTE(review): callers presumably pass a mutable object (e.g. `*this`)
 * -- confirm at the call sites.
 *
 * `MUL` and `STORE_FIELD_ELEMENT` are defined outside this file (presumably asm_macros.hpp); the exact
 * reduction they perform is not visible here.
 *
 * @param a left operand and destination of the result
 * @param b right operand
 */
43template <class T> void field<T>::asm_self_mul_with_coarse_reduction(const field& a, const field& b) noexcept
44{
45 constexpr uint64_t r_inv = T::r_inv;
46 constexpr uint64_t modulus_0 = modulus.data[0];
47 constexpr uint64_t modulus_1 = modulus.data[1];
48 constexpr uint64_t modulus_2 = modulus.data[2];
49 constexpr uint64_t modulus_3 = modulus.data[3];
50 constexpr uint64_t zero_ref = 0;
// %0 = &a (source and destination), %1 = &b. The named "m" operands expose the constants above to the
// macro bodies as memory references.
60 __asm__(MUL("0(%0)", "8(%0)", "16(%0)", "24(%0)", "%1")
61 STORE_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
62 :
63 : "r"(&a),
64 "r"(&b),
65 [modulus_0] "m"(modulus_0),
66 [modulus_1] "m"(modulus_1),
67 [modulus_2] "m"(modulus_2),
68 [modulus_3] "m"(modulus_3),
69 [r_inv] "m"(r_inv),
70 [zero_reference] "m"(zero_ref)
71 : "%rdx", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
72}
73
74template <class T> field<T> field<T>::asm_sqr_with_coarse_reduction(const field& a) noexcept
75{
76 field r;
77 constexpr uint64_t r_inv = T::r_inv;
78 constexpr uint64_t modulus_0 = modulus.data[0];
79 constexpr uint64_t modulus_1 = modulus.data[1];
80 constexpr uint64_t modulus_2 = modulus.data[2];
81 constexpr uint64_t modulus_3 = modulus.data[3];
82 constexpr uint64_t zero_ref = 0;
83
84// Our SQR implementation with BMI2 but without ADX has a bug.
85// The case is extremely rare so fixing it is a bit of a waste of time.
86// We'll use MUL instead.
87#if !defined(__ADX__) || defined(DISABLE_ADX)
97 __asm__(MUL("0(%0)", "8(%0)", "16(%0)", "24(%0)", "%1")
98 STORE_FIELD_ELEMENT("%2", "%%r12", "%%r13", "%%r14", "%%r15")
99 :
100 : "%r"(&a),
101 "%r"(&a),
102 "r"(&r),
103 [modulus_0] "m"(modulus_0),
104 [modulus_1] "m"(modulus_1),
105 [modulus_2] "m"(modulus_2),
106 [modulus_3] "m"(modulus_3),
107 [r_inv] "m"(r_inv),
108 [zero_reference] "m"(zero_ref)
109 : "%rdx", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
110
111#else
112
121 __asm__(SQR("%0")
122 // "movq %[r_ptr], %%rsi \n\t"
123 STORE_FIELD_ELEMENT("%1", "%%r12", "%%r13", "%%r14", "%%r15")
124 :
125 : "r"(&a),
126 "r"(&r),
127 [zero_reference] "m"(zero_ref),
128 [modulus_0] "m"(modulus_0),
129 [modulus_1] "m"(modulus_1),
130 [modulus_2] "m"(modulus_2),
131 [modulus_3] "m"(modulus_3),
132 [r_inv] "m"(r_inv)
133 : "%rcx", "%rdx", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
134#endif
135 return r;
136}
137
/**
 * @brief In-place variant of the assembly squaring: computes a * a and stores the result over `a`.
 *
 * Two code paths are selected at compile time: the generic `MUL` macro (with `a` as both factors) when
 * `__ADX__` is not defined or `DISABLE_ADX` is defined, and the dedicated `SQR` macro otherwise. In both
 * paths r12..r15 are stored back through `&a` (operand %0).
 *
 * Although `a` is declared `const field&`, the asm writes through `&a`; the "memory" clobber is what tells
 * the compiler that memory changes. NOTE(review): callers presumably pass a mutable object (e.g. `*this`)
 * -- confirm at the call sites.
 *
 * `MUL`, `SQR` and `STORE_FIELD_ELEMENT` are defined outside this file (presumably asm_macros.hpp).
 *
 * @param a value to square; also the destination of the result
 */
138template <class T> void field<T>::asm_self_sqr_with_coarse_reduction(const field& a) noexcept
139{
140 constexpr uint64_t r_inv = T::r_inv;
141 constexpr uint64_t modulus_0 = modulus.data[0];
142 constexpr uint64_t modulus_1 = modulus.data[1];
143 constexpr uint64_t modulus_2 = modulus.data[2];
144 constexpr uint64_t modulus_3 = modulus.data[3];
145 constexpr uint64_t zero_ref = 0;
146
147// Our SQR implementation with BMI2 but without ADX has a bug.
148// The case is extremely rare so fixing it is a bit of a waste of time.
149// We'll use MUL instead.
150#if !defined(__ADX__) || defined(DISABLE_ADX)
// Fallback path: %0 and %1 both hold &a, and the result is stored back through %0.
160 __asm__(MUL("0(%0)", "8(%0)", "16(%0)", "24(%0)", "%1")
161 STORE_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
162 :
163 : "r"(&a),
164 "r"(&a),
165 [modulus_0] "m"(modulus_0),
166 [modulus_1] "m"(modulus_1),
167 [modulus_2] "m"(modulus_2),
168 [modulus_3] "m"(modulus_3),
169 [r_inv] "m"(r_inv),
170 [zero_reference] "m"(zero_ref)
171 : "%rdx", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
172
173#else
// SQR path: %0 = &a is both source and destination. This path additionally lists %rcx as clobbered.
182 __asm__(SQR("%0")
183 // "movq %[r_ptr], %%rsi \n\t"
184 STORE_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
185 :
186 : "r"(&a),
187 [zero_reference] "m"(zero_ref),
188 [modulus_0] "m"(modulus_0),
189 [modulus_1] "m"(modulus_1),
190 [modulus_2] "m"(modulus_2),
191 [modulus_3] "m"(modulus_3),
192 [r_inv] "m"(r_inv)
193 : "%rcx", "%rdx", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
194#endif
195}
196
197template <class T> field<T> field<T>::asm_add_with_coarse_reduction(const field& a, const field& b) noexcept
198{
199 field r;
200
201 constexpr uint64_t twice_not_modulus_0 = twice_not_modulus.data[0];
202 constexpr uint64_t twice_not_modulus_1 = twice_not_modulus.data[1];
203 constexpr uint64_t twice_not_modulus_2 = twice_not_modulus.data[2];
204 constexpr uint64_t twice_not_modulus_3 = twice_not_modulus.data[3];
205
206 __asm__(CLEAR_FLAGS("%%r12") LOAD_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
207 ADD_REDUCE("%1",
208 "%[twice_not_modulus_0]",
209 "%[twice_not_modulus_1]",
210 "%[twice_not_modulus_2]",
211 "%[twice_not_modulus_3]") STORE_FIELD_ELEMENT("%2", "%%r12", "%%r13", "%%r14", "%%r15")
212 :
213 : "%r"(&a),
214 "%r"(&b),
215 "r"(&r),
216 [twice_not_modulus_0] "m"(twice_not_modulus_0),
217 [twice_not_modulus_1] "m"(twice_not_modulus_1),
218 [twice_not_modulus_2] "m"(twice_not_modulus_2),
219 [twice_not_modulus_3] "m"(twice_not_modulus_3)
220 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
221 return r;
222}
223
/**
 * @brief In-place variant of the assembly addition: computes a + b and stores the result over `a`.
 *
 * Sequence: `CLEAR_FLAGS`, load `a` into r12..r15, run `ADD_REDUCE` with `b` and the four limbs of
 * `twice_not_modulus`, then store r12..r15 back through `&a` (operand %0).
 *
 * Although `a` is declared `const field&`, the asm writes through `&a`; the "memory" clobber is what tells
 * the compiler that memory changes. NOTE(review): callers presumably pass a mutable object (e.g. `*this`)
 * -- confirm at the call sites.
 *
 * The macros are defined outside this file (presumably asm_macros.hpp); the exact reduction performed by
 * `ADD_REDUCE` is not visible here.
 *
 * @param a first summand and destination of the result
 * @param b second summand
 */
224template <class T> void field<T>::asm_self_add_with_coarse_reduction(const field& a, const field& b) noexcept
225{
226 constexpr uint64_t twice_not_modulus_0 = twice_not_modulus.data[0];
227 constexpr uint64_t twice_not_modulus_1 = twice_not_modulus.data[1];
228 constexpr uint64_t twice_not_modulus_2 = twice_not_modulus.data[2];
229 constexpr uint64_t twice_not_modulus_3 = twice_not_modulus.data[3];
230
// %0 = &a (source and destination), %1 = &b.
231 __asm__(CLEAR_FLAGS("%%r12") LOAD_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
232 ADD_REDUCE("%1",
233 "%[twice_not_modulus_0]",
234 "%[twice_not_modulus_1]",
235 "%[twice_not_modulus_2]",
236 "%[twice_not_modulus_3]") STORE_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
237 :
238 : "r"(&a),
239 "r"(&b),
240 [twice_not_modulus_0] "m"(twice_not_modulus_0),
241 [twice_not_modulus_1] "m"(twice_not_modulus_1),
242 [twice_not_modulus_2] "m"(twice_not_modulus_2),
243 [twice_not_modulus_3] "m"(twice_not_modulus_3)
244 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
245}
246
/**
 * @brief Subtract `b` from `a` with x86-64 assembly and return the difference.
 *
 * Sequence: `CLEAR_FLAGS`, load `a` into r12..r15, apply `SUB` with `b`, run `REDUCE_FIELD_ELEMENT` with the
 * four limbs of `twice_modulus`, then write r12..r15 to the local result.
 *
 * The macros are defined outside this file (presumably asm_macros.hpp), so the exact correction applied by
 * `REDUCE_FIELD_ELEMENT` is not visible here. NOTE(review): judging by the function name the result is only
 * coarsely reduced -- confirm against the macro definitions.
 *
 * @param a minuend
 * @param b subtrahend
 * @return the difference, as left in r12..r15 by the macros
 */
247template <class T> field<T> field<T>::asm_sub_with_coarse_reduction(const field& a, const field& b) noexcept
248{
249 field r;
250
251 constexpr uint64_t twice_modulus_0 = twice_modulus.data[0];
252 constexpr uint64_t twice_modulus_1 = twice_modulus.data[1];
253 constexpr uint64_t twice_modulus_2 = twice_modulus.data[2];
254 constexpr uint64_t twice_modulus_3 = twice_modulus.data[3];
255
// %0 = &a, %1 = &b, %2 = &r. The inputs use plain "r" constraints (no commutative '%' modifier):
// subtraction is order-sensitive, so the operands must not be interchanged.
256 __asm__(
257 CLEAR_FLAGS("%%r12") LOAD_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15") SUB("%1")
258 REDUCE_FIELD_ELEMENT("%[twice_modulus_0]", "%[twice_modulus_1]", "%[twice_modulus_2]", "%[twice_modulus_3]")
259 STORE_FIELD_ELEMENT("%2", "%%r12", "%%r13", "%%r14", "%%r15")
260 :
261 : "r"(&a),
262 "r"(&b),
263 "r"(&r),
264 [twice_modulus_0] "m"(twice_modulus_0),
265 [twice_modulus_1] "m"(twice_modulus_1),
266 [twice_modulus_2] "m"(twice_modulus_2),
267 [twice_modulus_3] "m"(twice_modulus_3)
268 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
269 return r;
270}
271
/**
 * @brief In-place variant of the assembly subtraction: computes a - b and stores the result over `a`.
 *
 * Sequence: `CLEAR_FLAGS`, load `a` into r12..r15, apply `SUB` with `b`, run `REDUCE_FIELD_ELEMENT` with the
 * four limbs of `twice_modulus`, then store r12..r15 back through `&a` (operand %0).
 *
 * Although `a` is declared `const field&`, the asm writes through `&a`; the "memory" clobber is what tells
 * the compiler that memory changes. NOTE(review): callers presumably pass a mutable object (e.g. `*this`)
 * -- confirm at the call sites.
 *
 * The macros are defined outside this file (presumably asm_macros.hpp); the exact correction applied by
 * `REDUCE_FIELD_ELEMENT` is not visible here.
 *
 * @param a minuend and destination of the result
 * @param b subtrahend
 */
272template <class T> void field<T>::asm_self_sub_with_coarse_reduction(const field& a, const field& b) noexcept
273{
274 constexpr uint64_t twice_modulus_0 = twice_modulus.data[0];
275 constexpr uint64_t twice_modulus_1 = twice_modulus.data[1];
276 constexpr uint64_t twice_modulus_2 = twice_modulus.data[2];
277 constexpr uint64_t twice_modulus_3 = twice_modulus.data[3];
278
// %0 = &a (source and destination), %1 = &b.
279 __asm__(
280 CLEAR_FLAGS("%%r12") LOAD_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15") SUB("%1")
281 REDUCE_FIELD_ELEMENT("%[twice_modulus_0]", "%[twice_modulus_1]", "%[twice_modulus_2]", "%[twice_modulus_3]")
282 STORE_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
283 :
284 : "r"(&a),
285 "r"(&b),
286 [twice_modulus_0] "m"(twice_modulus_0),
287 [twice_modulus_1] "m"(twice_modulus_1),
288 [twice_modulus_2] "m"(twice_modulus_2),
289 [twice_modulus_3] "m"(twice_modulus_3)
290 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
291}
292
/**
 * @brief Replace `r` with `twice_modulus - r` when `predicate` is non-zero; otherwise leave its value as is.
 *
 * Steps performed by the asm:
 *  1. load the four limbs of `r` into r8..r11;
 *  2. load the four limbs of `twice_modulus` (as immediates) into r12..r15;
 *  3. subtract r8..r11 from r12..r15 with borrow propagation (subq / sbbq), giving `twice_modulus - r`;
 *  4. test `predicate`; if it is non-zero, conditionally move r12..r15 into r8..r11 (cmovnz);
 *  5. store r8..r11 back into `r`.
 * The selection in step 4 uses conditional moves, so this function contains no jump on `predicate`.
 * The store in step 5 is unconditional: `r` is rewritten even when `predicate` is zero.
 *
 * `CLEAR_FLAGS`, `LOAD_FIELD_ELEMENT` and `STORE_FIELD_ELEMENT` are defined outside this file (presumably
 * asm_macros.hpp).
 *
 * @param r element to negate conditionally; modified in place
 * @param predicate any non-zero value selects the negated result
 */
293template <class T> void field<T>::asm_conditional_negate(field& r, const uint64_t predicate) noexcept
294{
295 constexpr uint64_t twice_modulus_0 = twice_modulus.data[0];
296 constexpr uint64_t twice_modulus_1 = twice_modulus.data[1];
297 constexpr uint64_t twice_modulus_2 = twice_modulus.data[2];
298 constexpr uint64_t twice_modulus_3 = twice_modulus.data[3];
299
// %0 = predicate, %1 = &r. Unlike the other routines in this file, the constants are passed with the "i"
// (immediate) constraint and loaded with movq rather than being referenced as memory operands.
300 __asm__(CLEAR_FLAGS("%%r8") LOAD_FIELD_ELEMENT(
301 "%1", "%%r8", "%%r9", "%%r10", "%%r11") "movq %[twice_modulus_0], %%r12 \n\t"
302 "movq %[twice_modulus_1], %%r13 \n\t"
303 "movq %[twice_modulus_2], %%r14 \n\t"
304 "movq %[twice_modulus_3], %%r15 \n\t"
305 "subq %%r8, %%r12 \n\t"
306 "sbbq %%r9, %%r13 \n\t"
307 "sbbq %%r10, %%r14 \n\t"
308 "sbbq %%r11, %%r15 \n\t"
309 "testq %0, %0 \n\t"
310 "cmovnzq %%r12, %%r8 \n\t"
311 "cmovnzq %%r13, %%r9 \n\t"
312 "cmovnzq %%r14, %%r10 \n\t"
313 "cmovnzq %%r15, %%r11 \n\t" STORE_FIELD_ELEMENT(
314 "%1", "%%r8", "%%r9", "%%r10", "%%r11")
315 :
316 : "r"(predicate),
317 "r"(&r),
318 [twice_modulus_0] "i"(twice_modulus_0),
319 [twice_modulus_1] "i"(twice_modulus_1),
320 [twice_modulus_2] "i"(twice_modulus_2),
321 [twice_modulus_3] "i"(twice_modulus_3)
322 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
323}
324
/**
 * @brief Run one `REDUCE_FIELD_ELEMENT` pass over `a` and return the result.
 *
 * Sequence: `CLEAR_FLAGS`, load `a` into r12..r15, run `REDUCE_FIELD_ELEMENT` with the four limbs of
 * `not_modulus`, then write r12..r15 to the local result.
 *
 * The macros are defined outside this file (presumably asm_macros.hpp), so the exact correction is not
 * visible here. NOTE(review): presumably this conditionally subtracts the modulus a single time -- confirm
 * against the `REDUCE_FIELD_ELEMENT` definition.
 *
 * @param a element to reduce
 * @return the value left in r12..r15 after the reduction macro
 */
325template <class T> field<T> field<T>::asm_reduce_once(const field& a) noexcept
326{
327 field r;
328
329 constexpr uint64_t not_modulus_0 = not_modulus.data[0];
330 constexpr uint64_t not_modulus_1 = not_modulus.data[1];
331 constexpr uint64_t not_modulus_2 = not_modulus.data[2];
332 constexpr uint64_t not_modulus_3 = not_modulus.data[3];
333
// %0 = &a (source), %1 = &r (destination).
334 __asm__(CLEAR_FLAGS("%%r12") LOAD_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
335 REDUCE_FIELD_ELEMENT("%[not_modulus_0]", "%[not_modulus_1]", "%[not_modulus_2]", "%[not_modulus_3]")
336 STORE_FIELD_ELEMENT("%1", "%%r12", "%%r13", "%%r14", "%%r15")
337 :
338 : "r"(&a),
339 "r"(&r),
340 [not_modulus_0] "m"(not_modulus_0),
341 [not_modulus_1] "m"(not_modulus_1),
342 [not_modulus_2] "m"(not_modulus_2),
343 [not_modulus_3] "m"(not_modulus_3)
344 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
345 return r;
346}
347
/**
 * @brief In-place variant of the single reduction pass: reduces `a` and stores the result over `a`.
 *
 * Sequence: `CLEAR_FLAGS`, load `a` into r12..r15, run `REDUCE_FIELD_ELEMENT` with the four limbs of
 * `not_modulus`, then store r12..r15 back through `&a` (operand %0).
 *
 * Although `a` is declared `const field&`, the asm writes through `&a`; the "memory" clobber is what tells
 * the compiler that memory changes. NOTE(review): callers presumably pass a mutable object (e.g. `*this`)
 * -- confirm at the call sites.
 *
 * The macros are defined outside this file (presumably asm_macros.hpp); the exact correction applied by
 * `REDUCE_FIELD_ELEMENT` is not visible here.
 *
 * @param a element to reduce; also the destination of the result
 */
348template <class T> void field<T>::asm_self_reduce_once(const field& a) noexcept
349{
350 constexpr uint64_t not_modulus_0 = not_modulus.data[0];
351 constexpr uint64_t not_modulus_1 = not_modulus.data[1];
352 constexpr uint64_t not_modulus_2 = not_modulus.data[2];
353 constexpr uint64_t not_modulus_3 = not_modulus.data[3];
354
// %0 = &a is both the source and the destination.
355 __asm__(CLEAR_FLAGS("%%r12") LOAD_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
356 REDUCE_FIELD_ELEMENT("%[not_modulus_0]", "%[not_modulus_1]", "%[not_modulus_2]", "%[not_modulus_3]")
357 STORE_FIELD_ELEMENT("%0", "%%r12", "%%r13", "%%r14", "%%r15")
358 :
359 : "r"(&a),
360 [not_modulus_0] "m"(not_modulus_0),
361 [not_modulus_1] "m"(not_modulus_1),
362 [not_modulus_2] "m"(not_modulus_2),
363 [not_modulus_3] "m"(not_modulus_3)
364 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory");
365}
366} // namespace barretenberg
367#endif
constexpr_utils defines some helper methods that perform some stl-equivalent operations but in a cons...
Definition: constexpr_utils.hpp:16