/**
 * Emit `xorq empty_reg, empty_reg`.
 *
 * XOR-ing a register with itself leaves it zero and clears the carry (CF) and
 * overflow (OF) flags, which is the effect the macro name refers to.
 *
 * empty_reg: operand string naming a 64-bit register whose previous contents
 *            may be discarded.
 *
 * Fix: a stray numeral in front of `#define` kept the directive from being
 * recognised by the preprocessor.
 **/
#define CLEAR_FLAGS(empty_reg) \
    "xorq " empty_reg ", " empty_reg " \n\t"
/**
 * Load four consecutive 64-bit words from memory into four registers.
 *
 * a:    operand string for the base address register.
 * lolo: destination for the word at offset 0.
 * lohi: destination for the word at offset 8.
 * hilo: destination for the word at offset 16.
 * hihi: destination for the word at offset 24.
 *
 * Only `movq` is emitted, so the flags are left untouched.
 *
 * Fix: a stray numeral in front of `#define` (and in front of every
 * continuation line) kept the macro from being defined.
 **/
#define LOAD_FIELD_ELEMENT(a, lolo, lohi, hilo, hihi) \
    "movq 0(" a "), " lolo " \n\t" \
    "movq 8(" a "), " lohi " \n\t" \
    "movq 16(" a "), " hilo " \n\t" \
    "movq 24(" a "), " hihi " \n\t"
/**
 * Store four registers to four consecutive 64-bit words in memory.
 *
 * r:    operand string for the base address register of the destination.
 * lolo: source register written to offset 0.
 * lohi: source register written to offset 8.
 * hilo: source register written to offset 16.
 * hihi: source register written to offset 24.
 *
 * Only `movq` is emitted, so the flags are left untouched.
 *
 * Fix: a stray numeral in front of `#define` (and in front of every
 * continuation line) kept the macro from being defined.
 **/
#define STORE_FIELD_ELEMENT(r, lolo, lohi, hilo, hihi) \
    "movq " lolo ", 0(" r ") \n\t" \
    "movq " lohi ", 8(" r ") \n\t" \
    "movq " hilo ", 16(" r ") \n\t" \
    "movq " hihi ", 24(" r ") \n\t"
/*
 * The condition below is true when __ADX__ is not defined or DISABLE_ADX is
 * defined; the instruction sequences that follow it use plain add/adc/sub/sbb.
 *
 * NOTE(review): the numerals at the start of these lines look like line
 * numbers fused in by an extraction step, and no #define header is visible
 * for either instruction run below - confirm against the upstream file.
 */
31#if !defined(__ADX__) || defined(DISABLE_ADX)
/*
 * Adds the four 64-bit words at `b` (offsets 0, 8, 16, 24) to r12, r13, r14,
 * r15 as one carry chain (addq followed by three adcq).
 */
37 "addq 0(" b "), %%r12 \n\t" \
38 "adcq 8(" b "), %%r13 \n\t" \
39 "adcq 16(" b "), %%r14 \n\t" \
40 "adcq 24(" b "), %%r15 \n\t"
/*
 * Subtracts the four 64-bit words at `b` from r12, r13, r14, r15 as one
 * borrow chain (subq followed by three sbbq).
 */
47 "subq 0(" b "), %%r12 \n\t" \
48 "sbbq 8(" b "), %%r13 \n\t" \
49 "sbbq 16(" b "), %%r14 \n\t" \
50 "sbbq 24(" b "), %%r15 \n\t"
/**
 * Add the 4-word value at `b` to (r12, r13, r14, r15), then conditionally
 * apply a second 4-word addition (non-ADX variant).
 *
 * Steps:
 *   1. r12..r15 += words at b (addq/adcq carry chain).
 *   2. The sum is copied to r8..r11.
 *   3. modulus_0..modulus_3 are added to r12..r15 (a fresh addq/adcq chain,
 *      so the carry out of step 1 is not consumed).
 *   4. If step 3 produced no carry out of the top word, the copy from step 2
 *      is moved back (cmovnc), i.e. step 3 is undone.
 *
 * NOTE(review): presumably the modulus_* operands hold the words of
 * 2^256 - p, so that a carry in step 3 means "sum >= p" - confirm at the
 * call sites.
 *
 * b:          operand string for the address of the 4-word addend.
 * modulus_0..modulus_3: operand strings for the four words added in step 3.
 * Clobbers:   r8, r9, r10, r11 and the flags.
 *
 * Fix: a stray numeral in front of `#define` (and in front of every
 * continuation line) kept the macro from being defined.
 **/
#define ADD_REDUCE(b, modulus_0, modulus_1, modulus_2, modulus_3) \
    /* r12..r15 += b */ \
    "addq 0(" b "), %%r12 \n\t" \
    "adcq 8(" b "), %%r13 \n\t" \
    "adcq 16(" b "), %%r14 \n\t" \
    "adcq 24(" b "), %%r15 \n\t" \
    /* keep a copy of the sum */ \
    "movq %%r12, %%r8 \n\t" \
    "movq %%r13, %%r9 \n\t" \
    "movq %%r14, %%r10 \n\t" \
    "movq %%r15, %%r11 \n\t" \
    /* r12..r15 += modulus_* */ \
    "addq " modulus_0 ", %%r12 \n\t" \
    "adcq " modulus_1 ", %%r13 \n\t" \
    "adcq " modulus_2 ", %%r14 \n\t" \
    "adcq " modulus_3 ", %%r15 \n\t" \
    /* no carry: restore the copy */ \
    "cmovncq %%r8, %%r12 \n\t" \
    "cmovncq %%r9, %%r13 \n\t" \
    "cmovncq %%r10, %%r14 \n\t" \
    "cmovncq %%r11, %%r15 \n\t"
/**
 * Conditionally add a 4-word constant to (r12, r13, r14, r15)
 * (non-ADX variant).
 *
 * The current value is copied to r8..r11, neg_modulus_0..neg_modulus_3 are
 * added with an addq/adcq carry chain, and if that addition produced no carry
 * out of the top word the copy is moved back (cmovnc), leaving the value
 * unchanged.
 *
 * NOTE(review): the parameter names suggest the operands are the words of the
 * negated modulus (2^256 - p), making this a conditional subtraction of p -
 * confirm at the call sites.
 *
 * neg_modulus_0..neg_modulus_3: operand strings for the four words to add.
 * Clobbers: r8, r9, r10, r11 and the flags.
 *
 * Fix: stray numerals preceded every line, and four continuation backslashes
 * had slipped onto their own lines, which ended the macro after the first
 * `addq` and left the remaining strings outside it.
 **/
#define REDUCE_FIELD_ELEMENT(neg_modulus_0, neg_modulus_1, neg_modulus_2, neg_modulus_3) \
    /* keep a copy of the input */ \
    "movq %%r12, %%r8 \n\t" \
    "movq %%r13, %%r9 \n\t" \
    "movq %%r14, %%r10 \n\t" \
    "movq %%r15, %%r11 \n\t" \
    /* r12..r15 += neg_modulus_* */ \
    "addq " neg_modulus_0 ", %%r12 \n\t" \
    "adcq " neg_modulus_1 ", %%r13 \n\t" \
    "adcq " neg_modulus_2 ", %%r14 \n\t" \
    "adcq " neg_modulus_3 ", %%r15 \n\t" \
    /* no carry: restore the copy */ \
    "cmovncq %%r8, %%r12 \n\t" \
    "cmovncq %%r9, %%r13 \n\t" \
    "cmovncq %%r10, %%r14 \n\t" \
    "cmovncq %%r11, %%r15 \n\t"
/*
 * Header-less instruction run (non-ADX). It reads four 64-bit words from `a`,
 * forms the products of distinct word pairs with mulx, doubles the partial
 * sums, adds each word multiplied by itself, and then runs four rounds that
 * each multiply an accumulator word by %[r_inv] and add multiples of
 * %[modulus_0..3]. The last additions target r12, r13, r14, r15.
 * Registers written: rdx, rcx, rdi, r8-r15; flags are modified.
 *
 * NOTE(review): no #define header is visible for this run and the leading
 * numerals / stand-alone backslashes look like extraction damage. The shape
 * suggests a squaring routine with an interleaved Montgomery-style reduction -
 * confirm against the upstream file.
 */
104 "movq 0(" a "), %%rdx \n\t" /* rdx = word 0 of a */
\
106 "xorq %%r8, %%r8 \n\t" /* r8 is overwritten below; the xor also clears CF/OF */ \
108 "mulxq 8(" a "), %%r9, %%r10 \n\t"
\
109 "mulxq 16(" a "), %%r8, %%r15 \n\t"
\
110 "mulxq 24(" a "), %%r11, %%r12 \n\t"
\
114 "addq %%r8, %%r10 \n\t" \
115 "adcq %%r15, %%r11 \n\t" \
116 "movq 8(" a "), %%rdx \n\t" /* rdx = word 1 of a */
\
117 "mulxq 16(" a "), %%r8, %%r15 \n\t"
\
118 "mulxq 24(" a "), %%rdi, %%rcx \n\t"
\
119 "movq 24(" a "), %%rdx \n\t" /* rdx = word 3 of a */
\
120 "mulxq 16(" a "), %%r13, %%r14 \n\t"
\
121 "adcq %%rdi, %%r12 \n\t" \
122 "adcq %%rcx, %%r13 \n\t" \
123 "adcq $0, %%r14 \n\t" \
124 "addq %%r8, %%r11 \n\t" \
125 "adcq %%r15, %%r12 \n\t" \
126 "adcq $0, %%r13 \n\t" \
129 "addq %%r9, %%r9 \n\t" /* r9..r14 are doubled as one carry chain */ \
130 "adcq %%r10, %%r10 \n\t" \
131 "adcq %%r11, %%r11 \n\t" \
132 "adcq %%r12, %%r12 \n\t" \
133 "adcq %%r13, %%r13 \n\t" \
134 "adcq %%r14, %%r14 \n\t" \
137 "movq 0(" a "), %%rdx \n\t"
\
138 "mulxq %%rdx, %%r8, %%rcx \n\t" /* word 0 times itself */ \
139 "movq 16(" a "), %%rdx \n\t"
\
140 "mulxq %%rdx, %%rdx, %%rdi \n\t" /* word 2 times itself */ \
142 "addq %%rdx, %%r12 \n\t" \
143 "adcq %%rdi, %%r13 \n\t" \
144 "adcq $0, %%r14 \n\t" \
145 "addq %%rcx, %%r9 \n\t" \
146 "movq 24(" a "), %%rdx \n\t"
\
147 "mulxq %%rdx, %%rcx, %%r15 \n\t" /* word 3 times itself */ \
148 "movq 8(" a "), %%rdx \n\t"
\
149 "mulxq %%rdx, %%rdi, %%rdx \n\t" /* word 1 times itself */ \
150 "adcq %%rdi, %%r10 \n\t" \
151 "adcq %%rdx, %%r11 \n\t" \
152 "adcq $0, %%r12 \n\t" \
153 "addq %%rcx, %%r14 \n\t" \
154 "adcq $0, %%r15 \n\t" \
157 "movq %%r8, %%rdx \n\t" /* round 1: rdx = low half of r8 * r_inv */ \
158 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
159 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" \
160 "addq %%rdi, %%r8 \n\t" \
161 "adcq %%rcx, %%r9 \n\t" \
162 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
163 "adcq %%rcx, %%r10 \n\t" \
164 "adcq $0, %%r11 \n\t" \
166 "addq %%rdi, %%r9 \n\t" \
167 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
168 "mulxq %[modulus_3], %%r8, %%rdx \n\t" \
169 "adcq %%rdi, %%r10 \n\t" \
170 "adcq %%rcx, %%r11 \n\t" \
171 "adcq %%rdx, %%r12 \n\t" \
172 "adcq $0, %%r13 \n\t" \
173 "addq %%r8, %%r11 \n\t" \
174 "adcq $0, %%r12 \n\t" \
177 "movq %%r9, %%rdx \n\t" /* round 2: rdx = low half of r9 * r_inv */ \
178 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
179 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" \
180 "addq %%rdi, %%r9 \n\t" \
181 "adcq %%rcx, %%r10 \n\t" \
182 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
183 "adcq %%rcx, %%r11 \n\t" \
184 "adcq $0, %%r12 \n\t" \
185 "addq %%rdi, %%r10 \n\t" \
186 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
187 "mulxq %[modulus_3], %%r8, %%r9 \n\t" \
188 "adcq %%rdi, %%r11 \n\t" \
189 "adcq %%rcx, %%r12 \n\t" \
190 "adcq %%r9, %%r13 \n\t" \
191 "adcq $0, %%r14 \n\t" \
192 "addq %%r8, %%r12 \n\t" \
193 "adcq $0, %%r13 \n\t" \
196 "movq %%r10, %%rdx \n\t" /* round 3: rdx = low half of r10 * r_inv */ \
197 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
198 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" \
199 "addq %%rdi, %%r10 \n\t" \
200 "adcq %%rcx, %%r11 \n\t" \
201 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
202 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
203 "mulxq %[modulus_3], %%r10, %%rdx \n\t" \
204 "adcq %%rcx, %%r12 \n\t" \
205 "adcq %%r9, %%r13 \n\t" \
206 "adcq %%rdx, %%r14 \n\t" \
207 "adcq $0, %%r15 \n\t" \
208 "addq %%rdi, %%r11 \n\t" \
209 "adcq %%r8, %%r12 \n\t" \
210 "adcq %%r10, %%r13 \n\t" \
211 "adcq $0, %%r14 \n\t" \
214 "movq %%r11, %%rdx \n\t" /* round 4: rdx = low half of r11 * r_inv */ \
215 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
216 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" \
217 "mulxq %[modulus_1], %%r8, %%r9 \n\t" \
218 "addq %%rdi, %%r11 \n\t" \
219 "adcq %%r8, %%r12 \n\t" \
220 "adcq %%r9, %%r13 \n\t" \
221 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
222 "mulxq %[modulus_3], %%r10, %%r11 \n\t" \
223 "adcq %%r9, %%r14 \n\t" \
224 "adcq %%r11, %%r15 \n\t" \
225 "addq %%rcx, %%r12 \n\t" \
226 "adcq %%r8, %%r13 \n\t" \
227 "adcq %%r10, %%r14 \n\t" \
228 "adcq $0, %%r15 \n\t"
/**
 * Multiply four 64-bit words (a1..a4) by the 4-word value at `b`, interleaving
 * a reduction round after each row of partial products (non-ADX variant).
 *
 * Each "row" loads one a-word into rdx and multiplies it by the four words of
 * b with mulx. Each "round" computes k = low 64 bits of (lowest accumulator
 * word * %[r_inv]) and adds k * %[modulus_0..3] into the accumulator.
 * The accumulator rotates through (r13,r14,r15,r10,r12) -> (r14,r15,r10,r12,
 * r13) -> (r15,r10,r12,r13,r14) -> (r10,r12,r13,r14,r15); the final additions
 * leave the four upper words in r12, r13, r14, r15.
 *
 * NOTE(review): this has the shape of a Montgomery multiplication in which
 * each round cancels the lowest accumulator word - confirm, and confirm
 * whether callers expect a further conditional reduction afterwards.
 *
 * a1..a4: operand strings for the four words of the first factor.
 * b:      operand string for the address of the second factor.
 * Also references the named asm operands %[r_inv] and %[modulus_0..3].
 * Clobbers: rdx, rdi, r8-r15 and the flags.
 *
 * Fix: stray numerals preceded every line, and continuation backslashes had
 * slipped onto their own lines, so the macro body ended after its first
 * instruction.
 **/
#define MUL(a1, a2, a3, a4, b) \
    /* row 1: a1 * b */ \
    "movq " a1 ", %%rdx \n\t" \
    "xorq %%r8, %%r8 \n\t" \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" \
    "mulxq 16(" b "), %%r15, %%r10 \n\t" \
    /* round 1: k = r13 * r_inv, computed before the row is summed */ \
    "movq %%r13, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r11 \n\t" \
    "addq %%r8, %%r14 \n\t" \
    "adcq %%r9, %%r15 \n\t" \
    "adcq %%rdi, %%r10 \n\t" \
    "adcq $0, %%r12 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r13 \n\t" \
    "adcq %%rdi, %%r14 \n\t" \
    "adcq %%r11, %%r15 \n\t" \
    "adcq $0, %%r10 \n\t" \
    "adcq $0, %%r12 \n\t" \
    "addq %%r9, %%r14 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "adcq %%r8, %%r15 \n\t" \
    "adcq %%rdi, %%r10 \n\t" \
    "adcq %%r11, %%r12 \n\t" \
    "addq %%r9, %%r10 \n\t" \
    "adcq $0, %%r12 \n\t" \
    /* row 2: a2 * b */ \
    "movq " a2 ", %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r14 \n\t" \
    "adcq %%rdi, %%r15 \n\t" \
    "adcq %%r11, %%r10 \n\t" \
    "adcq $0, %%r12 \n\t" \
    "addq %%r9, %%r15 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r13 \n\t" \
    "adcq %%r8, %%r10 \n\t" \
    "adcq %%rdi, %%r12 \n\t" \
    "adcq $0, %%r13 \n\t" \
    "addq %%r9, %%r12 \n\t" \
    "adcq $0, %%r13 \n\t" \
    /* round 2: k = r14 * r_inv */ \
    "movq %%r14, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r14 \n\t" \
    "adcq %%rdi, %%r15 \n\t" \
    "adcq %%r11, %%r10 \n\t" \
    "adcq $0, %%r12 \n\t" \
    "adcq $0, %%r13 \n\t" \
    "addq %%r9, %%r15 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "adcq %%r8, %%r10 \n\t" \
    "adcq %%r9, %%r12 \n\t" \
    "adcq %%r11, %%r13 \n\t" \
    "addq %%rdi, %%r12 \n\t" \
    "adcq $0, %%r13 \n\t" \
    /* row 3: a3 * b */ \
    "movq " a3 ", %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r15 \n\t" \
    "adcq %%r9, %%r10 \n\t" \
    "adcq %%r11, %%r12 \n\t" \
    "adcq $0, %%r13 \n\t" \
    "addq %%rdi, %%r10 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r14 \n\t" \
    "adcq %%r8, %%r12 \n\t" \
    "adcq %%r9, %%r13 \n\t" \
    "adcq $0, %%r14 \n\t" \
    "addq %%rdi, %%r13 \n\t" \
    "adcq $0, %%r14 \n\t" \
    /* round 3: k = r15 * r_inv */ \
    "movq %%r15, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r15 \n\t" \
    "adcq %%r9, %%r10 \n\t" \
    "adcq %%r11, %%r12 \n\t" \
    "adcq $0, %%r13 \n\t" \
    "adcq $0, %%r14 \n\t" \
    "addq %%rdi, %%r10 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "adcq %%r8, %%r12 \n\t" \
    "adcq %%r9, %%r13 \n\t" \
    "adcq %%r11, %%r14 \n\t" \
    "addq %%rdi, %%r13 \n\t" \
    "adcq $0, %%r14 \n\t" \
    /* row 4: a4 * b */ \
    "movq " a4 ", %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r10 \n\t" \
    "adcq %%r9, %%r12 \n\t" \
    "adcq %%r11, %%r13 \n\t" \
    "adcq $0, %%r14 \n\t" \
    "addq %%rdi, %%r12 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r15 \n\t" \
    "adcq %%r8, %%r13 \n\t" \
    "adcq %%r9, %%r14 \n\t" \
    "adcq $0, %%r15 \n\t" \
    "addq %%rdi, %%r14 \n\t" \
    "adcq $0, %%r15 \n\t" \
    /* round 4: k = r10 * r_inv; result ends up in r12..r15 */ \
    "movq %%r10, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "addq %%r8, %%r10 \n\t" \
    "adcq %%r9, %%r12 \n\t" \
    "adcq %%r11, %%r13 \n\t" \
    "adcq $0, %%r14 \n\t" \
    "adcq $0, %%r15 \n\t" \
    "addq %%rdi, %%r12 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%rdx \n\t" \
    "adcq %%r8, %%r13 \n\t" \
    "adcq %%r9, %%r14 \n\t" \
    "adcq %%rdx, %%r15 \n\t" \
    "addq %%rdi, %%r14 \n\t" \
    "adcq $0, %%r15 \n\t"
/**
 * Multiply the 4-word value at `a` by the 4-word value at `b` and store four
 * result words at `r` (non-ADX variant).
 *
 * Only partial products that land in the four lowest result words are
 * accumulated (r13 = word 0, r14 = word 1, r15 = word 2, rax = word 3); the
 * words are then written to r at offsets 0, 8, 16, 24.
 *
 * NOTE(review): the last `adcq %r8, %rax` consumes the carry produced by the
 * preceding `addq %rdi, %rax` (mulx and movq leave the flags alone), i.e. a
 * carry out of the top word is added back into it. The stored value therefore
 * equals the low 256 bits of a*b only when that carry is zero - presumably
 * callers guarantee the product fits in 256 bits; confirm.
 *
 * a, b: operand strings for the addresses of the two factors.
 * r:    operand string for the address of the 4-word destination.
 * Clobbers: rax, rdx, rsi, rdi, r8, r9, r10, r12, r13, r14, r15 and the flags.
 *
 * Fix: stray numerals preceded every line, and continuation backslashes had
 * slipped onto their own lines, so the macro body ended after its first
 * instruction.
 **/
#define MUL_256(a, b, r) \
    /* row 0: a[0] * b */ \
    "movq 0(" a "), %%rdx \n\t" \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" \
    "mulxq 16(" b "), %%r15, %%rax \n\t" \
    "xorq %%r10, %%r10 \n\t" \
    "addq %%r8, %%r14 \n\t" \
    "adcq %%r9, %%r15 \n\t" \
    "adcq %%r10, %%rax \n\t" \
    "addq %%rdi, %%rax \n\t" \
    /* row 1: a[1] * b[0..2] */ \
    "movq 8(" a "), %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" \
    "addq %%r8, %%r14 \n\t" \
    "adcq %%r9, %%r15 \n\t" \
    "adcq %%rsi, %%rax \n\t" \
    "addq %%rdi, %%r15 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "adcq %%r8, %%rax \n\t" \
    /* row 2: a[2] * b[0..1] */ \
    "movq 16(" a "), %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" \
    "addq %%r8, %%r15 \n\t" \
    "adcq %%r9, %%rax \n\t" \
    "addq %%rdi, %%rax \n\t" \
    /* row 3: a[3] * b[0] */ \
    "movq 24(" a "), %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "adcq %%r8, %%rax \n\t" \
    /* store the four result words */ \
    "movq %%r13, 0(" r ") \n\t" \
    "movq %%r14, 8(" r ") \n\t" \
    "movq %%r15, 16(" r ") \n\t" \
    "movq %%rax, 24(" r ") \n\t"
/*
 * Header-less instruction run: adds the four 64-bit words at `b` to r12, r13,
 * r14, r15 using adcx, which both consumes and produces CF - so CF must
 * already be clear when the first instruction executes.
 *
 * NOTE(review): no #define header is visible for this run or the next one,
 * and the leading numerals look like fused line numbers. From here on the
 * file repeats macro names defined earlier (ADD_REDUCE, REDUCE_FIELD_ELEMENT,
 * MUL, MUL_256) using adcx/adox, yet no #else/#endif for the earlier #if is
 * visible - confirm against the upstream file.
 */
449 "adcxq 0(" b "), %%r12 \n\t" \
450 "adcxq 8(" b "), %%r13 \n\t" \
451 "adcxq 16(" b "), %%r14 \n\t" \
452 "adcxq 24(" b "), %%r15 \n\t"
/*
 * Header-less instruction run: subtracts the four 64-bit words at `b` from
 * r12, r13, r14, r15 as one borrow chain (subq followed by three sbbq).
 */
459 "subq 0(" b "), %%r12 \n\t" \
460 "sbbq 8(" b "), %%r13 \n\t" \
461 "sbbq 16(" b "), %%r14 \n\t" \
462 "sbbq 24(" b "), %%r15 \n\t"
/**
 * Add the 4-word value at `b` to (r12, r13, r14, r15), then conditionally
 * apply a second 4-word addition (ADX variant).
 *
 * Two carry chains are interleaved word by word:
 *   - adcx adds the words at b, carrying through CF;
 *   - the intermediate sum word is copied to r8..r11;
 *   - adox adds modulus_0..modulus_3, carrying through OF.
 * If OF is clear after the last adox (no carry out of the second chain), the
 * copies are moved back with cmovno, undoing the second addition.
 *
 * Both chains start from whatever CF and OF hold on entry, so the caller must
 * have cleared both flags beforehand.
 *
 * NOTE(review): presumably the modulus_* operands hold the words of
 * 2^256 - p, so that a carry in the adox chain means "sum >= p" - confirm at
 * the call sites.
 *
 * b:          operand string for the address of the 4-word addend.
 * modulus_0..modulus_3: operand strings for the words added by the adox chain.
 * Clobbers:   r8, r9, r10, r11 and the flags.
 *
 * Fix: a stray numeral in front of `#define` (and in front of every
 * continuation line) kept the macro from being defined.
 **/
#define ADD_REDUCE(b, modulus_0, modulus_1, modulus_2, modulus_3) \
    /* word 0 */ \
    "adcxq 0(" b "), %%r12 \n\t" \
    "movq %%r12, %%r8 \n\t" \
    "adoxq " modulus_0 ", %%r12 \n\t" \
    /* word 1 */ \
    "adcxq 8(" b "), %%r13 \n\t" \
    "movq %%r13, %%r9 \n\t" \
    "adoxq " modulus_1 ", %%r13 \n\t" \
    /* word 2 */ \
    "adcxq 16(" b "), %%r14 \n\t" \
    "movq %%r14, %%r10 \n\t" \
    "adoxq " modulus_2 ", %%r14 \n\t" \
    /* word 3 */ \
    "adcxq 24(" b "), %%r15 \n\t" \
    "movq %%r15, %%r11 \n\t" \
    "adoxq " modulus_3 ", %%r15 \n\t" \
    /* OF clear: restore the copies */ \
    "cmovnoq %%r8, %%r12 \n\t" \
    "cmovnoq %%r9, %%r13 \n\t" \
    "cmovnoq %%r10, %%r14 \n\t" \
    "cmovnoq %%r11, %%r15 \n\t"
/**
 * Conditionally add a 4-word constant to (r12, r13, r14, r15) (ADX variant).
 *
 * The current value is copied to r8..r11, then neg_modulus_0..neg_modulus_3
 * are added with four adox instructions (carry travels through OF). If OF is
 * clear afterwards, the copies are moved back with cmovno, leaving the value
 * unchanged.
 *
 * The first adox consumes OF, so the caller must have cleared OF beforehand.
 *
 * NOTE(review): the parameter names suggest the operands are the words of the
 * negated modulus (2^256 - p), making this a conditional subtraction of p -
 * confirm at the call sites.
 *
 * neg_modulus_0..neg_modulus_3: operand strings for the four words to add.
 * Clobbers: r8, r9, r10, r11 and the flags.
 *
 * Fix: stray numerals preceded every line, and four continuation backslashes
 * had slipped onto their own lines, which ended the macro after the first
 * `adoxq` and left the remaining strings outside it.
 **/
#define REDUCE_FIELD_ELEMENT(neg_modulus_0, neg_modulus_1, neg_modulus_2, neg_modulus_3) \
    /* keep a copy of the input */ \
    "movq %%r12, %%r8 \n\t" \
    "movq %%r13, %%r9 \n\t" \
    "movq %%r14, %%r10 \n\t" \
    "movq %%r15, %%r11 \n\t" \
    /* r12..r15 += neg_modulus_*, carrying through OF */ \
    "adoxq " neg_modulus_0 ", %%r12 \n\t" \
    "adoxq " neg_modulus_1 ", %%r13 \n\t" \
    "adoxq " neg_modulus_2 ", %%r14 \n\t" \
    "adoxq " neg_modulus_3 ", %%r15 \n\t" \
    /* OF clear: restore the copy */ \
    "cmovnoq %%r8, %%r12 \n\t" \
    "cmovnoq %%r9, %%r13 \n\t" \
    "cmovnoq %%r10, %%r14 \n\t" \
    "cmovnoq %%r11, %%r15 \n\t"
/*
 * Header-less instruction run (ADX). It reads four 64-bit words from `a`,
 * forms the products of distinct word pairs with mulx, doubles the partial
 * sums, adds each word multiplied by itself, and then runs four rounds that
 * each multiply an accumulator word by %[r_inv] and add multiples of
 * %[modulus_0..3]. Additions alternate between adcx (CF chain) and adox
 * (OF chain); %[zero_reference] is added where only a pending carry needs to
 * be absorbed. The last additions target r12, r13, r14, r15.
 * Registers written: rdx, rcx, rdi, r8-r15; flags are modified.
 *
 * NOTE(review): no #define header is visible for this run and the leading
 * numerals / stand-alone backslashes look like extraction damage. The shape
 * suggests a squaring routine with an interleaved Montgomery-style reduction,
 * and %[zero_reference] is presumably an operand holding 0 - confirm against
 * the upstream file.
 */
520 "movq 0(" a "), %%rdx \n\t" /* rdx = word 0 of a */
\
522 "xorq %%r8, %%r8 \n\t" /* clears CF and OF before the adcx/adox chains */ \
524 "mulxq 8(" a "), %%r9, %%r10 \n\t"
\
525 "mulxq 16(" a "), %%r8, %%r15 \n\t"
\
526 "mulxq 24(" a "), %%r11, %%r12 \n\t"
\
530 "adoxq %%r8, %%r10 \n\t" \
531 "adcxq %%r15, %%r11 \n\t" \
532 "movq 8(" a "), %%rdx \n\t" /* rdx = word 1 of a */
\
533 "mulxq 16(" a "), %%r8, %%r15 \n\t"
\
534 "mulxq 24(" a "), %%rdi, %%rcx \n\t"
\
535 "movq 24(" a "), %%rdx \n\t" /* rdx = word 3 of a */
\
536 "mulxq 16(" a "), %%r13, %%r14 \n\t"
\
537 "adoxq %%r8, %%r11 \n\t" \
538 "adcxq %%rdi, %%r12 \n\t" \
539 "adoxq %%r15, %%r12 \n\t" \
540 "adcxq %%rcx, %%r13 \n\t" \
541 "adoxq %[zero_reference], %%r13 \n\t" \
542 "adcxq %[zero_reference], %%r14 \n\t" \
543 "adoxq %[zero_reference], %%r14 \n\t" \
546 "adoxq %%r9, %%r9 \n\t" /* r9..r11 doubled on the OF chain, r12..r14 on the CF chain */ \
547 "adcxq %%r12, %%r12 \n\t" \
548 "adoxq %%r10, %%r10 \n\t" \
549 "adcxq %%r13, %%r13 \n\t" \
550 "adoxq %%r11, %%r11 \n\t" \
551 "adcxq %%r14, %%r14 \n\t" \
554 "movq 0(" a "), %%rdx \n\t"
\
555 "mulxq %%rdx, %%r8, %%rcx \n\t" /* word 0 times itself */ \
556 "movq 16(" a "), %%rdx \n\t"
\
557 "mulxq %%rdx, %%rdx, %%rdi \n\t" /* word 2 times itself */ \
559 "adcxq %%rcx, %%r9 \n\t" \
560 "adoxq %%rdx, %%r12 \n\t" \
561 "adoxq %%rdi, %%r13 \n\t" \
562 "movq 24(" a "), %%rdx \n\t"
\
563 "mulxq %%rdx, %%rcx, %%r15 \n\t" /* word 3 times itself */ \
564 "movq 8(" a "), %%rdx \n\t"
\
565 "mulxq %%rdx, %%rdi, %%rdx \n\t" /* word 1 times itself */ \
566 "adcxq %%rdi, %%r10 \n\t" \
567 "adcxq %%rdx, %%r11 \n\t" \
568 "adoxq %%rcx, %%r14 \n\t" \
569 "adoxq %[zero_reference], %%r15 \n\t" \
572 "movq %%r8, %%rdx \n\t" /* round 1: rdx = low half of r8 * r_inv */ \
573 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
574 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" \
575 "adoxq %%rdi, %%r8 \n\t" \
576 "mulxq %[modulus_3], %%r8, %%rdi \n\t" \
577 "adcxq %%rdi, %%r12 \n\t" \
578 "adoxq %%rcx, %%r9 \n\t" \
579 "adcxq %[zero_reference], %%r13 \n\t" \
580 "adcxq %[zero_reference], %%r14 \n\t" \
581 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
582 "adcxq %[zero_reference], %%r15 \n\t" \
583 "adoxq %%rcx, %%r10 \n\t" \
584 "adcxq %%rdi, %%r9 \n\t" \
585 "adoxq %%r8, %%r11 \n\t" \
586 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
587 "adcxq %%rdi, %%r10 \n\t" \
588 "adcxq %%rcx, %%r11 \n\t" \
591 "movq %%r9, %%rdx \n\t" /* round 2: rdx = low half of r9 * r_inv */ \
592 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
593 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
594 "adoxq %%rcx, %%r12 \n\t" \
595 "mulxq %[modulus_3], %%r8, %%rcx \n\t" \
596 "adcxq %%r8, %%r12 \n\t" \
597 "adoxq %%rcx, %%r13 \n\t" \
598 "adcxq %[zero_reference], %%r13 \n\t" \
599 "adoxq %[zero_reference], %%r14 \n\t" \
600 "adcxq %[zero_reference], %%r14 \n\t" \
601 "adoxq %[zero_reference], %%r15 \n\t" \
602 "adcxq %[zero_reference], %%r15 \n\t" \
603 "mulxq %[modulus_0], %%r8, %%rcx \n\t" \
604 "adcxq %%r8, %%r9 \n\t" \
605 "adoxq %%rcx, %%r10 \n\t" \
606 "mulxq %[modulus_1], %%r8, %%rcx \n\t" \
607 "adcxq %%r8, %%r10 \n\t" \
608 "adoxq %%rcx, %%r11 \n\t" \
609 "adcxq %%rdi, %%r11 \n\t" \
612 "movq %%r10, %%rdx \n\t" /* round 3: rdx = low half of r10 * r_inv */ \
613 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
614 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
615 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
616 "adoxq %%rcx, %%r12 \n\t" \
617 "adcxq %%r8, %%r12 \n\t" \
618 "adoxq %%r9, %%r13 \n\t" \
619 "mulxq %[modulus_3], %%r8, %%r9 \n\t" \
620 "adcxq %%r8, %%r13 \n\t" \
621 "adoxq %%r9, %%r14 \n\t" \
622 "adcxq %[zero_reference], %%r14 \n\t" \
623 "adoxq %[zero_reference], %%r15 \n\t" \
624 "adcxq %[zero_reference], %%r15 \n\t" \
625 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
626 "adcxq %%r8, %%r10 \n\t" \
627 "adoxq %%r9, %%r11 \n\t" \
628 "adcxq %%rdi, %%r11 \n\t" \
629 "adoxq %[zero_reference], %%r12 \n\t" \
630 "adoxq %[zero_reference], %%r13 \n\t" \
633 "movq %%r11, %%rdx \n\t" /* round 4: rdx = low half of r11 * r_inv */ \
634 "mulxq %[r_inv], %%rdx, %%rdi \n\t" \
635 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" \
636 "mulxq %[modulus_1], %%r8, %%r9 \n\t" \
637 "adoxq %%rdi, %%r11 \n\t" \
638 "adcxq %%r8, %%r12 \n\t" \
639 "adoxq %%rcx, %%r12 \n\t" \
640 "adcxq %%r9, %%r13 \n\t" \
641 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
642 "mulxq %[modulus_3], %%r10, %%r11 \n\t" \
643 "adoxq %%r8, %%r13 \n\t" \
644 "adcxq %%r10, %%r14 \n\t" \
645 "adoxq %%r9, %%r14 \n\t" \
646 "adcxq %%r11, %%r15 \n\t" \
647 "adoxq %[zero_reference], %%r15 \n\t"
/**
 * Multiply four 64-bit words (a1..a4) by the 4-word value at `b`, interleaving
 * a reduction round after each row of partial products (ADX variant).
 *
 * Each "row" loads one a-word into rdx and multiplies it by the four words of
 * b with mulx. Each "round" computes k = low 64 bits of (lowest accumulator
 * word * %[r_inv]) and adds k * %[modulus_0..3] into the accumulator.
 * Additions alternate between adcx (carry in CF) and adox (carry in OF) so two
 * independent carry chains are in flight; %[zero_reference] is added where
 * only a pending carry has to be absorbed. The opening xorq clears both flags.
 * The final additions leave the four upper words in r12, r13, r14, r15.
 *
 * NOTE(review): this has the shape of a Montgomery multiplication in which
 * each round cancels the lowest accumulator word, and %[zero_reference] is
 * presumably an operand holding 0 - confirm.
 *
 * a1..a4: operand strings for the four words of the first factor.
 * b:      operand string for the address of the second factor.
 * Also references the named asm operands %[r_inv], %[modulus_0..3] and
 * %[zero_reference].
 * Clobbers: rdx, rdi, r8-r15 and the flags.
 *
 * Fix: stray numerals preceded every line, and continuation backslashes had
 * slipped onto their own lines, so the macro body ended after its first
 * instruction.
 **/
#define MUL(a1, a2, a3, a4, b) \
    /* row 1: a1 * b (xorq clears CF and OF) */ \
    "movq " a1 ", %%rdx \n\t" \
    "xorq %%r8, %%r8 \n\t" \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" \
    "mulxq 16(" b "), %%r15, %%r10 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" \
    /* round 1: k = r13 * r_inv */ \
    "movq %%r13, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r11 \n\t" \
    "adcxq %%r8, %%r14 \n\t" \
    "adoxq %%rdi, %%r10 \n\t" \
    "adcxq %%r9, %%r15 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "adcxq %%rdi, %%r10 \n\t" \
    "adoxq %%r11, %%r12 \n\t" \
    "adcxq %[zero_reference], %%r12 \n\t" \
    "adoxq %%r8, %%r13 \n\t" \
    "adcxq %%r9, %%r14 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "adoxq %%rdi, %%r14 \n\t" \
    "adcxq %%r11, %%r15 \n\t" \
    "adoxq %%r8, %%r15 \n\t" \
    "adcxq %%r9, %%r10 \n\t" \
    /* row 2: a2 * b */ \
    "movq " a2 ", %%rdx \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r13 \n\t" \
    "adoxq %%r8, %%r10 \n\t" \
    "adcxq %%rdi, %%r12 \n\t" \
    "adoxq %%r9, %%r12 \n\t" \
    "adcxq %[zero_reference], %%r13 \n\t" \
    "adoxq %[zero_reference], %%r13 \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "adcxq %%r8, %%r14 \n\t" \
    "adoxq %%r9, %%r15 \n\t" \
    "adcxq %%rdi, %%r15 \n\t" \
    "adoxq %%r11, %%r10 \n\t" \
    /* round 2: k = r14 * r_inv */ \
    "movq %%r14, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "adcxq %%r8, %%r10 \n\t" \
    "adoxq %%r9, %%r12 \n\t" \
    "adcxq %%rdi, %%r12 \n\t" \
    "adoxq %%r11, %%r13 \n\t" \
    "adcxq %[zero_reference], %%r13 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "adoxq %%r8, %%r14 \n\t" \
    "adcxq %%rdi, %%r15 \n\t" \
    "adoxq %%r9, %%r15 \n\t" \
    "adcxq %%r11, %%r10 \n\t" \
    /* row 3: a3 * b */ \
    "movq " a3 ", %%rdx \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "adoxq %%rdi, %%r10 \n\t" \
    "adcxq %%r11, %%r12 \n\t" \
    "adoxq %%r8, %%r12 \n\t" \
    "adcxq %%r9, %%r13 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r14 \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "adoxq %%rdi, %%r13 \n\t" \
    "adcxq %[zero_reference], %%r14 \n\t" \
    "adoxq %[zero_reference], %%r14 \n\t" \
    "adcxq %%r8, %%r15 \n\t" \
    "adoxq %%r9, %%r10 \n\t" \
    /* round 3: k = r15 * r_inv */ \
    "movq %%r15, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "adcxq %%rdi, %%r10 \n\t" \
    "adoxq %%r11, %%r12 \n\t" \
    "adcxq %%r8, %%r12 \n\t" \
    "adoxq %%r9, %%r13 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "adcxq %%rdi, %%r13 \n\t" \
    "adoxq %%r11, %%r14 \n\t" \
    "adcxq %[zero_reference], %%r14 \n\t" \
    "adoxq %%r8, %%r15 \n\t" \
    "adcxq %%r9, %%r10 \n\t" \
    /* row 4: a4 * b */ \
    "movq " a4 ", %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "adoxq %%r8, %%r10 \n\t" \
    "adcxq %%r9, %%r12 \n\t" \
    "adoxq %%rdi, %%r12 \n\t" \
    "adcxq %%r11, %%r13 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r15 \n\t" \
    "adoxq %%r8, %%r13 \n\t" \
    "adcxq %%r9, %%r14 \n\t" \
    "adoxq %%rdi, %%r14 \n\t" \
    "adcxq %[zero_reference], %%r15 \n\t" \
    "adoxq %[zero_reference], %%r15 \n\t" \
    /* round 4: k = r10 * r_inv; result ends up in r12..r15 */ \
    "movq %%r10, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "adoxq %%r8, %%r10 \n\t" \
    "adcxq %%r9, %%r12 \n\t" \
    "adoxq %%rdi, %%r12 \n\t" \
    "adcxq %%r11, %%r13 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%rdx \n\t" \
    "adoxq %%r8, %%r13 \n\t" \
    "adcxq %%r9, %%r14 \n\t" \
    "adoxq %%rdi, %%r14 \n\t" \
    "adcxq %%rdx, %%r15 \n\t" \
    "adoxq %[zero_reference], %%r15 \n\t"
/**
 * Multiply four 64-bit words (a1..a4) by the 4-word value at `b`, interleaving
 * a reduction round after each row of partial products, using mulx with
 * adcx/adox carry chains.
 *
 * Each "row" loads one a-word into rdx and multiplies it by the four words of
 * b. Each "round" computes k = low 64 bits of (lowest accumulator word *
 * %[r_inv]) and adds k * %[modulus_0..3] into the accumulator. adcx carries
 * through CF and adox through OF; %[zero_reference] is added where only a
 * pending carry has to be absorbed. The opening xorq clears both flags. The
 * final additions leave the four upper words in r12, r13, r14, r15.
 *
 * NOTE(review): the instruction sequence appears to be identical to the
 * adcx/adox version of MUL in this file - confirm whether this duplicate is
 * still needed. %[zero_reference] is presumably an operand holding 0.
 *
 * a1..a4: operand strings for the four words of the first factor.
 * b:      operand string for the address of the second factor.
 * Also references the named asm operands %[r_inv], %[modulus_0..3] and
 * %[zero_reference].
 * Clobbers: rdx, rdi, r8-r15 and the flags.
 *
 * Fix: stray numerals preceded every line, and continuation backslashes had
 * slipped onto their own lines, so the macro body ended after its first
 * instruction.
 **/
#define MUL_FOO(a1, a2, a3, a4, b) \
    /* row 1: a1 * b (xorq clears CF and OF) */ \
    "movq " a1 ", %%rdx \n\t" \
    "xorq %%r8, %%r8 \n\t" \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" \
    "mulxq 16(" b "), %%r15, %%r10 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" \
    /* round 1: k = r13 * r_inv */ \
    "movq %%r13, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r11 \n\t" \
    "adcxq %%r8, %%r14 \n\t" \
    "adoxq %%rdi, %%r10 \n\t" \
    "adcxq %%r9, %%r15 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "adcxq %%rdi, %%r10 \n\t" \
    "adoxq %%r11, %%r12 \n\t" \
    "adcxq %[zero_reference], %%r12 \n\t" \
    "adoxq %%r8, %%r13 \n\t" \
    "adcxq %%r9, %%r14 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "adoxq %%rdi, %%r14 \n\t" \
    "adcxq %%r11, %%r15 \n\t" \
    "adoxq %%r8, %%r15 \n\t" \
    "adcxq %%r9, %%r10 \n\t" \
    /* row 2: a2 * b */ \
    "movq " a2 ", %%rdx \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r13 \n\t" \
    "adoxq %%r8, %%r10 \n\t" \
    "adcxq %%rdi, %%r12 \n\t" \
    "adoxq %%r9, %%r12 \n\t" \
    "adcxq %[zero_reference], %%r13 \n\t" \
    "adoxq %[zero_reference], %%r13 \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "adcxq %%r8, %%r14 \n\t" \
    "adoxq %%r9, %%r15 \n\t" \
    "adcxq %%rdi, %%r15 \n\t" \
    "adoxq %%r11, %%r10 \n\t" \
    /* round 2: k = r14 * r_inv */ \
    "movq %%r14, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "adcxq %%r8, %%r10 \n\t" \
    "adoxq %%r9, %%r12 \n\t" \
    "adcxq %%rdi, %%r12 \n\t" \
    "adoxq %%r11, %%r13 \n\t" \
    "adcxq %[zero_reference], %%r13 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "adoxq %%r8, %%r14 \n\t" \
    "adcxq %%rdi, %%r15 \n\t" \
    "adoxq %%r9, %%r15 \n\t" \
    "adcxq %%r11, %%r10 \n\t" \
    /* row 3: a3 * b */ \
    "movq " a3 ", %%rdx \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "adoxq %%rdi, %%r10 \n\t" \
    "adcxq %%r11, %%r12 \n\t" \
    "adoxq %%r8, %%r12 \n\t" \
    "adcxq %%r9, %%r13 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r14 \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "adoxq %%rdi, %%r13 \n\t" \
    "adcxq %[zero_reference], %%r14 \n\t" \
    "adoxq %[zero_reference], %%r14 \n\t" \
    "adcxq %%r8, %%r15 \n\t" \
    "adoxq %%r9, %%r10 \n\t" \
    /* round 3: k = r15 * r_inv */ \
    "movq %%r15, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "adcxq %%rdi, %%r10 \n\t" \
    "adoxq %%r11, %%r12 \n\t" \
    "adcxq %%r8, %%r12 \n\t" \
    "adoxq %%r9, %%r13 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "adcxq %%rdi, %%r13 \n\t" \
    "adoxq %%r11, %%r14 \n\t" \
    "adcxq %[zero_reference], %%r14 \n\t" \
    "adoxq %%r8, %%r15 \n\t" \
    "adcxq %%r9, %%r10 \n\t" \
    /* row 4: a4 * b */ \
    "movq " a4 ", %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
    "adoxq %%r8, %%r10 \n\t" \
    "adcxq %%r9, %%r12 \n\t" \
    "adoxq %%rdi, %%r12 \n\t" \
    "adcxq %%r11, %%r13 \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r15 \n\t" \
    "adoxq %%r8, %%r13 \n\t" \
    "adcxq %%r9, %%r14 \n\t" \
    "adoxq %%rdi, %%r14 \n\t" \
    "adcxq %[zero_reference], %%r15 \n\t" \
    "adoxq %[zero_reference], %%r15 \n\t" \
    /* round 4: k = r10 * r_inv; result ends up in r12..r15 */ \
    "movq %%r10, %%rdx \n\t" \
    "mulxq %[r_inv], %%rdx, %%r8 \n\t" \
    "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
    "adoxq %%r8, %%r10 \n\t" \
    "adcxq %%r9, %%r12 \n\t" \
    "adoxq %%rdi, %%r12 \n\t" \
    "adcxq %%r11, %%r13 \n\t" \
    "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
    "mulxq %[modulus_3], %%rdi, %%rdx \n\t" \
    "adoxq %%r8, %%r13 \n\t" \
    "adcxq %%r9, %%r14 \n\t" \
    "adoxq %%rdi, %%r14 \n\t" \
    "adcxq %%rdx, %%r15 \n\t" \
    "adoxq %[zero_reference], %%r15 \n\t"
/**
 * Multiply the 4-word value at `a` by the 4-word value at `b` and store four
 * result words at `r` (ADX variant).
 *
 * Only partial products that land in the four lowest result words are
 * accumulated (r13 = word 0, r14 = word 1, r15 = word 2, rax = word 3), using
 * adcx (carry in CF) and adox (carry in OF); the words are then written to r
 * at offsets 0, 8, 16, 24. The xorq of r10 clears both flags and provides a
 * zero register for absorbing a carry.
 *
 * NOTE(review): CF and OF are cleared only once, and additions into the top
 * word (rax) feed their carry-out to whichever adcx/adox comes next. The
 * stored value therefore equals the low 256 bits of a*b only when those
 * top-word additions never carry - presumably callers guarantee the product
 * fits in 256 bits; confirm.
 *
 * a, b: operand strings for the addresses of the two factors.
 * r:    operand string for the address of the 4-word destination.
 * Clobbers: rax, rdx, rsi, rdi, r8, r9, r10, r12, r13, r14, r15 and the flags.
 *
 * Fix: stray numerals preceded every line, and continuation backslashes had
 * slipped onto their own lines, so the macro body ended after its first
 * instruction.
 **/
#define MUL_256(a, b, r) \
    /* row 0: a[0] * b */ \
    "movq 0(" a "), %%rdx \n\t" \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" \
    "mulxq 16(" b "), %%r15, %%rax \n\t" \
    "xorq %%r10, %%r10 \n\t" \
    "adcxq %%r8, %%r14 \n\t" \
    "adoxq %%rdi, %%rax \n\t" \
    "adcxq %%r9, %%r15 \n\t" \
    "adcxq %%r10, %%rax \n\t" \
    /* row 1: a[1] * b[0..2] */ \
    "movq 8(" a "), %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" \
    "adcxq %%r8, %%r14 \n\t" \
    "adoxq %%r9, %%r15 \n\t" \
    "adcxq %%rdi, %%r15 \n\t" \
    "adoxq %%rsi, %%rax \n\t" \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" \
    "adcxq %%r8, %%rax \n\t" \
    /* row 2: a[2] * b[0..1] */ \
    "movq 16(" a "), %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" \
    "adcxq %%r8, %%r15 \n\t" \
    "adoxq %%r9, %%rax \n\t" \
    "adcxq %%rdi, %%rax \n\t" \
    /* row 3: a[3] * b[0] */ \
    "movq 24(" a "), %%rdx \n\t" \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" \
    "adcxq %%r8, %%rax \n\t" \
    /* store the four result words */ \
    "movq %%r13, 0(" r ") \n\t" \
    "movq %%r14, 8(" r ") \n\t" \
    "movq %%r15, 16(" r ") \n\t" \
    "movq %%rax, 24(" r ") \n\t"