barretenberg
Loading...
Searching...
No Matches
blake3-impl.hpp
1#pragma once
2/*
3 BLAKE3 reference source code package - C implementations
4
5 Intellectual property:
6
7 The Rust code is copyright Jack O'Connor, 2019-2020.
8 The C code is copyright Samuel Neves and Jack O'Connor, 2019-2020.
9 The assembly code is copyright Samuel Neves, 2019-2020.
10
11 This work is released into the public domain with CC0 1.0. Alternatively, it is licensed under the Apache
12 License 2.0.
13
14 - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
15 - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
16
17 More information about the BLAKE3 hash function can be found at
18 https://github.com/BLAKE3-team/BLAKE3.
19*/
20
21#ifndef BLAKE3_IMPL_H
22#define BLAKE3_IMPL_H
23
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

// The x86 intrinsics headers must be included at global scope, before
// namespace blake3_full is opened, so that their declarations do not end up
// inside the namespace.
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include <immintrin.h>
#endif

#include "blake3s.hpp"
31
32namespace blake3_full {
33
// This implementation tries to support recent versions of GCC, Clang, and
// MSVC. INLINE marks a helper as file-local and asks the compiler to always
// inline it.
#if defined(_MSC_VER)
#define INLINE static __forceinline
#else
#define INLINE static inline __attribute__((always_inline))
#endif

// Target detection: IS_X86 is defined for any x86 build, together with
// exactly one of IS_X86_64 / IS_X86_32.
#if defined(__x86_64__) || defined(_M_X64)
#define IS_X86
#define IS_X86_64
#endif

#if defined(__i386__) || defined(_M_IX86)
#define IS_X86
#define IS_X86_32
#endif

// NOTE: <intrin.h> and <immintrin.h> are intentionally NOT included here.
// This point is inside namespace blake3_full, and including a system header
// inside a namespace places its declarations in that namespace. They are
// included with the other headers at the top of the file instead.

// Upper bound on the SIMD degree. Upstream BLAKE3 uses 16 on x86 and 4 when
// BLAKE3_USE_NEON is defined; SIMD back ends are disabled in this build, so
// the bound is pinned to 1.
#define MAX_SIMD_DEGREE 1

// There are some places where we want a static size that's equal to the
// MAX_SIMD_DEGREE, but also at least 2.
#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
70
// The SIMD degree reported for the current platform.
//
// Upstream BLAKE3 detects CPU features at run time and answers 16
// (AVX-512F + AVX-512VL), 8 (AVX2) or 4 (SSE4.1, SSE2, or NEON builds).
// That detection is disabled here because we currently don't need SIMD for
// different hardware, so the answer is always 1. To be revisited later.
//
// Declared `static inline` because the definition lives in a header: a plain
// external definition would be emitted by every translation unit that
// includes this file and fail at link time with duplicate symbols.
static inline size_t blake3_simd_degree(void)
{
    return 1;
}
109
110/*----------------------------------------------------------------
111 *
 * Commented out because we currently don't need SIMD for different hardware.
 * To be revisited later.
114 *
115
116enum cpu_feature get_cpu_features() {
117 if (g_cpu_features != UNDEFINED) {
118 return g_cpu_features;
119 } else {
120#if defined(IS_X86)
121 uint32_t regs[4] = {0};
122 uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
123 (void)edx;
124 enum cpu_feature features = 0;
125 cpuid(regs, 0);
126 const int max_id = *eax;
127 cpuid(regs, 1);
128#if defined(__amd64__) || defined(_M_X64)
129 features |= SSE2;
130#else
131 if (*edx & (1UL << 26))
132 features |= SSE2;
133#endif
134 if (*ecx & (1UL << 0))
135 features |= SSSE3;
136 if (*ecx & (1UL << 19))
137 features |= SSE41;
138
139 if (*ecx & (1UL << 27)) { // OSXSAVE
140 const uint64_t mask = xgetbv();
141 if ((mask & 6) == 6) { // SSE and AVX states
142 if (*ecx & (1UL << 28))
143 features |= AVX;
144 if (max_id >= 7) {
145 cpuidex(regs, 7, 0);
146 if (*ebx & (1UL << 5))
147 features |= AVX2;
148 if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
149 if (*ebx & (1UL << 31))
150 features |= AVX512VL;
151 if (*ebx & (1UL << 16))
152 features |= AVX512F;
153 }
154 }
155 }
156 }
157 g_cpu_features = features;
158 return features;
159#else
160 // How to detect NEON?
161 return 0;
162#endif
163 }
164}
165----------------------------------------------------------------*/
166
/* Returns the zero-based index of the most significant set bit of x. */
/* The caller must pass a nonzero x. */
static unsigned int highest_one(uint64_t x)
{
#if defined(__GNUC__) || defined(__clang__)
    // The builtin counts the zero bits above the top set bit (0..63 for a
    // nonzero x), so the index of that bit is 63 minus the count.
    return 63u - (unsigned int)__builtin_clzll(x);
#elif defined(_MSC_VER) && defined(IS_X86_64)
    unsigned long bit;
    _BitScanReverse64(&bit, x);
    return bit;
#elif defined(_MSC_VER) && defined(IS_X86_32)
    // Only a 32-bit scan is available: look at the upper half first and fall
    // back to the lower half when the upper half is empty.
    unsigned long bit;
    if (x >> 32) {
        _BitScanReverse(&bit, x >> 32);
        return 32 + bit;
    }
    _BitScanReverse(&bit, x);
    return bit;
#else
    // Portable binary search: at each step, if anything is set above the
    // current window width, discard the low part and record the offset.
    unsigned int pos = 0;
    for (unsigned int width = 32; width != 0; width >>= 1) {
        if (x >> width) {
            x >>= width;
            pos += width;
        }
    }
    return pos;
#endif
}
215
216// Count the number of 1 bits.
217INLINE unsigned int popcnt(uint64_t x)
218{
219#if defined(__GNUC__) || defined(__clang__)
220 return uint32_t(__builtin_popcountll(x));
221#else
222 unsigned int count = 0;
223 while (x != 0) {
224 count += 1;
225 x &= x - 1;
226 }
227 return count;
228#endif
229}
230
231// Right rotates 32 bit inputs
232INLINE uint32_t rotr32(uint32_t w, uint32_t c)
233{
234 return (w >> c) | (w << (32 - c));
235}
236
237// Largest power of two less than or equal to x. As a special case, returns 1
238// when x is 0.
239INLINE uint64_t round_down_to_power_of_2(uint64_t x)
240{
241 return 1ULL << highest_one(x | 1);
242}
243
244INLINE uint32_t counter_low(uint64_t counter)
245{
246 return (uint32_t)counter;
247}
248
249INLINE uint32_t counter_high(uint64_t counter)
250{
251 return (uint32_t)(counter >> 32);
252}
253
254INLINE uint32_t load32(const void* src)
255{
256 const uint8_t* p = (const uint8_t*)src;
257 return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
258}
259
260INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])
261{
262 key_words[0] = load32(&key[0 * 4]);
263 key_words[1] = load32(&key[1 * 4]);
264 key_words[2] = load32(&key[2 * 4]);
265 key_words[3] = load32(&key[3 * 4]);
266 key_words[4] = load32(&key[4 * 4]);
267 key_words[5] = load32(&key[5 * 4]);
268 key_words[6] = load32(&key[6 * 4]);
269 key_words[7] = load32(&key[7 * 4]);
270}
271
272INLINE void store32(void* dst, uint32_t w)
273{
274 uint8_t* p = (uint8_t*)dst;
275 p[0] = (uint8_t)(w >> 0);
276 p[1] = (uint8_t)(w >> 8);
277 p[2] = (uint8_t)(w >> 16);
278 p[3] = (uint8_t)(w >> 24);
279}
280
281INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])
282{
283 store32(&bytes_out[0 * 4], cv_words[0]);
284 store32(&bytes_out[1 * 4], cv_words[1]);
285 store32(&bytes_out[2 * 4], cv_words[2]);
286 store32(&bytes_out[3 * 4], cv_words[3]);
287 store32(&bytes_out[4 * 4], cv_words[4]);
288 store32(&bytes_out[5 * 4], cv_words[5]);
289 store32(&bytes_out[6 * 4], cv_words[6]);
290 store32(&bytes_out[7 * 4], cv_words[7]);
291}
292
293} // namespace blake3_full
294
295#endif /* BLAKE3_IMPL_H */