32namespace blake3_full {
/* INLINE, MSVC-style spelling: internal linkage plus the __forceinline keyword. */
37#define INLINE static __forceinline
/* INLINE, GCC/Clang-style spelling: internal linkage plus the always_inline attribute.
 * NOTE(review): the two INLINE definitions are presumably alternatives selected
 * by a compiler check -- confirm against the surrounding conditionals. */
39#define INLINE static inline __attribute__((always_inline))
/* True when either the GCC/Clang (__x86_64__) or the MSVC (_M_X64) macro for a
 * 64-bit x86 target is defined.
 * NOTE(review): presumably where IS_X86_64 gets defined -- confirm. */
42#if defined(__x86_64__) || defined(_M_X64)
/* Same test for a 32-bit x86 target (__i386__ / _M_IX86).
 * NOTE(review): presumably where IS_X86_32 gets defined -- confirm. */
47#if defined(__i386__) || defined(_M_IX86)
/* SIMD degree ceiling of 1.
 * NOTE(review): looks like the value used when no SIMD backend is enabled -- confirm. */
64#define MAX_SIMD_DEGREE 1
/* MAX_SIMD_DEGREE with a floor of 2: expands to MAX_SIMD_DEGREE when it exceeds 2,
 * otherwise to 2. */
69#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
/* Takes no arguments and returns a size_t.
 * NOTE(review): presumably reports how many inputs the selected backend hashes
 * in parallel (cf. MAX_SIMD_DEGREE) -- confirm against the function body. */
77size_t blake3_simd_degree(
void)
/*
 * Returns the zero-based index of the most significant set bit of x.
 *
 * The implementation is chosen by the preprocessor: a GCC/Clang builtin, MSVC
 * bit-scan intrinsics (64-bit and 32-bit x86 variants), or a portable fallback.
 */
169static unsigned int highest_one(uint64_t x)
171#if defined(__GNUC__) || defined(__clang__)
/* clzll yields the number of leading zero bits (0..63 for nonzero x), so
 * 63 ^ clz equals 63 - clz, the index of the top set bit. The builtin's result
 * is undefined for x == 0, so callers must pass a nonzero value. */
172 return uint32_t(63) ^ uint32_t(__builtin_clzll(x));
173#elif defined(_MSC_VER) && defined(IS_X86_64)
/* MSVC on x86-64: one 64-bit reverse scan stores the bit position in index. */
175 _BitScanReverse64(&index, x);
177#elif defined(_MSC_VER) && defined(IS_X86_32)
/* MSVC on 32-bit x86: scan the upper 32 bits with the 32-bit intrinsic ... */
180 _BitScanReverse(&index, x >> 32);
/* ... and the lower 32 bits (x is narrowed to the intrinsic's 32-bit mask).
 * NOTE(review): presumably the upper-half result is offset by 32 and the lower
 * half is consulted only when the upper half is zero -- confirm. */
184 _BitScanReverse(&index, x);
/* Portable fallback: the masks test the upper 32, 16, 8, 4, 2 and 1 bits of a
 * progressively narrower window.
 * NOTE(review): presumably each taken branch shifts x down and adds the window
 * width to the running index -- confirm against the branch bodies. */
189 if (x & 0xffffffff00000000ULL) {
193 if (x & 0x00000000ffff0000ULL) {
197 if (x & 0x000000000000ff00ULL) {
201 if (x & 0x00000000000000f0ULL) {
205 if (x & 0x000000000000000cULL) {
209 if (x & 0x0000000000000002ULL) {
/* Returns the number of set bits in x. */
217INLINE
unsigned int popcnt(uint64_t x)
219#if defined(__GNUC__) || defined(__clang__)
/* GCC/Clang: population-count builtin, converted to uint32_t. */
220 return uint32_t(__builtin_popcountll(x));
/* Other compilers: manual count starting from zero.
 * NOTE(review): presumably a loop that clears one set bit per iteration -- confirm. */
222 unsigned int count = 0;
232INLINE uint32_t rotr32(uint32_t w, uint32_t c)
234 return (w >> c) | (w << (32 - c));
239INLINE uint64_t round_down_to_power_of_2(uint64_t x)
241 return 1ULL << highest_one(x | 1);
244INLINE uint32_t counter_low(uint64_t counter)
246 return (uint32_t)counter;
249INLINE uint32_t counter_high(uint64_t counter)
251 return (uint32_t)(counter >> 32);
254INLINE uint32_t load32(
const void* src)
256 const uint8_t* p = (
const uint8_t*)src;
257 return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
260INLINE
void load_key_words(
const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])
262 key_words[0] = load32(&key[0 * 4]);
263 key_words[1] = load32(&key[1 * 4]);
264 key_words[2] = load32(&key[2 * 4]);
265 key_words[3] = load32(&key[3 * 4]);
266 key_words[4] = load32(&key[4 * 4]);
267 key_words[5] = load32(&key[5 * 4]);
268 key_words[6] = load32(&key[6 * 4]);
269 key_words[7] = load32(&key[7 * 4]);
272INLINE
void store32(
void* dst, uint32_t w)
274 uint8_t* p = (uint8_t*)dst;
275 p[0] = (uint8_t)(w >> 0);
276 p[1] = (uint8_t)(w >> 8);
277 p[2] = (uint8_t)(w >> 16);
278 p[3] = (uint8_t)(w >> 24);
281INLINE
void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])
283 store32(&bytes_out[0 * 4], cv_words[0]);
284 store32(&bytes_out[1 * 4], cv_words[1]);
285 store32(&bytes_out[2 * 4], cv_words[2]);
286 store32(&bytes_out[3 * 4], cv_words[3]);
287 store32(&bytes_out[4 * 4], cv_words[4]);
288 store32(&bytes_out[5 * 4], cv_words[5]);
289 store32(&bytes_out[6 * 4], cv_words[6]);
290 store32(&bytes_out[7 * 4], cv_words[7]);