concurrentqueue.h
1// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
2// An overview, including benchmark results, is provided here:
3// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
4// The full design is also described in excruciating detail at:
5// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
6
7// Simplified BSD license:
8// Copyright (c) 2013-2020, Cameron Desrochers.
9// All rights reserved.
10//
11// Redistribution and use in source and binary forms, with or without modification,
12// are permitted provided that the following conditions are met:
13//
14// - Redistributions of source code must retain the above copyright notice, this list of
15// conditions and the following disclaimer.
16// - Redistributions in binary form must reproduce the above copyright notice, this list of
17// conditions and the following disclaimer in the documentation and/or other materials
18// provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
21// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
25// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
28// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Also dual-licensed under the Boost Software License (see LICENSE.md)
31
32#pragma once
33
34#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
35// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
36// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
37// upon assigning any computed values)
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Wconversion"
40
41#ifdef MCDBGQ_USE_RELACY
42#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
43#endif
44#endif
45
46#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
47// VS2019 with /W4 warns about constant conditional expressions, but `if constexpr` is not available unless
48// /std:c++17 or higher is used, so we have no choice but to simply disable the warning
49#pragma warning(push)
50#pragma warning(disable : 4127) // conditional expression is constant
51#endif
52
53#if defined(__APPLE__)
54#include "TargetConditionals.h"
55#endif
56
57#ifdef MCDBGQ_USE_RELACY
58#include "relacy/relacy_std.hpp"
59#include "relacy_shims.h"
60// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
61// We'll override the default trait malloc ourselves without a macro.
62#undef new
63#undef delete
64#undef malloc
65#undef free
66#else
67#include <atomic> // Requires C++11. Sorry VS2010.
68#include <cassert>
69#endif
70#include <cstddef> // for max_align_t
71#include <cstdint>
72#include <cstdlib>
73#include <type_traits>
74#include <algorithm>
75#include <utility>
76#include <limits>
77#include <climits> // for CHAR_BIT
78#include <array>
79#include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
80#include <mutex> // used for thread exit synchronization
81
82// Platform-specific definitions of a numeric thread ID type and an invalid value
83namespace moodycamel {
84namespace details {
85template <typename thread_id_t> struct thread_id_converter {
86 typedef thread_id_t thread_id_numeric_size_t;
87 typedef thread_id_t thread_id_hash_t;
88 static thread_id_hash_t prehash(thread_id_t const& x) { return x; }
89};
90} // namespace details
91} // namespace moodycamel
92#if defined(MCDBGQ_USE_RELACY)
93namespace moodycamel {
94namespace details {
95typedef std::uint32_t thread_id_t;
96static const thread_id_t invalid_thread_id = 0xFFFFFFFFU;
97static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
98static inline thread_id_t thread_id()
99{
100 return rl::thread_index();
101}
102} // namespace details
103} // namespace moodycamel
104#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
105// No sense pulling in windows.h in a header, we'll manually declare the function
106// we use and rely on backwards-compatibility for this not to break
107extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
108namespace moodycamel {
109namespace details {
110static_assert(sizeof(unsigned long) == sizeof(std::uint32_t),
111 "Expected size of unsigned long to be 32 bits on Windows");
112typedef std::uint32_t thread_id_t;
113static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
114static const thread_id_t invalid_thread_id2 =
115 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread
116 // IDs are presently multiples of 4.
117static inline thread_id_t thread_id()
118{
119 return static_cast<thread_id_t>(::GetCurrentThreadId());
120}
121} // namespace details
122} // namespace moodycamel
123#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || \
124 defined(MOODYCAMEL_NO_THREAD_LOCAL)
125namespace moodycamel {
126namespace details {
127static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8,
128 "std::thread::id is expected to be either 4 or 8 bytes");
129
130typedef std::thread::id thread_id_t;
131static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID
132
133// Note we don't define an invalid_thread_id2 since std::thread::id doesn't have one; it's
134// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
135// be.
136static inline thread_id_t thread_id()
137{
138 return std::this_thread::get_id();
139}
140
141template <std::size_t> struct thread_id_size {};
142template <> struct thread_id_size<4> {
143 typedef std::uint32_t numeric_t;
144};
145template <> struct thread_id_size<8> {
146 typedef std::uint64_t numeric_t;
147};
148
149template <> struct thread_id_converter<thread_id_t> {
150 typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;
151#ifndef __APPLE__
152 typedef std::size_t thread_id_hash_t;
153#else
154 typedef thread_id_numeric_size_t thread_id_hash_t;
155#endif
156
157 static thread_id_hash_t prehash(thread_id_t const& x)
158 {
159#ifndef __APPLE__
160 return std::hash<std::thread::id>()(x);
161#else
162 return *reinterpret_cast<thread_id_hash_t const*>(&x);
163#endif
164 }
165};
166}
167}
168#else
169// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
170// In order to get a numeric thread ID in a platform-independent way, we use a thread-local
171// static variable's address as a thread identifier :-)
172#if defined(__GNUC__) || defined(__INTEL_COMPILER)
173#define MOODYCAMEL_THREADLOCAL __thread
174#elif defined(_MSC_VER)
175#define MOODYCAMEL_THREADLOCAL __declspec(thread)
176#else
177// Assume C++11 compliant compiler
178#define MOODYCAMEL_THREADLOCAL thread_local
179#endif
180namespace moodycamel {
181namespace details {
182typedef std::uintptr_t thread_id_t;
183static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr
184static const thread_id_t invalid_thread_id2 =
185 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
186inline thread_id_t thread_id()
187{
188 static MOODYCAMEL_THREADLOCAL int x;
189 return reinterpret_cast<thread_id_t>(&x);
190}
191}
192}
193#endif
194
195// Constexpr if
196#ifndef MOODYCAMEL_CONSTEXPR_IF
197#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
198#define MOODYCAMEL_CONSTEXPR_IF if constexpr
199#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
200#else
201#define MOODYCAMEL_CONSTEXPR_IF if
202#define MOODYCAMEL_MAYBE_UNUSED
203#endif
204#endif
205
206// Exceptions
207#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
208#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || \
209 (!defined(_MSC_VER) && !defined(__GNUC__))
210#define MOODYCAMEL_EXCEPTIONS_ENABLED
211#endif
212#endif
213#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
214#define MOODYCAMEL_TRY try
215#define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__)
216#define MOODYCAMEL_RETHROW throw
217#define MOODYCAMEL_THROW(expr) throw(expr)
218#else
219#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF(true)
220#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF(false)
221#define MOODYCAMEL_RETHROW
222#define MOODYCAMEL_THROW(expr)
223#endif
224
225#ifndef MOODYCAMEL_NOEXCEPT
226#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
227#define MOODYCAMEL_NOEXCEPT
228#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
229#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
230#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
231// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
232// We have to assume *all* non-trivial constructors may throw on VS2012!
233#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
234#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \
235 (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value \
236 ? std::is_trivially_move_constructible<type>::value \
237 : std::is_trivially_copy_constructible<type>::value)
238#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \
239 ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value \
240 ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value \
241 : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && \
242 MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
243#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
244#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
245#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \
246 (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value \
247 ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value \
248 : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
249#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \
250 ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value \
251 ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value \
252 : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && \
253 MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
254#else
255#define MOODYCAMEL_NOEXCEPT noexcept
256#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
257#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
258#endif
259#endif
260
261#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
262#ifdef MCDBGQ_USE_RELACY
263#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
264#else
265// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug:
266// http://sourceforge.net/p/mingw-w64/bugs/445. g++ <= 4.7 doesn't support thread_local either. Finally, iOS/ARM
267// doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work.
268#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \
269 (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && \
270 (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && \
271 (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
272// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
273#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; years ago several users report having
274 // problems with it on
275#endif
276#endif
277#endif
278
279// VS2012 doesn't support deleted functions.
280// In this case, we declare the function normally but don't define it. A link error will be generated if the function is
281// called.
282#ifndef MOODYCAMEL_DELETE_FUNCTION
283#if defined(_MSC_VER) && _MSC_VER < 1800
284#define MOODYCAMEL_DELETE_FUNCTION
285#else
286#define MOODYCAMEL_DELETE_FUNCTION = delete
287#endif
288#endif
289
290namespace moodycamel {
291namespace details {
292#ifndef MOODYCAMEL_ALIGNAS
293// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
294#if defined(_MSC_VER) && _MSC_VER <= 1800
295#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
296#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
297#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
298template <int Align, typename T> struct Vs2013Aligned {}; // default, unsupported alignment
299template <typename T> struct Vs2013Aligned<1, T> {
300 typedef __declspec(align(1)) T type;
301};
302template <typename T> struct Vs2013Aligned<2, T> {
303 typedef __declspec(align(2)) T type;
304};
305template <typename T> struct Vs2013Aligned<4, T> {
306 typedef __declspec(align(4)) T type;
307};
308template <typename T> struct Vs2013Aligned<8, T> {
309 typedef __declspec(align(8)) T type;
310};
311template <typename T> struct Vs2013Aligned<16, T> {
312 typedef __declspec(align(16)) T type;
313};
314template <typename T> struct Vs2013Aligned<32, T> {
315 typedef __declspec(align(32)) T type;
316};
317template <typename T> struct Vs2013Aligned<64, T> {
318 typedef __declspec(align(64)) T type;
319};
320template <typename T> struct Vs2013Aligned<128, T> {
321 typedef __declspec(align(128)) T type;
322};
323template <typename T> struct Vs2013Aligned<256, T> {
324 typedef __declspec(align(256)) T type;
325};
326#else
327template <typename T> struct identity {
328 typedef T type;
329};
330#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
331#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
332#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
333#endif
334#endif
335} // namespace details
336} // namespace moodycamel
337
338// TSAN can falsely report races in lock-free code. To enable TSAN to be used from projects that use this one,
339// we can apply per-function compile-time suppression.
340// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
341#define MOODYCAMEL_NO_TSAN
342#if defined(__has_feature)
343#if __has_feature(thread_sanitizer)
344#undef MOODYCAMEL_NO_TSAN
345#define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
346#endif // TSAN
347#endif // TSAN
348
349// Compiler-specific likely/unlikely hints
350namespace moodycamel {
351namespace details {
352#if defined(__GNUC__)
353static inline bool(likely)(bool x)
354{
355 return __builtin_expect((x), true);
356}
357static inline bool(unlikely)(bool x)
358{
359 return __builtin_expect((x), false);
360}
361#else
362static inline bool(likely)(bool x)
363{
364 return x;
365}
366static inline bool(unlikely)(bool x)
367{
368 return x;
369}
370#endif
371} // namespace details
372} // namespace moodycamel
373
374#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
375#include "internal/concurrentqueue_internal_debug.h"
376#endif
377
378namespace moodycamel {
379namespace details {
380template <typename T> struct const_numeric_max {
381 static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
382 static const T value = std::numeric_limits<T>::is_signed
383 ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
384 : static_cast<T>(-1);
385};
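
// For illustration of the expression above: const_numeric_max<std::uint32_t>::value is 0xFFFFFFFF
// (static_cast of -1 sets all bits), while const_numeric_max<std::int32_t>::value is 0x7FFFFFFF
// ((1 << 31) - 1).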
386
387#if defined(__GLIBCXX__)
388typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while
389#else
390typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std::
391#endif
392
393// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
394// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
395typedef union {
396 std_max_align_t x;
397 long long y;
398 void* z;
399} max_align_t;
400} // namespace details
401
402// Default traits for the ConcurrentQueue. To change some of the
403// traits without re-implementing all of them, inherit from this
404// struct and shadow the declarations you wish to be different;
405// since the traits are used as a template type parameter, the
406// shadowed declarations will be used where defined, and the defaults
406// otherwise. (An illustrative sketch of overriding a trait follows the struct below.)
407
408struct ConcurrentQueueDefaultTraits {
409 // General-purpose size type. std::size_t is strongly recommended.
410 typedef std::size_t size_t;
411
412 // The type used for the enqueue and dequeue indices. Must be at least as
413 // large as size_t. Should be significantly larger than the number of elements
414 // you expect to hold at once, especially if you have a high turnover rate;
415 // for example, on 32-bit x86, if you expect to have over a hundred million
416 // elements or pump several million elements through your queue in a very
417 // short space of time, using a 32-bit type *may* trigger a race condition.
418 // A 64-bit int type is recommended in that case, and in practice will
419 // prevent a race condition no matter the usage of the queue. Note that
420 // whether the queue is lock-free with a 64-bit int type depends on whether
421 // std::atomic<std::uint64_t> is lock-free, which is platform-specific.
422 typedef std::size_t index_t;
423
424 // Internally, all elements are enqueued and dequeued from multi-element
425 // blocks; this is the smallest controllable unit. If you expect few elements
426 // but many producers, a smaller block size should be favoured. For few producers
427 // and/or many elements, a larger block size is preferred. A sane default
428 // is provided. Must be a power of 2.
429 static const size_t BLOCK_SIZE = 32;
430
431 // For explicit producers (i.e. when using a producer token), the block is
432 // checked for being empty by iterating through a list of flags, one per element.
433 // For large block sizes, this is too inefficient, and switching to an atomic
434 // counter-based approach is faster. The switch is made for block sizes strictly
435 // larger than this threshold.
436 static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
437
438 // How many full blocks can be expected for a single explicit producer? This should
439 // reflect that number's maximum for optimal performance. Must be a power of 2.
440 static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
441
442 // How many full blocks can be expected for a single implicit producer? This should
443 // reflect that number's maximum for optimal performance. Must be a power of 2.
444 static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
445
446 // The initial size of the hash table mapping thread IDs to implicit producers.
447 // Note that the hash is resized every time it becomes half full.
448 // Must be a power of two, or 0. If 0, implicit production
449 // (using the enqueue methods without an explicit producer token) is disabled.
450 static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
451
452 // Controls the number of items that an explicit consumer (i.e. one with a token)
453 // must consume before it causes all consumers to rotate and move on to the next
454 // internal queue.
455 static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
456
457 // The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
458 // Enqueue operations that would cause this limit to be surpassed will fail. Note
459 // that this limit is enforced at the block level (for performance reasons), i.e.
460 // it's rounded up to the nearest block size.
461 static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
462
463 // The number of times to spin before sleeping when waiting on a semaphore.
464 // Recommended values are on the order of 1000-10000 unless the number of
465 // consumer threads exceeds the number of idle cores (in which case try 0-100).
466 // Only affects instances of the BlockingConcurrentQueue.
467 static const int MAX_SEMA_SPINS = 10000;
468
469 // Whether to recycle dynamically-allocated blocks into an internal free list or
470 // not. If false, only pre-allocated blocks (controlled by the constructor
471 // arguments) will be recycled, and all others will be `free`d back to the heap.
472 // Note that blocks consumed by explicit producers are only freed on destruction
473 // of the queue (not following destruction of the token) regardless of this trait.
474 static const bool RECYCLE_ALLOCATED_BLOCKS = false;
475
476#ifndef MCDBGQ_USE_RELACY
477 // Memory allocation can be customized if needed.
478 // malloc should return nullptr on failure, and handle alignment like std::malloc.
479#if defined(malloc) || defined(free)
480 // Gah, this is 2015, stop defining macros that break standard code already!
481 // Work around malloc/free being special macros:
482 static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
483 static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
484 static inline void*(malloc)(size_t size) { return WORKAROUND_malloc(size); }
485 static inline void(free)(void* ptr) { return WORKAROUND_free(ptr); }
486#else
487 static inline void* malloc(size_t size) { return std::malloc(size); }
488 static inline void free(void* ptr) { return std::free(ptr); }
489#endif
490#else
491 // Debug versions when running under the Relacy race detector (ignore
492 // these in user code)
493 static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
494 static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
495#endif
496};
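
// Illustrative sketch, not part of the library: to customize a trait, inherit from ConcurrentQueueDefaultTraits
// and shadow only the members you want to change, then pass your struct as the queue's second template argument.
// The names MyTraits and BigBlockQueue below are arbitrary examples.
//
//   struct MyTraits : public moodycamel::ConcurrentQueueDefaultTraits {
//       static const size_t BLOCK_SIZE = 256; // must be a power of 2
//   };
//   typedef moodycamel::ConcurrentQueue<int, MyTraits> BigBlockQueue;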
497
498// When producing or consuming many elements, the most efficient way is to:
499// 1) Use one of the bulk-operation methods of the queue with a token
500// 2) Failing that, use the bulk-operation methods without a token
501// 3) Failing that, create a token and use that with the single-item methods
502// 4) Failing that, use the single-parameter methods of the queue
503// Having said that, don't create tokens willy-nilly -- ideally there should be
504// a maximum of one token per thread (of each kind).
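//
// Illustrative sketch, not part of the library, of the token + bulk pattern recommended above
// (the names q, ptok, ctok, in and out are arbitrary):
//
//   moodycamel::ConcurrentQueue<int> q;
//   // Producer thread:
//   moodycamel::ProducerToken ptok(q);
//   int in[64] = {};
//   q.enqueue_bulk(ptok, in, 64);
//   // Consumer thread:
//   moodycamel::ConsumerToken ctok(q);
//   int out[64];
//   std::size_t n = q.try_dequeue_bulk(ctok, out, 64);
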
505struct ProducerToken;
506struct ConsumerToken;
507
508template <typename T, typename Traits> class ConcurrentQueue;
509template <typename T, typename Traits> class BlockingConcurrentQueue;
510class ConcurrentQueueTests;
511
512namespace details {
513struct ConcurrentQueueProducerTypelessBase {
514 ConcurrentQueueProducerTypelessBase* next;
515 std::atomic<bool> inactive;
516 ProducerToken* token;
517
518 ConcurrentQueueProducerTypelessBase()
519 : next(nullptr)
520 , inactive(false)
521 , token(nullptr)
522 {}
523};
524
525template <bool use32> struct _hash_32_or_64 {
526 static inline std::uint32_t hash(std::uint32_t h)
527 {
528 // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
529 // Since the thread ID is already unique, all we really want to do is propagate that
530 // uniqueness evenly across all the bits, so that we can use a subset of the bits while
531 // reducing collisions significantly
532 h ^= h >> 16;
533 h *= 0x85ebca6b;
534 h ^= h >> 13;
535 h *= 0xc2b2ae35;
536 return h ^ (h >> 16);
537 }
538};
539template <> struct _hash_32_or_64<1> {
540 static inline std::uint64_t hash(std::uint64_t h)
541 {
542 h ^= h >> 33;
543 h *= 0xff51afd7ed558ccd;
544 h ^= h >> 33;
545 h *= 0xc4ceb9fe1a85ec53;
546 return h ^ (h >> 33);
547 }
548};
549template <std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {};
550
551static inline size_t hash_thread_id(thread_id_t id)
552{
553 static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
554 return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
555 thread_id_converter<thread_id_t>::prehash(id)));
556}
557
558template <typename T> static inline bool circular_less_than(T a, T b)
559{
560 static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,
561 "circular_less_than is intended to be used only with unsigned integer types");
562 return static_cast<T>(a - b) > static_cast<T>(static_cast<T>(1) << (static_cast<T>(sizeof(T) * CHAR_BIT - 1)));
563 // Note: the extra parens around the rhs of operator<< are due to an MSVC bug:
564 // https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931
565 // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when
566 // done here.
567}
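
// For illustration (using an 8-bit unsigned type): circular_less_than<std::uint8_t>(250, 5) is true, because
// static_cast<std::uint8_t>(250 - 5) == 245 > 128, i.e. index 250 precedes index 5 once the counter wraps;
// the reverse call, circular_less_than<std::uint8_t>(5, 250), is false.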
568
569template <typename U> static inline char* align_for(char* ptr)
570{
571 const std::size_t alignment = std::alignment_of<U>::value;
572 return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
573}
574
575template <typename T> static inline T ceil_to_pow_2(T x)
576{
577 static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,
578 "ceil_to_pow_2 is intended to be used only with unsigned integer types");
579
580 // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
581 --x;
582 x |= x >> 1;
583 x |= x >> 2;
584 x |= x >> 4;
585 for (std::size_t i = 1; i < sizeof(T); i <<= 1) {
586 x |= x >> (i << 3);
587 }
588 ++x;
589 return x;
590}
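
// For illustration: ceil_to_pow_2<std::uint32_t>(17) yields 32, while values that are already powers of two
// (e.g. 16) are returned unchanged.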
591
592template <typename T> static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
593{
594 T temp = std::move(left.load(std::memory_order_relaxed));
595 left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
596 right.store(std::move(temp), std::memory_order_relaxed);
597}
598
599template <typename T> static inline T const& nomove(T const& x)
600{
601 return x;
602}
603
604template <bool Enable> struct nomove_if {
605 template <typename T> static inline T const& eval(T const& x) { return x; }
606};
607
608template <> struct nomove_if<false> {
609 template <typename U> static inline auto eval(U&& x) -> decltype(std::forward<U>(x)) { return std::forward<U>(x); }
610};
611
612template <typename It> static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT->decltype(*it)
613{
614 return *it;
615}
616
617#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
618template <typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> {};
619#else
620template <typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> {};
621#endif
622
623#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
624#ifdef MCDBGQ_USE_RELACY
625typedef RelacyThreadExitListener ThreadExitListener;
626typedef RelacyThreadExitNotifier ThreadExitNotifier;
627#else
628class ThreadExitNotifier;
629
630struct ThreadExitListener {
631 typedef void (*callback_t)(void*);
632 callback_t callback;
633 void* userData;
634
635 ThreadExitListener* next; // reserved for use by the ThreadExitNotifier
636 ThreadExitNotifier* chain; // reserved for use by the ThreadExitNotifier
637};
638
639class ThreadExitNotifier {
640 public:
641 static void subscribe(ThreadExitListener* listener)
642 {
643 auto& tlsInst = instance();
644 std::lock_guard<std::mutex> guard(mutex());
645 listener->next = tlsInst.tail;
646 listener->chain = &tlsInst;
647 tlsInst.tail = listener;
648 }
649
650 static void unsubscribe(ThreadExitListener* listener)
651 {
652 std::lock_guard<std::mutex> guard(mutex());
653 if (!listener->chain) {
654 return; // race with ~ThreadExitNotifier
655 }
656 auto& tlsInst = *listener->chain;
657 listener->chain = nullptr;
658 ThreadExitListener** prev = &tlsInst.tail;
659 for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
660 if (ptr == listener) {
661 *prev = ptr->next;
662 break;
663 }
664 prev = &ptr->next;
665 }
666 }
667
668 private:
669 ThreadExitNotifier()
670 : tail(nullptr)
671 {}
672 ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
673 ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
674
675 ~ThreadExitNotifier()
676 {
677 // This thread is about to exit, let everyone know!
678 assert(this == &instance() &&
679 "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that "
680 "MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
681 std::lock_guard<std::mutex> guard(mutex());
682 for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
683 ptr->chain = nullptr;
684 ptr->callback(ptr->userData);
685 }
686 }
687
688 // Thread-local
689 static inline ThreadExitNotifier& instance()
690 {
691 static thread_local ThreadExitNotifier notifier;
692 return notifier;
693 }
694
695 static inline std::mutex& mutex()
696 {
697 // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called
698 static std::mutex mutex;
699 return mutex;
700 }
701
702 private:
703 ThreadExitListener* tail;
704};
705#endif
706#endif
707
708template <typename T> struct static_is_lock_free_num {
709 enum { value = 0 };
710};
711template <> struct static_is_lock_free_num<signed char> {
712 enum { value = ATOMIC_CHAR_LOCK_FREE };
713};
714template <> struct static_is_lock_free_num<short> {
715 enum { value = ATOMIC_SHORT_LOCK_FREE };
716};
717template <> struct static_is_lock_free_num<int> {
718 enum { value = ATOMIC_INT_LOCK_FREE };
719};
720template <> struct static_is_lock_free_num<long> {
721 enum { value = ATOMIC_LONG_LOCK_FREE };
722};
723template <> struct static_is_lock_free_num<long long> {
724 enum { value = ATOMIC_LLONG_LOCK_FREE };
725};
726template <typename T> struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> {};
727template <> struct static_is_lock_free<bool> {
728 enum { value = ATOMIC_BOOL_LOCK_FREE };
729};
730template <typename U> struct static_is_lock_free<U*> {
731 enum { value = ATOMIC_POINTER_LOCK_FREE };
732};
733} // namespace details
734
735struct ProducerToken {
736 template <typename T, typename Traits> explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
737
738 template <typename T, typename Traits> explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);
739
740 ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT : producer(other.producer)
741 {
742 other.producer = nullptr;
743 if (producer != nullptr) {
744 producer->token = this;
745 }
746 }
747
748 inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
749 {
750 swap(other);
751 return *this;
752 }
753
754 void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT
755 {
756 std::swap(producer, other.producer);
757 if (producer != nullptr) {
758 producer->token = this;
759 }
760 if (other.producer != nullptr) {
761 other.producer->token = &other;
762 }
763 }
764
765 // A token is always valid unless:
766 // 1) Memory allocation failed during construction
767 // 2) It was moved via the move constructor
768 // (Note: assignment does a swap, leaving both potentially valid)
769 // 3) The associated queue was destroyed
770 // Note that if valid() returns true, that only indicates
771 // that the token is valid for use with a specific queue,
772 // but not which one; that's up to the user to track.
773 inline bool valid() const { return producer != nullptr; }
774
775 ~ProducerToken()
776 {
777 if (producer != nullptr) {
778 producer->token = nullptr;
779 producer->inactive.store(true, std::memory_order_release);
780 }
781 }
782
783 // Disable copying and assignment
784 ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
785 ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
786
787 private:
788 template <typename T, typename Traits> friend class ConcurrentQueue;
789 friend class ConcurrentQueueTests;
790
791 protected:
792 details::ConcurrentQueueProducerTypelessBase* producer;
793};
794
795struct ConsumerToken {
796 template <typename T, typename Traits> explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);
797
798 template <typename T, typename Traits> explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);
799
800 ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT : initialOffset(other.initialOffset),
801 lastKnownGlobalOffset(other.lastKnownGlobalOffset),
802 itemsConsumedFromCurrent(other.itemsConsumedFromCurrent),
803 currentProducer(other.currentProducer),
804 desiredProducer(other.desiredProducer)
805 {}
806
807 inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
808 {
809 swap(other);
810 return *this;
811 }
812
813 void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT
814 {
815 std::swap(initialOffset, other.initialOffset);
816 std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
817 std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
818 std::swap(currentProducer, other.currentProducer);
819 std::swap(desiredProducer, other.desiredProducer);
820 }
821
822 // Disable copying and assignment
823 ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
824 ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
825
826 private:
827 template <typename T, typename Traits> friend class ConcurrentQueue;
828 friend class ConcurrentQueueTests;
829
830 private: // but shared with ConcurrentQueue
831 std::uint32_t initialOffset;
832 std::uint32_t lastKnownGlobalOffset;
833 std::uint32_t itemsConsumedFromCurrent;
834 details::ConcurrentQueueProducerTypelessBase* currentProducer;
835 details::ConcurrentQueueProducerTypelessBase* desiredProducer;
836};
837
838// Need to forward-declare this swap because it's in a namespace.
839// See
840// http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
841template <typename T, typename Traits>
842inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a,
843 typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT;
844
845template <typename T, typename Traits = ConcurrentQueueDefaultTraits> class ConcurrentQueue {
846 public:
847 typedef ::moodycamel::ProducerToken producer_token_t;
848 typedef ::moodycamel::ConsumerToken consumer_token_t;
849
850 typedef typename Traits::index_t index_t;
851 typedef typename Traits::size_t size_t;
852
853 static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
854 static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD =
855 static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
856 static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
857 static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
858 static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE =
859 static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
860 static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =
861 static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
862#ifdef _MSC_VER
863#pragma warning(push)
864#pragma warning(disable : 4307) // + integral constant overflow (that's what the ternary expression is for!)
865#pragma warning(disable : 4309) // static_cast: Truncation of constant value
866#endif
867 static const size_t MAX_SUBQUEUE_SIZE =
868 (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE)
869 ? details::const_numeric_max<size_t>::value
870 : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);
871#ifdef _MSC_VER
872#pragma warning(pop)
873#endif
874
875 static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value,
876 "Traits::size_t must be an unsigned integral type");
877 static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value,
878 "Traits::index_t must be an unsigned integral type");
879 static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");
880 static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)),
881 "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
882 static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) &&
883 !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)),
884 "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
885 static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) &&
886 !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)),
887 "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
888 static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) &&
889 !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)),
890 "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
891 static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) ||
892 !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)),
893 "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
894 static_assert(
895 INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1,
896 "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");
897
898 public:
899 // Creates a queue with at least `capacity` element slots; note that the
900 // actual number of elements that can be inserted without additional memory
901 // allocation depends on the number of producers and the block size (e.g. if
902 // the block size is equal to `capacity`, only a single block will be allocated
903 // up-front, which means only a single producer will be able to enqueue elements
904 // without an extra allocation -- blocks aren't shared between producers).
905 // This method is not thread safe -- it is up to the user to ensure that the
906 // queue is fully constructed before it starts being used by other threads (this
907 // includes making the memory effects of construction visible, possibly with a
908 // memory barrier).
909 explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE)
910 : producerListTail(nullptr)
911 , producerCount(0)
912 , initialBlockPoolIndex(0)
913 , nextExplicitConsumerId(0)
914 , globalExplicitConsumerOffset(0)
915 {
916 implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
917 populate_initial_implicit_producer_hash();
918 populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
919
920#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
921 // Track all the producers using a fully-resolved typed list for
922 // each kind; this makes it possible to debug them starting from
923 // the root queue object (otherwise wacky casts are needed that
924 // don't compile in the debugger's expression evaluator).
925 explicitProducers.store(nullptr, std::memory_order_relaxed);
926 implicitProducers.store(nullptr, std::memory_order_relaxed);
927#endif
928 }
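
    // For illustration of the sizing note above: with the default BLOCK_SIZE of 32, ConcurrentQueue<int> q(100)
    // pre-allocates 100 / 32 + 1 == 4 blocks, since 100 is not a multiple of the block size.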
929
930 // Computes the correct number of pre-allocated blocks for you based
931 // on the minimum number of elements you want available at any given
932 // time, and the maximum concurrent number of each type of producer.
933 ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
934 : producerListTail(nullptr)
935 , producerCount(0)
936 , initialBlockPoolIndex(0)
937 , nextExplicitConsumerId(0)
938 , globalExplicitConsumerOffset(0)
939 {
940 implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
941 populate_initial_implicit_producer_hash();
942 size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) +
943 2 * (maxExplicitProducers + maxImplicitProducers);
944 populate_initial_block_list(blocks);
945
946#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
947 explicitProducers.store(nullptr, std::memory_order_relaxed);
948 implicitProducers.store(nullptr, std::memory_order_relaxed);
949#endif
950 }
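
    // For illustration of the formula above: with the default BLOCK_SIZE of 32, ConcurrentQueue<int> q(256, 2, 4)
    // pre-allocates ((256 / 32) - 1) * (2 + 1) + 2 * (2 + 4) == 33 blocks.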
951
952 // Note: The queue should not be accessed concurrently while it's
953 // being deleted. It's up to the user to synchronize this.
954 // This method is not thread safe.
955 ~ConcurrentQueue()
956 {
957 // Destroy producers
958 auto ptr = producerListTail.load(std::memory_order_relaxed);
959 while (ptr != nullptr) {
960 auto next = ptr->next_prod();
961 if (ptr->token != nullptr) {
962 ptr->token->producer = nullptr;
963 }
964 destroy(ptr);
965 ptr = next;
966 }
967
968 // Destroy implicit producer hash tables
969 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0)
970 {
971 auto hash = implicitProducerHash.load(std::memory_order_relaxed);
972 while (hash != nullptr) {
973 auto prev = hash->prev;
974 if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically
975 for (size_t i = 0; i != hash->capacity; ++i) {
976 hash->entries[i].~ImplicitProducerKVP();
977 }
978 hash->~ImplicitProducerHash();
979 (Traits::free)(hash);
980 }
981 hash = prev;
982 }
983 }
984
985 // Destroy global free list
986 auto block = freeList.head_unsafe();
987 while (block != nullptr) {
988 auto next = block->freeListNext.load(std::memory_order_relaxed);
989 if (block->dynamicallyAllocated) {
990 destroy(block);
991 }
992 block = next;
993 }
994
995 // Destroy initial free list
996 destroy_array(initialBlockPool, initialBlockPoolSize);
997 }
998
999 // Disable copying and copy assignment
1000 ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
1001 ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
1002
1003 // Moving is supported, but note that it is *not* a thread-safe operation.
1004 // Nobody can use the queue while it's being moved, and the memory effects
1005 // of that move must be propagated to other threads before they can use it.
1006 // Note: When a queue is moved, its tokens are still valid but can only be
1007 // used with the destination queue (i.e. semantically they are moved along
1008 // with the queue itself).
1009 ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
1010 : producerListTail(other.producerListTail.load(std::memory_order_relaxed)),
1011 producerCount(other.producerCount.load(std::memory_order_relaxed)),
1012 initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
1013 initialBlockPool(other.initialBlockPool),
1014 initialBlockPoolSize(other.initialBlockPoolSize),
1015 freeList(std::move(other.freeList)),
1016 nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
1017 globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed))
1018 {
1019 // Move the other one into this, and leave the other one as an empty queue
1020 implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
1021 populate_initial_implicit_producer_hash();
1022 swap_implicit_producer_hashes(other);
1023
1024 other.producerListTail.store(nullptr, std::memory_order_relaxed);
1025 other.producerCount.store(0, std::memory_order_relaxed);
1026 other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
1027 other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
1028
1029#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
1030 explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
1031 other.explicitProducers.store(nullptr, std::memory_order_relaxed);
1032 implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
1033 other.implicitProducers.store(nullptr, std::memory_order_relaxed);
1034#endif
1035
1036 other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
1037 other.initialBlockPoolSize = 0;
1038 other.initialBlockPool = nullptr;
1039
1040 reown_producers();
1041 }
1042
1043 inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT { return swap_internal(other); }
1044
1045 // Swaps this queue's state with the other's. Not thread-safe.
1046 // Swapping two queues does not invalidate their tokens, however
1047 // the tokens that were created for one queue must be used with
1048 // only the swapped queue (i.e. the tokens are tied to the
1049 // queue's movable state, not the object itself).
1050 inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT { swap_internal(other); }
1051
1052 private:
1053 ConcurrentQueue& swap_internal(ConcurrentQueue& other)
1054 {
1055 if (this == &other) {
1056 return *this;
1057 }
1058
1059 details::swap_relaxed(producerListTail, other.producerListTail);
1060 details::swap_relaxed(producerCount, other.producerCount);
1061 details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
1062 std::swap(initialBlockPool, other.initialBlockPool);
1063 std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
1064 freeList.swap(other.freeList);
1065 details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
1066 details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
1067
1068 swap_implicit_producer_hashes(other);
1069
1070 reown_producers();
1071 other.reown_producers();
1072
1073#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
1074 details::swap_relaxed(explicitProducers, other.explicitProducers);
1075 details::swap_relaxed(implicitProducers, other.implicitProducers);
1076#endif
1077
1078 return *this;
1079 }
1080
1081 public:
1082 // Enqueues a single item (by copying it).
1083 // Allocates memory if required. Only fails if memory allocation fails (or implicit
1084 // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
1085 // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1086 // Thread-safe.
1087 inline bool enqueue(T const& item)
1088 {
1089 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1090 else return inner_enqueue<CanAlloc>(item);
1091 }
1092
1093 // Enqueues a single item (by moving it, if possible).
1094 // Allocates memory if required. Only fails if memory allocation fails (or implicit
1095 // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
1096 // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1097 // Thread-safe.
1098 inline bool enqueue(T&& item)
1099 {
1100 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1101 else return inner_enqueue<CanAlloc>(std::move(item));
1102 }
1103
1104 // Enqueues a single item (by copying it) using an explicit producer token.
1105 // Allocates memory if required. Only fails if memory allocation fails (or
1106 // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1107 // Thread-safe.
1108 inline bool enqueue(producer_token_t const& token, T const& item) { return inner_enqueue<CanAlloc>(token, item); }
1109
1110 // Enqueues a single item (by moving it, if possible) using an explicit producer token.
1111 // Allocates memory if required. Only fails if memory allocation fails (or
1112 // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1113 // Thread-safe.
1114 inline bool enqueue(producer_token_t const& token, T&& item)
1115 {
1116 return inner_enqueue<CanAlloc>(token, std::move(item));
1117 }
1118
1119 // Enqueues several items.
1120 // Allocates memory if required. Only fails if memory allocation fails (or
1121 // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
1122 // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1123 // Note: Use std::make_move_iterator if the elements should be moved instead of copied.
1124 // Thread-safe.
1125 template <typename It> bool enqueue_bulk(It itemFirst, size_t count)
1126 {
1127 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1128 else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
1129 }
1130
1131 // Enqueues several items using an explicit producer token.
1132 // Allocates memory if required. Only fails if memory allocation fails
1133 // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1134 // Note: Use std::make_move_iterator if the elements should be moved
1135 // instead of copied.
1136 // Thread-safe.
1137 template <typename It> bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
1138 {
1139 return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
1140 }
1141
1142 // Enqueues a single item (by copying it).
1143 // Does not allocate memory. Fails if not enough room to enqueue (or implicit
1144 // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
1145 // is 0).
1146 // Thread-safe.
1147 inline bool try_enqueue(T const& item)
1148 {
1149 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1150 else return inner_enqueue<CannotAlloc>(item);
1151 }
1152
1153 // Enqueues a single item (by moving it, if possible).
1154 // Does not allocate memory (except for one-time implicit producer).
1155 // Fails if not enough room to enqueue (or implicit production is
1156 // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
1157 // Thread-safe.
1158 inline bool try_enqueue(T&& item)
1159 {
1160 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1161 else return inner_enqueue<CannotAlloc>(std::move(item));
1162 }
1163
1164 // Enqueues a single item (by copying it) using an explicit producer token.
1165 // Does not allocate memory. Fails if not enough room to enqueue.
1166 // Thread-safe.
1167 inline bool try_enqueue(producer_token_t const& token, T const& item)
1168 {
1169 return inner_enqueue<CannotAlloc>(token, item);
1170 }
1171
1172 // Enqueues a single item (by moving it, if possible) using an explicit producer token.
1173 // Does not allocate memory. Fails if not enough room to enqueue.
1174 // Thread-safe.
1175 inline bool try_enqueue(producer_token_t const& token, T&& item)
1176 {
1177 return inner_enqueue<CannotAlloc>(token, std::move(item));
1178 }
1179
1180 // Enqueues several items.
1181 // Does not allocate memory (except for one-time implicit producer).
1182 // Fails if not enough room to enqueue (or implicit production is
1183 // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
1184 // Note: Use std::make_move_iterator if the elements should be moved
1185 // instead of copied.
1186 // Thread-safe.
1187 template <typename It> bool try_enqueue_bulk(It itemFirst, size_t count)
1188 {
1189 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1190 else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
1191 }
1192
1193 // Enqueues several items using an explicit producer token.
1194 // Does not allocate memory. Fails if not enough room to enqueue.
1195 // Note: Use std::make_move_iterator if the elements should be moved
1196 // instead of copied.
1197 // Thread-safe.
1198 template <typename It> bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
1199 {
1200 return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
1201 }
1202
1203 // Attempts to dequeue from the queue.
1204 // Returns false if all producer streams appeared empty at the time they
1205 // were checked (so, the queue is likely but not guaranteed to be empty).
1206 // Never allocates. Thread-safe.
1207 template <typename U> bool try_dequeue(U& item)
1208 {
1209 // Instead of simply trying each producer in turn (which could cause needless contention on the first
1210 // producer), we score them heuristically.
1211 size_t nonEmptyCount = 0;
1212 ProducerBase* best = nullptr;
1213 size_t bestSize = 0;
1214 for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr;
1215 ptr = ptr->next_prod()) {
1216 auto size = ptr->size_approx();
1217 if (size > 0) {
1218 if (size > bestSize) {
1219 bestSize = size;
1220 best = ptr;
1221 }
1222 ++nonEmptyCount;
1223 }
1224 }
1225
1226 // If there was at least one non-empty queue but it appears empty at the time
1227 // we try to dequeue from it, we need to make sure every queue's been tried
1228 if (nonEmptyCount > 0) {
1229 if ((details::likely)(best->dequeue(item))) {
1230 return true;
1231 }
1232 for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1233 if (ptr != best && ptr->dequeue(item)) {
1234 return true;
1235 }
1236 }
1237 }
1238 return false;
1239 }
1240
1241 // Attempts to dequeue from the queue.
1242 // Returns false if all producer streams appeared empty at the time they
1243 // were checked (so, the queue is likely but not guaranteed to be empty).
1244 // This differs from the try_dequeue(item) method in that this one does
1245 // not attempt to reduce contention by interleaving the order that producer
1246 // streams are dequeued from. So, using this method can reduce overall throughput
1247 // under contention, but will give more predictable results in single-threaded
1248 // consumer scenarios. This is mostly only useful for internal unit tests.
1249 // Never allocates. Thread-safe.
1250 template <typename U> bool try_dequeue_non_interleaved(U& item)
1251 {
1252 for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1253 if (ptr->dequeue(item)) {
1254 return true;
1255 }
1256 }
1257 return false;
1258 }
1259
1260 // Attempts to dequeue from the queue using an explicit consumer token.
1261 // Returns false if all producer streams appeared empty at the time they
1262 // were checked (so, the queue is likely but not guaranteed to be empty).
1263 // Never allocates. Thread-safe.
1264 template <typename U> bool try_dequeue(consumer_token_t& token, U& item)
1265 {
1266 // The idea is roughly as follows:
1267 // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the
1268 // highest-efficiency consumer dictates the rotation speed of everyone else, more or less. If you see that
1269 // the global offset has changed, you must reset your consumption counter and move to your designated place.
1270 // If there are no items where you're supposed to be, keep moving until you find a producer with some items.
1271 // If the global offset has not changed but you've run out of items to consume, move over from your current
1272 // position until you find a producer with something in it.
1273
1274 if (token.desiredProducer == nullptr ||
1275 token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
1276 if (!update_current_producer_after_rotation(token)) {
1277 return false;
1278 }
1279 }
1280
1281 // If there was at least one non-empty queue but it appears empty at the time
1282 // we try to dequeue from it, we need to make sure every queue's been tried
1283 if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
1284 if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
1285 globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
1286 }
1287 return true;
1288 }
1289
1290 auto tail = producerListTail.load(std::memory_order_acquire);
1291 auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
1292 if (ptr == nullptr) {
1293 ptr = tail;
1294 }
1295 while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
1296 if (ptr->dequeue(item)) {
1297 token.currentProducer = ptr;
1298 token.itemsConsumedFromCurrent = 1;
1299 return true;
1300 }
1301 ptr = ptr->next_prod();
1302 if (ptr == nullptr) {
1303 ptr = tail;
1304 }
1305 }
1306 return false;
1307 }
1308
1309 // Attempts to dequeue several elements from the queue.
1310 // Returns the number of items actually dequeued.
1311 // Returns 0 if all producer streams appeared empty at the time they
1312 // were checked (so, the queue is likely but not guaranteed to be empty).
1313 // Never allocates. Thread-safe.
1314 template <typename It> size_t try_dequeue_bulk(It itemFirst, size_t max)
1315 {
1316 size_t count = 0;
1317 for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1318 count += ptr->dequeue_bulk(itemFirst, max - count);
1319 if (count == max) {
1320 break;
1321 }
1322 }
1323 return count;
1324 }
1325
1326 // Attempts to dequeue several elements from the queue using an explicit consumer token.
1327 // Returns the number of items actually dequeued.
1328 // Returns 0 if all producer streams appeared empty at the time they
1329 // were checked (so, the queue is likely but not guaranteed to be empty).
1330 // Never allocates. Thread-safe.
1331 template <typename It> size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
1332 {
1333 if (token.desiredProducer == nullptr ||
1334 token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
1335 if (!update_current_producer_after_rotation(token)) {
1336 return 0;
1337 }
1338 }
1339
1340 size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
1341 if (count == max) {
1342 if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >=
1343 EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
1344 globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
1345 }
1346 return max;
1347 }
1348 token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
1349 max -= count;
1350
1351 auto tail = producerListTail.load(std::memory_order_acquire);
1352 auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
1353 if (ptr == nullptr) {
1354 ptr = tail;
1355 }
1356 while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
1357 auto dequeued = ptr->dequeue_bulk(itemFirst, max);
1358 count += dequeued;
1359 if (dequeued != 0) {
1360 token.currentProducer = ptr;
1361 token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
1362 }
1363 if (dequeued == max) {
1364 break;
1365 }
1366 max -= dequeued;
1367 ptr = ptr->next_prod();
1368 if (ptr == nullptr) {
1369 ptr = tail;
1370 }
1371 }
1372 return count;
1373 }
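    // Usage sketch (illustrative only): the token variant is the usual choice for a dedicated consumer
    // thread that drains in batches:
    //
    //     moodycamel::ConcurrentQueue<int> q;
    //     moodycamel::ConsumerToken ctok(q);
    //     int buffer[64];
    //     while (size_t got = q.try_dequeue_bulk(ctok, buffer, 64)) {
    //         // process buffer[0..got)
    //     }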
1374
1375 // Attempts to dequeue from a specific producer's inner queue.
1376 // If you happen to know which producer you want to dequeue from, this
1377 // is significantly faster than using the general-case try_dequeue methods.
1378 // Returns false if the producer's queue appeared empty at the time it
1379 // was checked (so, the queue is likely but not guaranteed to be empty).
1380 // Never allocates. Thread-safe.
1381 template <typename U> inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item)
1382 {
1383 return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
1384 }
1385
1386 // Attempts to dequeue several elements from a specific producer's inner queue.
1387 // Returns the number of items actually dequeued.
1388 // If you happen to know which producer you want to dequeue from, this
1389 // is significantly faster than using the general-case try_dequeue methods.
1390 // Returns 0 if the producer's queue appeared empty at the time it
1391 // was checked (so, the queue is likely but not guaranteed to be empty).
1392 // Never allocates. Thread-safe.
1393 template <typename It>
1394 inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max)
1395 {
1396 return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
1397 }
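    // Usage sketch (illustrative only): pairing one producer token with one consumer effectively gives an
    // SPSC channel inside the MPMC queue, covering both of the *_from_producer overloads above:
    //
    //     moodycamel::ConcurrentQueue<int> q;
    //     moodycamel::ProducerToken ptok(q);
    //     q.enqueue(ptok, 7);
    //     int item;
    //     bool ok = q.try_dequeue_from_producer(ptok, item);               // single element
    //     int buffer[16];
    //     size_t got = q.try_dequeue_bulk_from_producer(ptok, buffer, 16); // batch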
1398
1399 // Returns an estimate of the total number of elements currently in the queue. This
1400 // estimate is only accurate if the queue has completely stabilized before it is called
1401 // (i.e. all enqueue and dequeue operations have completed and their memory effects are
1402 // visible on the calling thread, and no further operations start while this method is
1403 // being called).
1404 // Thread-safe.
1405 size_t size_approx() const
1406 {
1407 size_t size = 0;
1408 for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1409 size += ptr->size_approx();
1410 }
1411 return size;
1412 }
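    // Usage sketch (illustrative only): treat size_approx() as a hint (e.g. for sizing a scratch buffer or
    // reporting metrics), never as a synchronization mechanism:
    //
    //     std::vector<int> scratch;
    //     scratch.reserve(q.size_approx()); // may over- or under-estimate while other threads are active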
1413
1414 // Returns true if the underlying atomic variables used by
1415 // the queue are lock-free (they should be on most platforms).
1416 // Thread-safe.
1417 static constexpr bool is_lock_free()
1418 {
1419        return details::static_is_lock_free<bool>::value == 2 && details::static_is_lock_free<size_t>::value == 2 &&
1420               details::static_is_lock_free<std::uint32_t>::value == 2 &&
1421               details::static_is_lock_free<index_t>::value == 2 &&
1422               details::static_is_lock_free<void*>::value == 2 && details::static_is_lock_free<
1423                   typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value == 2;
1424 }
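    // Usage sketch (illustrative only): since this is constexpr, callers that require lock-freedom can
    // assert it at compile time on their target platform:
    //
    //     static_assert(moodycamel::ConcurrentQueue<int>::is_lock_free(),
    //                   "expected lock-free atomics on this platform");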
1425
1426 private:
1427 friend struct ProducerToken;
1428 friend struct ConsumerToken;
1429 struct ExplicitProducer;
1430 friend struct ExplicitProducer;
1431 struct ImplicitProducer;
1432 friend struct ImplicitProducer;
1433 friend class ConcurrentQueueTests;
1434
1435 enum AllocationMode { CanAlloc, CannotAlloc };
1436
1438 // Queue methods
1440
1441 template <AllocationMode canAlloc, typename U> inline bool inner_enqueue(producer_token_t const& token, U&& element)
1442 {
1443 return static_cast<ExplicitProducer*>(token.producer)
1444 ->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
1445 }
1446
1447 template <AllocationMode canAlloc, typename U> inline bool inner_enqueue(U&& element)
1448 {
1449 auto producer = get_or_add_implicit_producer();
1450 return producer == nullptr
1451 ? false
1452 : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
1453 }
1454
1455 template <AllocationMode canAlloc, typename It>
1456 inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
1457 {
1458 return static_cast<ExplicitProducer*>(token.producer)
1459 ->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
1460 }
1461
1462 template <AllocationMode canAlloc, typename It> inline bool inner_enqueue_bulk(It itemFirst, size_t count)
1463 {
1464 auto producer = get_or_add_implicit_producer();
1465 return producer == nullptr
1466 ? false
1467 : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
1468 }
1469
1470 inline bool update_current_producer_after_rotation(consumer_token_t& token)
1471 {
1472 // Ah, there's been a rotation, figure out where we should be!
1473 auto tail = producerListTail.load(std::memory_order_acquire);
1474 if (token.desiredProducer == nullptr && tail == nullptr) {
1475 return false;
1476 }
1477 auto prodCount = producerCount.load(std::memory_order_relaxed);
1478 auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);
1479 if ((details::unlikely)(token.desiredProducer == nullptr)) {
1480 // Aha, first time we're dequeueing anything.
1481 // Figure out our local position
1482 // Note: offset is from start, not end, but we're traversing from end -- subtract from count first
1483 std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
1484 token.desiredProducer = tail;
1485 for (std::uint32_t i = 0; i != offset; ++i) {
1486 token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
1487 if (token.desiredProducer == nullptr) {
1488 token.desiredProducer = tail;
1489 }
1490 }
1491 }
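            // Worked example (added commentary): with prodCount == 4 and initialOffset == 6, the token's
            // position counted from the start of the list is 6 % 4 == 2, so starting from the tail (the most
            // recently added producer) we hop 4 - 1 - 2 == 1 producer to reach it.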
1492
1493 std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
1494 if (delta >= prodCount) {
1495 delta = delta % prodCount;
1496 }
1497 for (std::uint32_t i = 0; i != delta; ++i) {
1498 token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();
1499 if (token.desiredProducer == nullptr) {
1500 token.desiredProducer = tail;
1501 }
1502 }
1503
1504 token.lastKnownGlobalOffset = globalOffset;
1505 token.currentProducer = token.desiredProducer;
1506 token.itemsConsumedFromCurrent = 0;
1507 return true;
1508 }
1509
1511 // Free list
1513
1514 template <typename N> struct FreeListNode {
1515 FreeListNode()
1516 : freeListRefs(0)
1517 , freeListNext(nullptr)
1518 {}
1519
1520 std::atomic<std::uint32_t> freeListRefs;
1521 std::atomic<N*> freeListNext;
1522 };
1523
1524 // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
1525 // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
1526 // speedy under low contention.
1527 template <typename N> // N must inherit FreeListNode or have the same fields (and initialization of them)
1528 struct FreeList {
1529 FreeList()
1530 : freeListHead(nullptr)
1531 {}
1532 FreeList(FreeList&& other)
1533 : freeListHead(other.freeListHead.load(std::memory_order_relaxed))
1534 {
1535 other.freeListHead.store(nullptr, std::memory_order_relaxed);
1536 }
1537 void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); }
1538
1539 FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
1540 FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
1541
1542 inline void add(N* node)
1543 {
1544#ifdef MCDBGQ_NOLOCKFREE_FREELIST
1545 debug::DebugLock lock(mutex);
1546#endif
1547 // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
1548 // set it using a fetch_add
1549 if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {
1550 // Oh look! We were the last ones referencing this node, and we know
1551 // we want to add it to the free list, so let's do it!
1552 add_knowing_refcount_is_zero(node);
1553 }
1554 }
1555
1556 inline N* try_get()
1557 {
1558#ifdef MCDBGQ_NOLOCKFREE_FREELIST
1559 debug::DebugLock lock(mutex);
1560#endif
1561 auto head = freeListHead.load(std::memory_order_acquire);
1562 while (head != nullptr) {
1563 auto prevHead = head;
1564 auto refs = head->freeListRefs.load(std::memory_order_relaxed);
1565 if ((refs & REFS_MASK) == 0 ||
1566 !head->freeListRefs.compare_exchange_strong(
1567 refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) {
1568 head = freeListHead.load(std::memory_order_acquire);
1569 continue;
1570 }
1571
1572 // Good, reference count has been incremented (it wasn't at zero), which means we can read the
1573 // next and not worry about it changing between now and the time we do the CAS
1574 auto next = head->freeListNext.load(std::memory_order_relaxed);
1575 if (freeListHead.compare_exchange_strong(
1576 head, next, std::memory_order_acquire, std::memory_order_relaxed)) {
1577 // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
1578 // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put
1579 // back on).
1580 assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);
1581
1582 // Decrease refcount twice, once for our ref, and once for the list's ref
1583 head->freeListRefs.fetch_sub(2, std::memory_order_release);
1584 return head;
1585 }
1586
1587 // OK, the head must have changed on us, but we still need to decrease the refcount we increased.
1588 // Note that we don't need to release any memory effects, but we do need to ensure that the reference
1589 // count decrement happens-after the CAS on the head.
1590 refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
1591 if (refs == SHOULD_BE_ON_FREELIST + 1) {
1592 add_knowing_refcount_is_zero(prevHead);
1593 }
1594 }
1595
1596 return nullptr;
1597 }
1598
1599 // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
1600 N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }
1601
1602 private:
1603 inline void add_knowing_refcount_is_zero(N* node)
1604 {
1605 // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run
1606 // only one copy of this method per node at a time, i.e. the single thread case), then we know
1607 // we can safely change the next pointer of the node; however, once the refcount is back above
1608 // zero, then other threads could increase it (happens under heavy contention, when the refcount
1609 // goes to zero in between a load and a refcount increment of a node in try_get, then back up to
1610 // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS
1611 // to add the node to the actual list fails, decrease the refcount and leave the add operation to
1612 // the next thread who puts the refcount back at zero (which could be us, hence the loop).
1613 auto head = freeListHead.load(std::memory_order_relaxed);
1614 while (true) {
1615 node->freeListNext.store(head, std::memory_order_relaxed);
1616 node->freeListRefs.store(1, std::memory_order_release);
1617 if (!freeListHead.compare_exchange_strong(
1618 head, node, std::memory_order_release, std::memory_order_relaxed)) {
1619 // Hmm, the add failed, but we can only try again when the refcount goes back to zero
1620 if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) {
1621 continue;
1622 }
1623 }
1624 return;
1625 }
1626 }
1627
1628 private:
1629 // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under
1630 // contention)
1631 std::atomic<N*> freeListHead;
1632
1633 static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
1634 static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
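        // Added commentary: freeListRefs packs a 31-bit reference count (masked by REFS_MASK) together with
        // the SHOULD_BE_ON_FREELIST flag in the top bit. When add() races with readers in try_get(), whichever
        // thread later drops the reference count back to zero while the flag is set takes responsibility for
        // the deferred insertion via add_knowing_refcount_is_zero().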
1635
1636#ifdef MCDBGQ_NOLOCKFREE_FREELIST
1637 debug::DebugMutex mutex;
1638#endif
1639 };
1640
1642 // Block
1644
1645 enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
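    // Added commentary: Block tracks emptiness in one of two ways depending on the context below. In the
    // explicit context with a small enough BLOCK_SIZE (<= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) it uses the
    // per-slot emptyFlags array; otherwise (implicit context, or larger blocks) it uses the shared
    // elementsCompletelyDequeued counter.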
1646
1647 struct Block {
1648 Block()
1649 : next(nullptr)
1650 , elementsCompletelyDequeued(0)
1651 , freeListRefs(0)
1652 , freeListNext(nullptr)
1653 , dynamicallyAllocated(true)
1654 {
1655#ifdef MCDBGQ_TRACKMEM
1656 owner = nullptr;
1657#endif
1658 }
1659
1660 template <InnerQueueContext context> inline bool is_empty() const
1661 {
1662 MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD)
1663 {
1664 // Check flags
1665 for (size_t i = 0; i < BLOCK_SIZE; ++i) {
1666 if (!emptyFlags[i].load(std::memory_order_relaxed)) {
1667 return false;
1668 }
1669 }
1670
1671 // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
1672 std::atomic_thread_fence(std::memory_order_acquire);
1673 return true;
1674 }
1675 else
1676 {
1677 // Check counter
1678 if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) {
1679 std::atomic_thread_fence(std::memory_order_acquire);
1680 return true;
1681 }
1682 assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE);
1683 return false;
1684 }
1685 }
1686
1687 // Returns true if the block is now empty (does not apply in explicit context)
1688 template <InnerQueueContext context> inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i)
1689 {
1690 MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD)
1691 {
1692 // Set flag
1693 assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(
1694 std::memory_order_relaxed));
1695 emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(
1696 true, std::memory_order_release);
1697 return false;
1698 }
1699 else
1700 {
1701 // Increment counter
1702 auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
1703 assert(prevVal < BLOCK_SIZE);
1704 return prevVal == BLOCK_SIZE - 1;
1705 }
1706 }
1707
1708 // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
1709 // Returns true if the block is now empty (does not apply in explicit context).
1710 template <InnerQueueContext context> inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count)
1711 {
1712 MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD)
1713 {
1714 // Set flags
1715 std::atomic_thread_fence(std::memory_order_release);
1716 i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
1717 for (size_t j = 0; j != count; ++j) {
1718 assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
1719 emptyFlags[i + j].store(true, std::memory_order_relaxed);
1720 }
1721 return false;
1722 }
1723 else
1724 {
1725 // Increment counter
1726 auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);
1727 assert(prevVal + count <= BLOCK_SIZE);
1728 return prevVal + count == BLOCK_SIZE;
1729 }
1730 }
1731
1732 template <InnerQueueContext context> inline void set_all_empty()
1733 {
1734 MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD)
1735 {
1736 // Set all flags
1737 for (size_t i = 0; i != BLOCK_SIZE; ++i) {
1738 emptyFlags[i].store(true, std::memory_order_relaxed);
1739 }
1740 }
1741 else
1742 {
1743 // Reset counter
1744 elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
1745 }
1746 }
1747
1748 template <InnerQueueContext context> inline void reset_empty()
1749 {
1750 MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD)
1751 {
1752 // Reset flags
1753 for (size_t i = 0; i != BLOCK_SIZE; ++i) {
1754 emptyFlags[i].store(false, std::memory_order_relaxed);
1755 }
1756 }
1757 else
1758 {
1759 // Reset counter
1760 elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
1761 }
1762 }
1763
1764 inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT
1765 {
1766 return static_cast<T*>(static_cast<void*>(elements)) +
1767 static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));
1768 }
1769 inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT
1770 {
1771 return static_cast<T const*>(static_cast<void const*>(elements)) +
1772 static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));
1773 }
1774
1775 private:
1776 static_assert(std::alignment_of<T>::value <= sizeof(T),
1777 "The queue does not support types with an alignment greater than their size at this time");
1778 MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;
1779
1780 public:
1781 Block* next;
1782 std::atomic<size_t> elementsCompletelyDequeued;
1783 std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];
1784
1785 public:
1786 std::atomic<std::uint32_t> freeListRefs;
1787 std::atomic<Block*> freeListNext;
1788 bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
1789
1790#ifdef MCDBGQ_TRACKMEM
1791 void* owner;
1792#endif
1793 };
1794 static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value,
1795 "Internal error: Blocks must be at least as aligned as the type they are wrapping");
1796
1797#ifdef MCDBGQ_TRACKMEM
1798 public:
1799 struct MemStats;
1800
1801 private:
1802#endif
1803
1805 // Producer base
1807
1808 struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase {
1809 ProducerBase(ConcurrentQueue* parent_, bool isExplicit_)
1810 : tailIndex(0)
1811 , headIndex(0)
1812 , dequeueOptimisticCount(0)
1813 , dequeueOvercommit(0)
1814 , tailBlock(nullptr)
1815 , isExplicit(isExplicit_)
1816 , parent(parent_)
1817 {}
1818
1819 virtual ~ProducerBase() {}
1820
1821 template <typename U> inline bool dequeue(U& element)
1822 {
1823 if (isExplicit) {
1824 return static_cast<ExplicitProducer*>(this)->dequeue(element);
1825 } else {
1826 return static_cast<ImplicitProducer*>(this)->dequeue(element);
1827 }
1828 }
1829
1830 template <typename It> inline size_t dequeue_bulk(It& itemFirst, size_t max)
1831 {
1832 if (isExplicit) {
1833 return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
1834 } else {
1835 return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
1836 }
1837 }
1838
1839 inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
1840
1841 inline size_t size_approx() const
1842 {
1843 auto tail = tailIndex.load(std::memory_order_relaxed);
1844 auto head = headIndex.load(std::memory_order_relaxed);
1845 return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
1846 }
1847
1848 inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
1849
1850 protected:
1851 std::atomic<index_t> tailIndex; // Where to enqueue to next
1852 std::atomic<index_t> headIndex; // Where to dequeue from next
1853
1854 std::atomic<index_t> dequeueOptimisticCount;
1855 std::atomic<index_t> dequeueOvercommit;
1856
1857 Block* tailBlock;
1858
1859 public:
1860 bool isExplicit;
1861 ConcurrentQueue* parent;
1862
1863 protected:
1864#ifdef MCDBGQ_TRACKMEM
1865 friend struct MemStats;
1866#endif
1867 };
1868
1870 // Explicit queue
1872
1873 struct ExplicitProducer : public ProducerBase {
1874 explicit ExplicitProducer(ConcurrentQueue* parent_)
1875 : ProducerBase(parent_, true)
1876 , blockIndex(nullptr)
1877 , pr_blockIndexSlotsUsed(0)
1878 , pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1)
1879 , pr_blockIndexFront(0)
1880 , pr_blockIndexEntries(nullptr)
1881 , pr_blockIndexRaw(nullptr)
1882 {
1883 size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
1884 if (poolBasedIndexSize > pr_blockIndexSize) {
1885 pr_blockIndexSize = poolBasedIndexSize;
1886 }
1887
1888 new_block_index(
1889 0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
1890 }
1891
1892 ~ExplicitProducer()
1893 {
1894 // Destruct any elements not yet dequeued.
1895 // Since we're in the destructor, we can assume all elements
1896 // are either completely dequeued or completely not (no halfways).
1897 if (this->tailBlock != nullptr) { // Note this means there must be a block index too
1898 // First find the block that's partially dequeued, if any
1899 Block* halfDequeuedBlock = nullptr;
1900 if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
1901 // The head's not on a block boundary, meaning a block somewhere is partially dequeued
1902 // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a
1903 // boundary)
1904 size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1);
1905 while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE,
1906 this->headIndex.load(std::memory_order_relaxed))) {
1907 i = (i + 1) & (pr_blockIndexSize - 1);
1908 }
1909 assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base,
1910 this->headIndex.load(std::memory_order_relaxed)));
1911 halfDequeuedBlock = pr_blockIndexEntries[i].block;
1912 }
1913
1914 // Start at the head block (note the first line in the loop gives us the head from the tail on the first
1915 // iteration)
1916 auto block = this->tailBlock;
1917 do {
1918 block = block->next;
1919 if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
1920 continue;
1921 }
1922
1923 size_t i = 0; // Offset into block
1924 if (block == halfDequeuedBlock) {
1925 i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) &
1926 static_cast<index_t>(BLOCK_SIZE - 1));
1927 }
1928
1929 // Walk through all the items in the block; if this is the tail block, we need to stop when we reach
1930 // the tail index
1931 auto lastValidIndex =
1932 (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0
1933 ? BLOCK_SIZE
1934 : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) &
1935 static_cast<index_t>(BLOCK_SIZE - 1));
1936 while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
1937 (*block)[i++]->~T();
1938 }
1939 } while (block != this->tailBlock);
1940 }
1941
1942 // Destroy all blocks that we own
1943 if (this->tailBlock != nullptr) {
1944 auto block = this->tailBlock;
1945 do {
1946 auto nextBlock = block->next;
1947 this->parent->add_block_to_free_list(block);
1948 block = nextBlock;
1949 } while (block != this->tailBlock);
1950 }
1951
1952 // Destroy the block indices
1953 auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
1954 while (header != nullptr) {
1955 auto prev = static_cast<BlockIndexHeader*>(header->prev);
1956 header->~BlockIndexHeader();
1957 (Traits::free)(header);
1958 header = prev;
1959 }
1960 }
1961
1962 template <AllocationMode allocMode, typename U> inline bool enqueue(U&& element)
1963 {
1964 index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
1965 index_t newTailIndex = 1 + currentTailIndex;
1966 if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
1967 // We reached the end of a block, start a new one
1968 auto startBlock = this->tailBlock;
1969 auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
1970 if (this->tailBlock != nullptr &&
1971 this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
1972 // We can re-use the block ahead of us, it's empty!
1973 this->tailBlock = this->tailBlock->next;
1974 this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
1975
1976 // We'll put the block on the block index (guaranteed to be room since we're conceptually removing
1977 // the last block from it first -- except instead of removing then adding, we can just overwrite).
1978 // Note that there must be a valid block index here, since even if allocation failed in the ctor,
1979 // it would have been re-attempted when adding the first block to the queue; since there is such
1980 // a block, a block index must have been successfully allocated.
1981 } else {
1982 // Whatever head value we see here is >= the last value we saw here (relatively),
1983 // and <= its current value. Since we have the most recent tail, the head must be
1984 // <= to it.
1985 auto head = this->headIndex.load(std::memory_order_relaxed);
1986 assert(!details::circular_less_than<index_t>(currentTailIndex, head));
1987 if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) ||
1988 (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
1989 (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
1990 // We can't enqueue in another block because there's not enough leeway -- the
1991 // tail could surpass the head by the time the block fills up! (Or we'll exceed
1992 // the size limit, if the second part of the condition was true.)
1993 return false;
1994 }
1995 // We're going to need a new block; check that the block index has room
1996 if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) {
1997 // Hmm, the circular block index is already full -- we'll need
1998 // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
1999 // the initial allocation failed in the constructor.
2000
2001 MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc)
2002 {
2003 return false;
2004 }
2005 else if (!new_block_index(pr_blockIndexSlotsUsed))
2006 {
2007 return false;
2008 }
2009 }
2010
2011 // Insert a new block in the circular linked list
2012 auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
2013 if (newBlock == nullptr) {
2014 return false;
2015 }
2016#ifdef MCDBGQ_TRACKMEM
2017 newBlock->owner = this;
2018#endif
2019 newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
2020 if (this->tailBlock == nullptr) {
2021 newBlock->next = newBlock;
2022 } else {
2023 newBlock->next = this->tailBlock->next;
2024 this->tailBlock->next = newBlock;
2025 }
2026 this->tailBlock = newBlock;
2027 ++pr_blockIndexSlotsUsed;
2028 }
2029
2030 MOODYCAMEL_CONSTEXPR_IF(
2031 !MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element))))
2032 {
2033 // The constructor may throw. We want the element not to appear in the queue in
2034 // that case (without corrupting the queue):
2035 MOODYCAMEL_TRY
2036 {
2037 new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
2038 }
2039 MOODYCAMEL_CATCH(...)
2040 {
2041 // Revert change to the current block, but leave the new block available
2042 // for next time
2043 pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2044 this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock;
2045 MOODYCAMEL_RETHROW;
2046 }
2047 }
2048 else
2049 {
2050 (void)startBlock;
2051 (void)originalBlockIndexSlotsUsed;
2052 }
2053
2054 // Add block to block index
2055 auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
2056 entry.base = currentTailIndex;
2057 entry.block = this->tailBlock;
2058 blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
2059 pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
2060
2061 MOODYCAMEL_CONSTEXPR_IF(
2062 !MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element))))
2063 {
2064 this->tailIndex.store(newTailIndex, std::memory_order_release);
2065 return true;
2066 }
2067 }
2068
2069 // Enqueue
2070 new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
2071
2072 this->tailIndex.store(newTailIndex, std::memory_order_release);
2073 return true;
2074 }
2075
2076 template <typename U> bool dequeue(U& element)
2077 {
2078 auto tail = this->tailIndex.load(std::memory_order_relaxed);
2079 auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2080 if (details::circular_less_than<index_t>(
2081 this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
2082 // Might be something to dequeue, let's give it a try
2083
2084 // Note that this if is purely for performance purposes in the common case when the queue is
2085 // empty and the values are eventually consistent -- we may enter here spuriously.
2086
2087 // Note that whatever the values of overcommit and tail are, they are not going to change (unless we
2088 // change them) and must be the same value at this point (inside the if) as when the if condition was
2089 // evaluated.
2090
2091 // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit
2092 // below. This ensures that whatever the value we got loaded into overcommit, the load of
2093                // dequeueOptimisticCount in the fetch_add below will result in a value at least as recent as that (and
2094 // therefore at least as large). Note that I believe a compiler (signal) fence here would be sufficient
2095 // due to the nature of fetch_add (all read-modify-write operations are guaranteed to work on the latest
2096 // value in the modification order), but unfortunately that can't be shown to be correct using only the
2097 // C++11 standard. See
2098 // http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
2099 std::atomic_thread_fence(std::memory_order_acquire);
2100
2101 // Increment optimistic counter, then check if it went over the boundary
2102 auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
2103
2104 // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is
2105 // only ever incremented after dequeueOptimisticCount -- this is enforced in the `else` block below),
2106 // and since we now have a version of dequeueOptimisticCount that is at least as recent as overcommit
2107 // (due to the release upon incrementing dequeueOvercommit and the acquire above that synchronizes with
2108 // it), overcommit <= myDequeueCount. However, we can't assert this since both dequeueOptimisticCount
2109 // and dequeueOvercommit may (independently) overflow; in such a case, though, the logic still holds
2110 // since the difference between the two is maintained.
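                // In short (added summary of the above): dequeueOptimisticCount counts optimistic dequeue
                // attempts, dequeueOvercommit counts the attempts that turned out to find nothing, so their
                // difference is the number of elements actually claimed, and headIndex is only advanced for
                // successful claims.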
2111
2112 // Note that we reload tail here in case it changed; it will be the same value as before or greater,
2113 // since this load is sequenced after (happens after) the earlier load above. This is supported by
2114 // read-read coherency (as defined in the standard), explained here:
2115 // http://en.cppreference.com/w/cpp/atomic/memory_order
2116 tail = this->tailIndex.load(std::memory_order_acquire);
2117 if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
2118 // Guaranteed to be at least one element to dequeue!
2119
2120 // Get the index. Note that since there's guaranteed to be at least one element, this
2121 // will never exceed tail. We need to do an acquire-release fence here since it's possible
2122 // that whatever condition got us to this point was for an earlier enqueued element (that
2123 // we already see the memory effects for), but that by the time we increment somebody else
2124                // has incremented it, and we need to see the memory effects for *that* element, which
2125                // in such a case is necessarily visible on the thread that incremented it in the first
2126 // place with the more current condition (they must have acquired a tail that is at least
2127 // as recent).
2128 auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
2129
2130 // Determine which block the element is in
2131
2132 auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
2133 auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
2134
2135 // We need to be careful here about subtracting and dividing because of index wrap-around.
2136 // When an index wraps, we need to preserve the sign of the offset when dividing it by the
2137 // block size (in order to get a correct signed block count offset in all cases):
2138 auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
2139 auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
2140 auto offset = static_cast<size_t>(
2141 static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) /
2142 static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
2143 auto block =
2144 localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
2145
2146 // Dequeue
2147 auto& el = *((*block)[index]);
2148 if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
2149 // Make sure the element is still fully dequeued and destroyed even if the assignment
2150 // throws
2151 struct Guard {
2152 Block* block;
2153 index_t index;
2154
2155 ~Guard()
2156 {
2157 (*block)[index]->~T();
2158 block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
2159 }
2160 } guard = { block, index };
2161
2162 element = std::move(el); // NOLINT
2163 } else {
2164 element = std::move(el); // NOLINT
2165 el.~T(); // NOLINT
2166 block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
2167 }
2168
2169 return true;
2170 } else {
2171 // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
2172 this->dequeueOvercommit.fetch_add(
2173 1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is
2174 // guaranteed to happen before this write
2175 }
2176 }
2177
2178 return false;
2179 }
2180
2181 template <AllocationMode allocMode, typename It>
2182 bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
2183 {
2184 // First, we need to make sure we have enough room to enqueue all of the elements;
2185 // this means pre-allocating blocks and putting them in the block index (but only if
2186 // all the allocations succeeded).
2187 index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2188 auto startBlock = this->tailBlock;
2189 auto originalBlockIndexFront = pr_blockIndexFront;
2190 auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
2191
2192 Block* firstAllocatedBlock = nullptr;
2193
2194 // Figure out how many blocks we'll need to allocate, and do so
2195 size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) -
2196 ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
2197 index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2198 if (blockBaseDiff > 0) {
2199 // Allocate as many blocks as possible from ahead
2200 while (blockBaseDiff > 0 && this->tailBlock != nullptr &&
2201 this->tailBlock->next != firstAllocatedBlock &&
2202 this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
2203 blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2204 currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2205
2206 this->tailBlock = this->tailBlock->next;
2207 firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
2208
2209 auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
2210 entry.base = currentTailIndex;
2211 entry.block = this->tailBlock;
2212 pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
2213 }
2214
2215 // Now allocate as many blocks as necessary from the block pool
2216 while (blockBaseDiff > 0) {
2217 blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2218 currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2219
2220 auto head = this->headIndex.load(std::memory_order_relaxed);
2221 assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2222 bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) ||
2223 (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
2224 (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
2225 if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
2226 MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc)
2227 {
2228 // Failed to allocate, undo changes (but keep injected blocks)
2229 pr_blockIndexFront = originalBlockIndexFront;
2230 pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2231 this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2232 return false;
2233 }
2234 else if (full || !new_block_index(originalBlockIndexSlotsUsed))
2235 {
2236 // Failed to allocate, undo changes (but keep injected blocks)
2237 pr_blockIndexFront = originalBlockIndexFront;
2238 pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2239 this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2240 return false;
2241 }
2242
2243 // pr_blockIndexFront is updated inside new_block_index, so we need to
2244 // update our fallback value too (since we keep the new index even if we
2245 // later fail)
2246 originalBlockIndexFront = originalBlockIndexSlotsUsed;
2247 }
2248
2249 // Insert a new block in the circular linked list
2250 auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
2251 if (newBlock == nullptr) {
2252 pr_blockIndexFront = originalBlockIndexFront;
2253 pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2254 this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2255 return false;
2256 }
2257
2258#ifdef MCDBGQ_TRACKMEM
2259 newBlock->owner = this;
2260#endif
2261 newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
2262 if (this->tailBlock == nullptr) {
2263 newBlock->next = newBlock;
2264 } else {
2265 newBlock->next = this->tailBlock->next;
2266 this->tailBlock->next = newBlock;
2267 }
2268 this->tailBlock = newBlock;
2269 firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
2270
2271 ++pr_blockIndexSlotsUsed;
2272
2273 auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
2274 entry.base = currentTailIndex;
2275 entry.block = this->tailBlock;
2276 pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
2277 }
2278
2279 // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
2280 // publish the new block index front
2281 auto block = firstAllocatedBlock;
2282 while (true) {
2283 block->ConcurrentQueue::Block::template reset_empty<explicit_context>();
2284 if (block == this->tailBlock) {
2285 break;
2286 }
2287 block = block->next;
2288 }
2289
2290 MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(
2291 T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst))))
2292 {
2293 blockIndex.load(std::memory_order_relaxed)
2294 ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
2295 }
2296 }
2297
2298 // Enqueue, one block at a time
2299 index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
2300 currentTailIndex = startTailIndex;
2301 auto endBlock = this->tailBlock;
2302 this->tailBlock = startBlock;
2303 assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr ||
2304 count == 0);
2305 if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
2306 this->tailBlock = firstAllocatedBlock;
2307 }
2308 while (true) {
2309 index_t stopIndex =
2310 (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2311 if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
2312 stopIndex = newTailIndex;
2313 }
2314 MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(
2315 T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst))))
2316 {
2317 while (currentTailIndex != stopIndex) {
2318 new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
2319 }
2320 }
2321 else
2322 {
2323 MOODYCAMEL_TRY
2324 {
2325 while (currentTailIndex != stopIndex) {
2326 // Must use copy constructor even if move constructor is available
2327 // because we may have to revert if there's an exception.
2328 // Sorry about the horrible templated next line, but it was the only way
2329 // to disable moving *at compile time*, which is important because a type
2330 // may only define a (noexcept) move constructor, and so calls to the
2331 // cctor will not compile, even if they are in an if branch that will never
2332 // be executed
2333 new ((*this->tailBlock)[currentTailIndex])
2334 T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(
2335 T,
2336 decltype(*itemFirst),
2337 new (static_cast<T*>(nullptr))
2338 T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
2339 ++currentTailIndex;
2340 ++itemFirst;
2341 }
2342 }
2343 MOODYCAMEL_CATCH(...)
2344 {
2345 // Oh dear, an exception's been thrown -- destroy the elements that
2346 // were enqueued so far and revert the entire bulk operation (we'll keep
2347 // any allocated blocks in our linked list for later, though).
2348 auto constructedStopIndex = currentTailIndex;
2349 auto lastBlockEnqueued = this->tailBlock;
2350
2351 pr_blockIndexFront = originalBlockIndexFront;
2352 pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2353 this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
2354
2355                        if (!details::is_trivially_destructible<T>::value) {
2356                            auto block = startBlock;
2357 if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2358 block = firstAllocatedBlock;
2359 }
2360 currentTailIndex = startTailIndex;
2361 while (true) {
2362 stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2363 static_cast<index_t>(BLOCK_SIZE);
2364 if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
2365 stopIndex = constructedStopIndex;
2366 }
2367 while (currentTailIndex != stopIndex) {
2368 (*block)[currentTailIndex++]->~T();
2369 }
2370 if (block == lastBlockEnqueued) {
2371 break;
2372 }
2373 block = block->next;
2374 }
2375 }
2376 MOODYCAMEL_RETHROW;
2377 }
2378 }
2379
2380 if (this->tailBlock == endBlock) {
2381 assert(currentTailIndex == newTailIndex);
2382 break;
2383 }
2384 this->tailBlock = this->tailBlock->next;
2385 }
2386
2387 MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(
2388 T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst))))
2389 {
2390 if (firstAllocatedBlock != nullptr)
2391 blockIndex.load(std::memory_order_relaxed)
2392 ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
2393 }
2394
2395 this->tailIndex.store(newTailIndex, std::memory_order_release);
2396 return true;
2397 }
2398
2399 template <typename It> size_t dequeue_bulk(It& itemFirst, size_t max)
2400 {
2401 auto tail = this->tailIndex.load(std::memory_order_relaxed);
2402 auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2403 auto desiredCount =
2404 static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
2405 if (details::circular_less_than<size_t>(0, desiredCount)) {
2406 desiredCount = desiredCount < max ? desiredCount : max;
2407 std::atomic_thread_fence(std::memory_order_acquire);
2408
2409 auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
2410
2411 tail = this->tailIndex.load(std::memory_order_acquire);
2412 auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
2413 if (details::circular_less_than<size_t>(0, actualCount)) {
2414 actualCount = desiredCount < actualCount ? desiredCount : actualCount;
2415 if (actualCount < desiredCount) {
2416 this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
2417 }
2418
2419 // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
2420 // will never exceed tail.
2421 auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
2422
2423 // Determine which block the first element is in
2424 auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
2425 auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
2426
2427 auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
2428 auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
2429 auto offset = static_cast<size_t>(
2430 static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) /
2431 static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
2432 auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
2433
2434 // Iterate the blocks and dequeue
2435 auto index = firstIndex;
2436 do {
2437 auto firstIndexInBlock = index;
2438 index_t endIndex =
2439 (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2440 endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount),
2441 endIndex)
2442 ? firstIndex + static_cast<index_t>(actualCount)
2443 : endIndex;
2444 auto block = localBlockIndex->entries[indexIndex].block;
2445 if (MOODYCAMEL_NOEXCEPT_ASSIGN(
2446 T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
2447 while (index != endIndex) {
2448 auto& el = *((*block)[index]);
2449 *itemFirst++ = std::move(el);
2450 el.~T();
2451 ++index;
2452 }
2453 } else {
2454 MOODYCAMEL_TRY
2455 {
2456 while (index != endIndex) {
2457 auto& el = *((*block)[index]);
2458 *itemFirst = std::move(el);
2459 ++itemFirst;
2460 el.~T();
2461 ++index;
2462 }
2463 }
2464 MOODYCAMEL_CATCH(...)
2465 {
2466 // It's too late to revert the dequeue, but we can make sure that all
2467 // the dequeued objects are properly destroyed and the block index
2468 // (and empty count) are properly updated before we propagate the exception
2469 do {
2470 block = localBlockIndex->entries[indexIndex].block;
2471 while (index != endIndex) {
2472 (*block)[index++]->~T();
2473 }
2474 block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(
2475 firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
2476 indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
2477
2478 firstIndexInBlock = index;
2479 endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2480 static_cast<index_t>(BLOCK_SIZE);
2481 endIndex = details::circular_less_than<index_t>(
2482 firstIndex + static_cast<index_t>(actualCount), endIndex)
2483 ? firstIndex + static_cast<index_t>(actualCount)
2484 : endIndex;
2485 } while (index != firstIndex + actualCount);
2486
2487 MOODYCAMEL_RETHROW;
2488 }
2489 }
2490 block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(
2491 firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
2492 indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
2493 } while (index != firstIndex + actualCount);
2494
2495 return actualCount;
2496 } else {
2497 // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
2498 this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
2499 }
2500 }
2501
2502 return 0;
2503 }
2504
2505 private:
2506 struct BlockIndexEntry {
2507 index_t base;
2508 Block* block;
2509 };
2510
2511 struct BlockIndexHeader {
2512 size_t size;
2513 std::atomic<size_t> front; // Current slot (not next, like pr_blockIndexFront)
2514 BlockIndexEntry* entries;
2515 void* prev;
2516 };
2517
2518 bool new_block_index(size_t numberOfFilledSlotsToExpose)
2519 {
2520 auto prevBlockSizeMask = pr_blockIndexSize - 1;
2521
2522 // Create the new block
2523 pr_blockIndexSize <<= 1;
2524 auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) +
2525 std::alignment_of<BlockIndexEntry>::value - 1 +
2526 sizeof(BlockIndexEntry) * pr_blockIndexSize));
2527 if (newRawPtr == nullptr) {
2528 pr_blockIndexSize >>= 1; // Reset to allow graceful retry
2529 return false;
2530 }
2531
2532 auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(
2533 details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));
2534
2535 // Copy in all the old indices, if any
2536 size_t j = 0;
2537 if (pr_blockIndexSlotsUsed != 0) {
2538 auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;
2539 do {
2540 newBlockIndexEntries[j++] = pr_blockIndexEntries[i];
2541 i = (i + 1) & prevBlockSizeMask;
2542 } while (i != pr_blockIndexFront);
2543 }
2544
2545 // Update everything
2546 auto header = new (newRawPtr) BlockIndexHeader;
2547 header->size = pr_blockIndexSize;
2548 header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
2549 header->entries = newBlockIndexEntries;
2550 header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later
2551
2552 pr_blockIndexFront = j;
2553 pr_blockIndexEntries = newBlockIndexEntries;
2554 pr_blockIndexRaw = newRawPtr;
2555 blockIndex.store(header, std::memory_order_release);
2556
2557 return true;
2558 }
2559
2560 private:
2561 std::atomic<BlockIndexHeader*> blockIndex;
2562
2563        // To be used by producer only -- consumer must use the ones referenced by blockIndex
2564 size_t pr_blockIndexSlotsUsed;
2565 size_t pr_blockIndexSize;
2566 size_t pr_blockIndexFront; // Next slot (not current)
2567 BlockIndexEntry* pr_blockIndexEntries;
2568 void* pr_blockIndexRaw;
2569
2570#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
2571 public:
2572 ExplicitProducer* nextExplicitProducer;
2573
2574 private:
2575#endif
2576
2577#ifdef MCDBGQ_TRACKMEM
2578 friend struct MemStats;
2579#endif
2580 };
2581
2583 // Implicit queue
2585
2586 struct ImplicitProducer : public ProducerBase {
2587 ImplicitProducer(ConcurrentQueue* parent_)
2588 : ProducerBase(parent_, false)
2589 , nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE)
2590 , blockIndex(nullptr)
2591 {
2592 new_block_index();
2593 }
2594
2595 ~ImplicitProducer()
2596 {
2597 // Note that since we're in the destructor we can assume that all enqueue/dequeue operations
2598 // completed already; this means that all undequeued elements are placed contiguously across
2599 // contiguous blocks, and that only the first and last remaining blocks can be only partially
2600 // empty (all other remaining blocks must be completely full).
2601
2602#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
2603 // Unregister ourselves for thread termination notification
2604 if (!this->inactive.load(std::memory_order_relaxed)) {
2605 details::ThreadExitNotifier::unsubscribe(&threadExitListener);
2606 }
2607#endif
2608
2609 // Destroy all remaining elements!
2610 auto tail = this->tailIndex.load(std::memory_order_relaxed);
2611 auto index = this->headIndex.load(std::memory_order_relaxed);
2612 Block* block = nullptr;
2613 assert(index == tail || details::circular_less_than(index, tail));
2614 bool forceFreeLastBlock =
2615 index != tail; // If we enter the loop, then the last (tail) block will not be freed
2616 while (index != tail) {
2617 if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
2618 if (block != nullptr) {
2619 // Free the old block
2620 this->parent->add_block_to_free_list(block);
2621 }
2622
2623 block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
2624 }
2625
2626 ((*block)[index])->~T();
2627 ++index;
2628 }
2629 // Even if the queue is empty, there's still one block that's not on the free list
2630 // (unless the head index reached the end of it, in which case the tail will be poised
2631 // to create a new block).
2632 if (this->tailBlock != nullptr &&
2633 (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
2634 this->parent->add_block_to_free_list(this->tailBlock);
2635 }
2636
2637 // Destroy block index
2638 auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
2639 if (localBlockIndex != nullptr) {
2640 for (size_t i = 0; i != localBlockIndex->capacity; ++i) {
2641 localBlockIndex->index[i]->~BlockIndexEntry();
2642 }
2643 do {
2644 auto prev = localBlockIndex->prev;
2645 localBlockIndex->~BlockIndexHeader();
2646 (Traits::free)(localBlockIndex);
2647 localBlockIndex = prev;
2648 } while (localBlockIndex != nullptr);
2649 }
2650 }
2651
2652 template <AllocationMode allocMode, typename U> inline bool enqueue(U&& element)
2653 {
2654 index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2655 index_t newTailIndex = 1 + currentTailIndex;
2656 if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2657 // We reached the end of a block, start a new one
2658 auto head = this->headIndex.load(std::memory_order_relaxed);
2659 assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2660 if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) ||
2661 (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
2662 (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
2663 return false;
2664 }
2665#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2666 debug::DebugLock lock(mutex);
2667#endif
2668 // Find out where we'll be inserting this block in the block index
2669 BlockIndexEntry* idxEntry;
2670 if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
2671 return false;
2672 }
2673
2674 // Get ahold of a new block
2675 auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
2676 if (newBlock == nullptr) {
2677 rewind_block_index_tail();
2678 idxEntry->value.store(nullptr, std::memory_order_relaxed);
2679 return false;
2680 }
2681#ifdef MCDBGQ_TRACKMEM
2682 newBlock->owner = this;
2683#endif
2684 newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
2685
2686 MOODYCAMEL_CONSTEXPR_IF(
2687 !MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element))))
2688 {
2689 // May throw, try to insert now before we publish the fact that we have this new block
2690 MOODYCAMEL_TRY
2691 {
2692 new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
2693 }
2694 MOODYCAMEL_CATCH(...)
2695 {
2696 rewind_block_index_tail();
2697 idxEntry->value.store(nullptr, std::memory_order_relaxed);
2698 this->parent->add_block_to_free_list(newBlock);
2699 MOODYCAMEL_RETHROW;
2700 }
2701 }
2702
2703 // Insert the new block into the index
2704 idxEntry->value.store(newBlock, std::memory_order_relaxed);
2705
2706 this->tailBlock = newBlock;
2707
2708 MOODYCAMEL_CONSTEXPR_IF(
2709 !MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element))))
2710 {
2711 this->tailIndex.store(newTailIndex, std::memory_order_release);
2712 return true;
2713 }
2714 }
2715
2716 // Enqueue
2717 new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
2718
2719 this->tailIndex.store(newTailIndex, std::memory_order_release);
2720 return true;
2721 }
2722
2723 template <typename U> bool dequeue(U& element)
2724 {
2725 // See ExplicitProducer::dequeue for rationale and explanation
2726 index_t tail = this->tailIndex.load(std::memory_order_relaxed);
2727 index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2728 if (details::circular_less_than<index_t>(
2729 this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
2730 std::atomic_thread_fence(std::memory_order_acquire);
2731
2732 index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
2733 tail = this->tailIndex.load(std::memory_order_acquire);
2734 if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
2735 index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
2736
2737 // Determine which block the element is in
2738 auto entry = get_block_index_entry_for_index(index);
2739
2740 // Dequeue
2741 auto block = entry->value.load(std::memory_order_relaxed);
2742 auto& el = *((*block)[index]);
2743
2744 if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
2745#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2746 // Note: Acquiring the mutex with every dequeue instead of only when a block
2747 // is released is very sub-optimal, but it is, after all, purely debug code.
2748 debug::DebugLock lock(producer->mutex);
2749#endif
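// When the move assignment below may throw, this guard guarantees that the element's destructor
// still runs and that the block is marked empty (and returned to the free list once fully
// drained) even if an exception propagates out of the assignment.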
2750 struct Guard {
2751 Block* block;
2752 index_t index;
2753 BlockIndexEntry* entry;
2754 ConcurrentQueue* parent;
2755
2756 ~Guard()
2757 {
2758 (*block)[index]->~T();
2759 if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
2760 entry->value.store(nullptr, std::memory_order_relaxed);
2761 parent->add_block_to_free_list(block);
2762 }
2763 }
2764 } guard = { block, index, entry, this->parent };
2765
2766 element = std::move(el); // NOLINT
2767 } else {
2768 element = std::move(el); // NOLINT
2769 el.~T(); // NOLINT
2770
2771 if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
2772 {
2773#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2774 debug::DebugLock lock(mutex);
2775#endif
2776 // Add the block back into the global free pool (and remove from block index)
2777 entry->value.store(nullptr, std::memory_order_relaxed);
2778 }
2779 this->parent->add_block_to_free_list(block); // releases the above store
2780 }
2781 }
2782
2783 return true;
2784 } else {
2785 this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
2786 }
2787 }
2788
2789 return false;
2790 }
2791
2792#ifdef _MSC_VER
2793#pragma warning(push)
2794#pragma warning(disable : 4706) // assignment within conditional expression
2795#endif
2796 template <AllocationMode allocMode, typename It> bool enqueue_bulk(It itemFirst, size_t count)
2797 {
2798 // First, we need to make sure we have enough room to enqueue all of the elements;
2799 // this means pre-allocating blocks and putting them in the block index (but only if
2800 // all the allocations succeeded).
2801
2802 // Note that the tailBlock we start off with may not be owned by us any more;
2803 // this happens if it was filled up exactly to the top (setting tailIndex to
2804 // the first index of the next block which is not yet allocated), then dequeued
2805 // completely (putting it on the free list) before we enqueue again.
2806
2807 index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2808 auto startBlock = this->tailBlock;
2809 Block* firstAllocatedBlock = nullptr;
2810 auto endBlock = this->tailBlock;
2811
2812 // Figure out how many blocks we'll need to allocate, and do so
2813 size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) -
2814 ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
2815 index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
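// e.g. with BLOCK_SIZE == 32, startTailIndex == 40 and count == 100, the last new element lands
// at index 139, so blockBaseDiff == 128 - 32 == 96: three additional 32-slot blocks are required
// beyond the block that already contains index 40.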
2816 if (blockBaseDiff > 0) {
2817#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2818 debug::DebugLock lock(mutex);
2819#endif
2820 do {
2821 blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2822 currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2823
2824 // Find out where we'll be inserting this block in the block index
2825 BlockIndexEntry* idxEntry =
2826 nullptr; // initialization here unnecessary but compiler can't always tell
2827 Block* newBlock;
2828 bool indexInserted = false;
2829 auto head = this->headIndex.load(std::memory_order_relaxed);
2830 assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2831 bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) ||
2832 (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
2833 (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
2834
2835 if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) ||
2836 (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) ==
2837 nullptr) {
2838 // Index allocation or block allocation failed; revert any other allocations
2839 // and index insertions done so far for this operation
2840 if (indexInserted) {
2841 rewind_block_index_tail();
2842 idxEntry->value.store(nullptr, std::memory_order_relaxed);
2843 }
2844 currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2845 for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
2846 currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2847 idxEntry = get_block_index_entry_for_index(currentTailIndex);
2848 idxEntry->value.store(nullptr, std::memory_order_relaxed);
2849 rewind_block_index_tail();
2850 }
2851 this->parent->add_blocks_to_free_list(firstAllocatedBlock);
2852 this->tailBlock = startBlock;
2853
2854 return false;
2855 }
2856
2857#ifdef MCDBGQ_TRACKMEM
2858 newBlock->owner = this;
2859#endif
2860 newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
2861 newBlock->next = nullptr;
2862
2863 // Insert the new block into the index
2864 idxEntry->value.store(newBlock, std::memory_order_relaxed);
2865
2866 // Store the chain of blocks so that we can undo if later allocations fail,
2867 // and so that we can find the blocks when we do the actual enqueueing
2868 if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
2869 firstAllocatedBlock != nullptr) {
2870 assert(this->tailBlock != nullptr);
2871 this->tailBlock->next = newBlock;
2872 }
2873 this->tailBlock = newBlock;
2874 endBlock = newBlock;
2875 firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
2876 } while (blockBaseDiff > 0);
2877 }
2878
2879 // Enqueue, one block at a time
2880 index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
2881 currentTailIndex = startTailIndex;
2882 this->tailBlock = startBlock;
2883 assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr ||
2884 count == 0);
2885 if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
2886 this->tailBlock = firstAllocatedBlock;
2887 }
2888 while (true) {
2889 index_t stopIndex =
2890 (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
2891 if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
2892 stopIndex = newTailIndex;
2893 }
2894 MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(
2895 T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst))))
2896 {
2897 while (currentTailIndex != stopIndex) {
2898 new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
2899 }
2900 }
2901 else
2902 {
2903 MOODYCAMEL_TRY
2904 {
2905 while (currentTailIndex != stopIndex) {
2906 new ((*this->tailBlock)[currentTailIndex])
2907 T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(
2908 T,
2909 decltype(*itemFirst),
2910 new (static_cast<T*>(nullptr))
2911 T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
2912 ++currentTailIndex;
2913 ++itemFirst;
2914 }
2915 }
2916 MOODYCAMEL_CATCH(...)
2917 {
2918 auto constructedStopIndex = currentTailIndex;
2919 auto lastBlockEnqueued = this->tailBlock;
2920
2921 if (!details::is_trivially_destructible<T>::value) {
2922 auto block = startBlock;
2923 if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2924 block = firstAllocatedBlock;
2925 }
2926 currentTailIndex = startTailIndex;
2927 while (true) {
2928 stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2929 static_cast<index_t>(BLOCK_SIZE);
2930 if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
2931 stopIndex = constructedStopIndex;
2932 }
2933 while (currentTailIndex != stopIndex) {
2934 (*block)[currentTailIndex++]->~T();
2935 }
2936 if (block == lastBlockEnqueued) {
2937 break;
2938 }
2939 block = block->next;
2940 }
2941 }
2942
2943 currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2944 for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
2945 currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2946 auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
2947 idxEntry->value.store(nullptr, std::memory_order_relaxed);
2948 rewind_block_index_tail();
2949 }
2950 this->parent->add_blocks_to_free_list(firstAllocatedBlock);
2951 this->tailBlock = startBlock;
2952 MOODYCAMEL_RETHROW;
2953 }
2954 }
2955
2956 if (this->tailBlock == endBlock) {
2957 assert(currentTailIndex == newTailIndex);
2958 break;
2959 }
2960 this->tailBlock = this->tailBlock->next;
2961 }
2962 this->tailIndex.store(newTailIndex, std::memory_order_release);
2963 return true;
2964 }
2965#ifdef _MSC_VER
2966#pragma warning(pop)
2967#endif
2968
2969 template <typename It> size_t dequeue_bulk(It& itemFirst, size_t max)
2970 {
2971 auto tail = this->tailIndex.load(std::memory_order_relaxed);
2972 auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2973 auto desiredCount =
2974 static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
2975 if (details::circular_less_than<size_t>(0, desiredCount)) {
2976 desiredCount = desiredCount < max ? desiredCount : max;
2977 std::atomic_thread_fence(std::memory_order_acquire);
2978
2979 auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
2980
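// Reload tail (acquire) and recompute how many of the claimed elements actually exist; any
// over-claim is handed back through dequeueOvercommit below.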
2981 tail = this->tailIndex.load(std::memory_order_acquire);
2982 auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
2983 if (details::circular_less_than<size_t>(0, actualCount)) {
2984 actualCount = desiredCount < actualCount ? desiredCount : actualCount;
2985 if (actualCount < desiredCount) {
2986 this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
2987 }
2988
2989 // Get the first index. Note that since there are guaranteed to be at least actualCount elements, this
2990 // will never exceed tail.
2991 auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
2992
2993 // Iterate the blocks and dequeue
2994 auto index = firstIndex;
2995 BlockIndexHeader* localBlockIndex;
2996 auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);
2997 do {
2998 auto blockStartIndex = index;
2999 index_t endIndex =
3000 (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
3001 endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount),
3002 endIndex)
3003 ? firstIndex + static_cast<index_t>(actualCount)
3004 : endIndex;
3005
3006 auto entry = localBlockIndex->index[indexIndex];
3007 auto block = entry->value.load(std::memory_order_relaxed);
3008 if (MOODYCAMEL_NOEXCEPT_ASSIGN(
3009 T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
3010 while (index != endIndex) {
3011 auto& el = *((*block)[index]);
3012 *itemFirst++ = std::move(el);
3013 el.~T();
3014 ++index;
3015 }
3016 } else {
3017 MOODYCAMEL_TRY
3018 {
3019 while (index != endIndex) {
3020 auto& el = *((*block)[index]);
3021 *itemFirst = std::move(el);
3022 ++itemFirst;
3023 el.~T();
3024 ++index;
3025 }
3026 }
3027 MOODYCAMEL_CATCH(...)
3028 {
3029 do {
3030 entry = localBlockIndex->index[indexIndex];
3031 block = entry->value.load(std::memory_order_relaxed);
3032 while (index != endIndex) {
3033 (*block)[index++]->~T();
3034 }
3035
3036 if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(
3037 blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
3038#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3039 debug::DebugLock lock(mutex);
3040#endif
3041 entry->value.store(nullptr, std::memory_order_relaxed);
3042 this->parent->add_block_to_free_list(block);
3043 }
3044 indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
3045
3046 blockStartIndex = index;
3047 endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
3048 static_cast<index_t>(BLOCK_SIZE);
3049 endIndex = details::circular_less_than<index_t>(
3050 firstIndex + static_cast<index_t>(actualCount), endIndex)
3051 ? firstIndex + static_cast<index_t>(actualCount)
3052 : endIndex;
3053 } while (index != firstIndex + actualCount);
3054
3055 MOODYCAMEL_RETHROW;
3056 }
3057 }
3058 if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(
3059 blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
3060 {
3061#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3062 debug::DebugLock lock(mutex);
3063#endif
3064 // Note that the set_many_empty above did a release, meaning that anybody who acquires
3065 // the block we're about to free can use it safely since our writes (and reads!) will
3066 // have happened-before then.
3067 entry->value.store(nullptr, std::memory_order_relaxed);
3068 }
3069 this->parent->add_block_to_free_list(block); // releases the above store
3070 }
3071 indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
3072 } while (index != firstIndex + actualCount);
3073
3074 return actualCount;
3075 } else {
3076 this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
3077 }
3078 }
3079
3080 return 0;
3081 }
3082
3083 private:
3084 // The block size must be > 1, so any number with the low bit set is an invalid block base index
3085 static const index_t INVALID_BLOCK_BASE = 1;
3086
3087 struct BlockIndexEntry {
3088 std::atomic<index_t> key;
3089 std::atomic<Block*> value;
3090 };
3091
3092 struct BlockIndexHeader {
3093 size_t capacity;
3094 std::atomic<size_t> tail;
3095 BlockIndexEntry* entries;
3096 BlockIndexEntry** index;
3097 BlockIndexHeader* prev;
3098 };
3099
3100 template <AllocationMode allocMode>
3101 inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex)
3102 {
3103 auto localBlockIndex =
3104 blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK
3105 if (localBlockIndex == nullptr) {
3106 return false; // this can happen if new_block_index failed in the constructor
3107 }
3108 size_t newTail =
3109 (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
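// The block index is a circular buffer; the slot just past tail is usable if it was never
// populated or if its block has since been released (its value reset to nullptr).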
3110 idxEntry = localBlockIndex->index[newTail];
3111 if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
3112 idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
3113
3114 idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
3115 localBlockIndex->tail.store(newTail, std::memory_order_release);
3116 return true;
3117 }
3118
3119 // No room in the old block index, try to allocate another one!
3120 MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc)
3121 {
3122 return false;
3123 }
3124 else if (!new_block_index())
3125 {
3126 return false;
3127 }
3128 else
3129 {
3130 localBlockIndex = blockIndex.load(std::memory_order_relaxed);
3131 newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
3132 idxEntry = localBlockIndex->index[newTail];
3133 assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
3134 idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
3135 localBlockIndex->tail.store(newTail, std::memory_order_release);
3136 return true;
3137 }
3138 }
3139
3140 inline void rewind_block_index_tail()
3141 {
3142 auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
3143 localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) &
3144 (localBlockIndex->capacity - 1),
3145 std::memory_order_relaxed);
3146 }
3147
3148 inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const
3149 {
3150 BlockIndexHeader* localBlockIndex;
3151 auto idx = get_block_index_index_for_index(index, localBlockIndex);
3152 return localBlockIndex->index[idx];
3153 }
3154
3155 inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
3156 {
3157#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3158 debug::DebugLock lock(mutex);
3159#endif
3160 index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
3161 localBlockIndex = blockIndex.load(std::memory_order_acquire);
3162 auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
3163 auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
3164 assert(tailBase != INVALID_BLOCK_BASE);
3165 // Note: Must use division instead of shift because the index may wrap around, causing a negative
3166 // offset, whose negativity we want to preserve
3167 auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) /
3168 static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
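// e.g. with BLOCK_SIZE == 32, tailBase == 128 and a requested block base of 64, the signed
// difference is -64 and offset is -2, i.e. two slots before tail (modulo capacity).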
3169 size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
3170 assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index &&
3171 localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
3172 return idx;
3173 }
3174
3175 bool new_block_index()
3176 {
3177 auto prev = blockIndex.load(std::memory_order_relaxed);
3178 size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
3179 auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
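// The single allocation is carved into the header, an aligned array of entryCount fresh entries,
// and a pointer index with nextBlockIndexCapacity slots; pointers to the previous index's entries
// are copied over below so blocks that are already registered remain reachable.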
3180 auto raw = static_cast<char*>(
3181 (Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 +
3182 sizeof(BlockIndexEntry) * entryCount + std::alignment_of<BlockIndexEntry*>::value - 1 +
3183 sizeof(BlockIndexEntry*) * nextBlockIndexCapacity));
3184 if (raw == nullptr) {
3185 return false;
3186 }
3187
3188 auto header = new (raw) BlockIndexHeader;
3189 auto entries =
3190 reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
3191 auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(
3192 reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));
3193 if (prev != nullptr) {
3194 auto prevTail = prev->tail.load(std::memory_order_relaxed);
3195 auto prevPos = prevTail;
3196 size_t i = 0;
3197 do {
3198 prevPos = (prevPos + 1) & (prev->capacity - 1);
3199 index[i++] = prev->index[prevPos];
3200 } while (prevPos != prevTail);
3201 assert(i == prevCapacity);
3202 }
3203 for (size_t i = 0; i != entryCount; ++i) {
3204 new (entries + i) BlockIndexEntry;
3205 entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
3206 index[prevCapacity + i] = entries + i;
3207 }
3208 header->prev = prev;
3209 header->entries = entries;
3210 header->index = index;
3211 header->capacity = nextBlockIndexCapacity;
3212 header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);
3213
3214 blockIndex.store(header, std::memory_order_release);
3215
3216 nextBlockIndexCapacity <<= 1;
3217
3218 return true;
3219 }
3220
3221 private:
3222 size_t nextBlockIndexCapacity;
3223 std::atomic<BlockIndexHeader*> blockIndex;
3224
3225#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3226 public:
3227 details::ThreadExitListener threadExitListener;
3228
3229 private:
3230#endif
3231
3232#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3233 public:
3234 ImplicitProducer* nextImplicitProducer;
3235
3236 private:
3237#endif
3238
3239#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3240 mutable debug::DebugMutex mutex;
3241#endif
3242#ifdef MCDBGQ_TRACKMEM
3243 friend struct MemStats;
3244#endif
3245 };
3246
3247 //////////////////////////////////
3248 // Block pool manipulation
3249 //////////////////////////////////
3250
3251 void populate_initial_block_list(size_t blockCount)
3252 {
3253 initialBlockPoolSize = blockCount;
3254 if (initialBlockPoolSize == 0) {
3255 initialBlockPool = nullptr;
3256 return;
3257 }
3258
3259 initialBlockPool = create_array<Block>(blockCount);
3260 if (initialBlockPool == nullptr) {
3261 initialBlockPoolSize = 0;
3262 }
3263 for (size_t i = 0; i < initialBlockPoolSize; ++i) {
3264 initialBlockPool[i].dynamicallyAllocated = false;
3265 }
3266 }
3267
3268 inline Block* try_get_block_from_initial_pool()
3269 {
3270 if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {
3271 return nullptr;
3272 }
3273
3274 auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
3275
3276 return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
3277 }
3278
3279 inline void add_block_to_free_list(Block* block)
3280 {
3281#ifdef MCDBGQ_TRACKMEM
3282 block->owner = nullptr;
3283#endif
3284 if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) {
3285 destroy(block);
3286 } else {
3287 freeList.add(block);
3288 }
3289 }
3290
3291 inline void add_blocks_to_free_list(Block* block)
3292 {
3293 while (block != nullptr) {
3294 auto next = block->next;
3295 add_block_to_free_list(block);
3296 block = next;
3297 }
3298 }
3299
3300 inline Block* try_get_block_from_free_list() { return freeList.try_get(); }
3301
3302 // Gets a free block from one of the memory pools, or allocates a new one (if applicable)
3303 template <AllocationMode canAlloc> Block* requisition_block()
3304 {
3305 auto block = try_get_block_from_initial_pool();
3306 if (block != nullptr) {
3307 return block;
3308 }
3309
3310 block = try_get_block_from_free_list();
3311 if (block != nullptr) {
3312 return block;
3313 }
3314
3315 MOODYCAMEL_CONSTEXPR_IF(canAlloc == CanAlloc)
3316 {
3317 return create<Block>();
3318 }
3319 else
3320 {
3321 return nullptr;
3322 }
3323 }
3324
3325#ifdef MCDBGQ_TRACKMEM
3326 public:
3327 struct MemStats {
3328 size_t allocatedBlocks;
3329 size_t usedBlocks;
3330 size_t freeBlocks;
3331 size_t ownedBlocksExplicit;
3332 size_t ownedBlocksImplicit;
3333 size_t implicitProducers;
3334 size_t explicitProducers;
3335 size_t elementsEnqueued;
3336 size_t blockClassBytes;
3337 size_t queueClassBytes;
3338 size_t implicitBlockIndexBytes;
3339 size_t explicitBlockIndexBytes;
3340
3341 friend class ConcurrentQueue;
3342
3343 private:
3344 static MemStats getFor(ConcurrentQueue* q)
3345 {
3346 MemStats stats = { 0 };
3347
3348 stats.elementsEnqueued = q->size_approx();
3349
3350 auto block = q->freeList.head_unsafe();
3351 while (block != nullptr) {
3352 ++stats.allocatedBlocks;
3353 ++stats.freeBlocks;
3354 block = block->freeListNext.load(std::memory_order_relaxed);
3355 }
3356
3357 for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr;
3358 ptr = ptr->next_prod()) {
3359 bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
3360 stats.implicitProducers += implicit ? 1 : 0;
3361 stats.explicitProducers += implicit ? 0 : 1;
3362
3363 if (implicit) {
3364 auto prod = static_cast<ImplicitProducer*>(ptr);
3365 stats.queueClassBytes += sizeof(ImplicitProducer);
3366 auto head = prod->headIndex.load(std::memory_order_relaxed);
3367 auto tail = prod->tailIndex.load(std::memory_order_relaxed);
3368 auto hash = prod->blockIndex.load(std::memory_order_relaxed);
3369 if (hash != nullptr) {
3370 for (size_t i = 0; i != hash->capacity; ++i) {
3371 if (hash->index[i]->key.load(std::memory_order_relaxed) !=
3372 ImplicitProducer::INVALID_BLOCK_BASE &&
3373 hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) {
3374 ++stats.allocatedBlocks;
3375 ++stats.ownedBlocksImplicit;
3376 }
3377 }
3378 stats.implicitBlockIndexBytes +=
3379 hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry);
3380 for (; hash != nullptr; hash = hash->prev) {
3381 stats.implicitBlockIndexBytes +=
3382 sizeof(typename ImplicitProducer::BlockIndexHeader) +
3383 hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);
3384 }
3385 }
3386 for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {
3387 // auto block = prod->get_block_index_entry_for_index(head);
3388 ++stats.usedBlocks;
3389 }
3390 } else {
3391 auto prod = static_cast<ExplicitProducer*>(ptr);
3392 stats.queueClassBytes += sizeof(ExplicitProducer);
3393 auto tailBlock = prod->tailBlock;
3394 bool wasNonEmpty = false;
3395 if (tailBlock != nullptr) {
3396 auto block = tailBlock;
3397 do {
3398 ++stats.allocatedBlocks;
3399 if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) {
3400 ++stats.usedBlocks;
3401 wasNonEmpty = wasNonEmpty || block != tailBlock;
3402 }
3403 ++stats.ownedBlocksExplicit;
3404 block = block->next;
3405 } while (block != tailBlock);
3406 }
3407 auto index = prod->blockIndex.load(std::memory_order_relaxed);
3408 while (index != nullptr) {
3409 stats.explicitBlockIndexBytes +=
3410 sizeof(typename ExplicitProducer::BlockIndexHeader) +
3411 index->size * sizeof(typename ExplicitProducer::BlockIndexEntry);
3412 index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev);
3413 }
3414 }
3415 }
3416
3417 auto freeOnInitialPool =
3418 q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize
3419 ? 0
3420 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
3421 stats.allocatedBlocks += freeOnInitialPool;
3422 stats.freeBlocks += freeOnInitialPool;
3423
3424 stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
3425 stats.queueClassBytes += sizeof(ConcurrentQueue);
3426
3427 return stats;
3428 }
3429 };
3430
3431 // For debugging only. Not thread-safe.
3432 MemStats getMemStats() { return MemStats::getFor(this); }
3433
3434 private:
3435 friend struct MemStats;
3436#endif
3437
3438 //////////////////////////////////
3439 // Producer list manipulation
3440 //////////////////////////////////
3441
3442 ProducerBase* recycle_or_create_producer(bool isExplicit)
3443 {
3444#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3445 debug::DebugLock lock(implicitProdMutex);
3446#endif
3447 // Try to re-use one first
3448 for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
3449 if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) {
3450 bool expected = true;
3451 if (ptr->inactive.compare_exchange_strong(
3452 expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) {
3453 // We caught one! It's been marked as activated, the caller can have it
3454 return ptr;
3455 }
3456 }
3457 }
3458
3459 return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this))
3460 : create<ImplicitProducer>(this));
3461 }
3462
3463 ProducerBase* add_producer(ProducerBase* producer)
3464 {
3465 // Handle failed memory allocation
3466 if (producer == nullptr) {
3467 return nullptr;
3468 }
3469
3470 producerCount.fetch_add(1, std::memory_order_relaxed);
3471
3472 // Add it to the lock-free list
3473 auto prevTail = producerListTail.load(std::memory_order_relaxed);
3474 do {
3475 producer->next = prevTail;
3476 } while (!producerListTail.compare_exchange_weak(
3477 prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
3478
3479#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3480 if (producer->isExplicit) {
3481 auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
3482 do {
3483 static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;
3484 } while (!explicitProducers.compare_exchange_weak(prevTailExplicit,
3485 static_cast<ExplicitProducer*>(producer),
3486 std::memory_order_release,
3487 std::memory_order_relaxed));
3488 } else {
3489 auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
3490 do {
3491 static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;
3492 } while (!implicitProducers.compare_exchange_weak(prevTailImplicit,
3493 static_cast<ImplicitProducer*>(producer),
3494 std::memory_order_release,
3495 std::memory_order_relaxed));
3496 }
3497#endif
3498
3499 return producer;
3500 }
3501
3502 void reown_producers()
3503 {
3504 // After another instance is moved-into/swapped-with this one, all the
3505 // producers we stole still think their parents are the other queue.
3506 // So fix them up!
3507 for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) {
3508 ptr->parent = this;
3509 }
3510 }
3511
3512 //////////////////////////////////
3513 // Implicit producer hash
3514 //////////////////////////////////
3515
3516 struct ImplicitProducerKVP {
3517 std::atomic<details::thread_id_t> key;
3518 ImplicitProducer*
3519 value; // No need for atomicity since it's only read by the thread that sets it in the first place
3520
3521 ImplicitProducerKVP()
3522 : value(nullptr)
3523 {}
3524
3525 ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
3526 {
3527 key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
3528 value = other.value;
3529 }
3530
3531 inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
3532 {
3533 swap(other);
3534 return *this;
3535 }
3536
3537 inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
3538 {
3539 if (this != &other) {
3540 details::swap_relaxed(key, other.key);
3541 std::swap(value, other.value);
3542 }
3543 }
3544 };
3545
3546 template <typename XT, typename XTraits>
3547 friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&,
3548 typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;
3549
3550 struct ImplicitProducerHash {
3551 size_t capacity;
3552 ImplicitProducerKVP* entries;
3553 ImplicitProducerHash* prev;
3554 };
3555
3556 inline void populate_initial_implicit_producer_hash()
3557 {
3558 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)
3559 {
3560 return;
3561 }
3562 else
3563 {
3564 implicitProducerHashCount.store(0, std::memory_order_relaxed);
3565 auto hash = &initialImplicitProducerHash;
3566 hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
3567 hash->entries = &initialImplicitProducerHashEntries[0];
3568 for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
3569 initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
3570 }
3571 hash->prev = nullptr;
3572 implicitProducerHash.store(hash, std::memory_order_relaxed);
3573 }
3574 }
3575
3576 void swap_implicit_producer_hashes(ConcurrentQueue& other)
3577 {
3578 MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)
3579 {
3580 return;
3581 }
3582 else
3583 {
3584 // Swap (assumes our implicit producer hash is initialized)
3585 initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
3586 initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
3587 other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
3588
3589 details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
3590
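// The embedded initial hash tables cannot be exchanged by pointer, so after swapping the head
// pointers, any chain still linking to the other queue's embedded table is re-pointed at ours
// (and vice versa).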
3591 details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
3592 if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
3593 implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
3594 } else {
3595 ImplicitProducerHash* hash;
3596 for (hash = implicitProducerHash.load(std::memory_order_relaxed);
3597 hash->prev != &other.initialImplicitProducerHash;
3598 hash = hash->prev) {
3599 continue;
3600 }
3601 hash->prev = &initialImplicitProducerHash;
3602 }
3603 if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
3604 other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
3605 } else {
3606 ImplicitProducerHash* hash;
3607 for (hash = other.implicitProducerHash.load(std::memory_order_relaxed);
3608 hash->prev != &initialImplicitProducerHash;
3609 hash = hash->prev) {
3610 continue;
3611 }
3612 hash->prev = &other.initialImplicitProducerHash;
3613 }
3614 }
3615 }
3616
3617 // Only fails (returns nullptr) if memory allocation fails
3618 ImplicitProducer* get_or_add_implicit_producer()
3619 {
3620 // Note that since the data is essentially thread-local (key is thread ID),
3621 // there's a reduced need for fences (memory ordering is already consistent
3622 // for any individual thread), except for the current table itself.
3623
3624 // Start by looking for the thread ID in the current and all previous hash tables.
3625 // If it's not found, it must not be in there yet, since this same thread would
3626 // have added it previously to one of the tables that we traversed.
3627
3628 // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
3629
3630#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3631 debug::DebugLock lock(implicitProdMutex);
3632#endif
3633
3634 auto id = details::thread_id();
3635 auto hashedId = details::hash_thread_id(id);
3636
3637 auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
3638 assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null)
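// Open addressing with linear probing; capacities are powers of two, so `index &= capacity - 1u`
// implements the wrap-around.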
3639 for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
3640 // Look for the id in this hash
3641 auto index = hashedId;
3642 while (true) { // Not an infinite loop because at least one slot is free in the hash table
3643 index &= hash->capacity - 1u;
3644
3645 auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
3646 if (probedKey == id) {
3647 // Found it! If we had to search several hashes deep, though, we should lazily add it
3648 // to the current main hash table to avoid the extended search next time.
3649 // Note there's guaranteed to be room in the current hash table since every subsequent
3650 // table implicitly reserves space for all previous tables (there's only one
3651 // implicitProducerHashCount).
3652 auto value = hash->entries[index].value;
3653 if (hash != mainHash) {
3654 index = hashedId;
3655 while (true) {
3656 index &= mainHash->capacity - 1u;
3657 auto empty = details::invalid_thread_id;
3658#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3659 auto reusable = details::invalid_thread_id2;
3660 if (mainHash->entries[index].key.compare_exchange_strong(
3661 empty, id, std::memory_order_seq_cst, std::memory_order_relaxed) ||
3662 mainHash->entries[index].key.compare_exchange_strong(
3663 reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
3664#else
3665 if (mainHash->entries[index].key.compare_exchange_strong(
3666 empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
3667#endif
3668 mainHash->entries[index].value = value;
3669 break;
3670 }
3671 ++index;
3672 }
3673 }
3674
3675 return value;
3676 }
3677 if (probedKey == details::invalid_thread_id) {
3678 break; // Not in this hash table
3679 }
3680 ++index;
3681 }
3682 }
3683
3684 // Insert!
3685 auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
3686 while (true) {
3687 // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
3688 if (newCount >= (mainHash->capacity >> 1) &&
3689 !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
3690 // We've acquired the resize lock, try to allocate a bigger hash table.
3691 // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
3692 // we reload implicitProducerHash it must be the most recent version (it only gets changed within this
3693 // locked block).
3694 mainHash = implicitProducerHash.load(std::memory_order_acquire);
3695 if (newCount >= (mainHash->capacity >> 1)) {
3696 size_t newCapacity = mainHash->capacity << 1;
3697 while (newCount >= (newCapacity >> 1)) {
3698 newCapacity <<= 1;
3699 }
3700 auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) +
3701 std::alignment_of<ImplicitProducerKVP>::value - 1 +
3702 sizeof(ImplicitProducerKVP) * newCapacity));
3703 if (raw == nullptr) {
3704 // Allocation failed
3705 implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3706 implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
3707 return nullptr;
3708 }
3709
3710 auto newHash = new (raw) ImplicitProducerHash;
3711 newHash->capacity = static_cast<size_t>(newCapacity);
3712 newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(
3713 details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
3714 for (size_t i = 0; i != newCapacity; ++i) {
3715 new (newHash->entries + i) ImplicitProducerKVP;
3716 newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
3717 }
3718 newHash->prev = mainHash;
3719 implicitProducerHash.store(newHash, std::memory_order_release);
3720 implicitProducerHashResizeInProgress.clear(std::memory_order_release);
3721 mainHash = newHash;
3722 } else {
3723 implicitProducerHashResizeInProgress.clear(std::memory_order_release);
3724 }
3725 }
3726
3727 // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
3728 // to finish being allocated by another thread (and if we just finished allocating above, the condition will
3729 // always be true)
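// (capacity >> 1) + (capacity >> 2) is the three-quarters-full threshold.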
3730 if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
3731 auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false));
3732 if (producer == nullptr) {
3733 implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3734 return nullptr;
3735 }
3736
3737#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3738 producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
3739 producer->threadExitListener.userData = producer;
3740 details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
3741#endif
3742
3743 auto index = hashedId;
3744 while (true) {
3745 index &= mainHash->capacity - 1u;
3746 auto empty = details::invalid_thread_id;
3747#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3748 auto reusable = details::invalid_thread_id2;
3749 if (mainHash->entries[index].key.compare_exchange_strong(
3750 reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
3751 implicitProducerHashCount.fetch_sub(
3752 1, std::memory_order_relaxed); // already counted as a used slot
3753 mainHash->entries[index].value = producer;
3754 break;
3755 }
3756#endif
3757 if (mainHash->entries[index].key.compare_exchange_strong(
3758 empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) {
3759 mainHash->entries[index].value = producer;
3760 break;
3761 }
3762 ++index;
3763 }
3764 return producer;
3765 }
3766
3767 // Hmm, the old hash is quite full and somebody else is busy allocating a new one.
3768 // We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
3769 // we try to allocate ourselves).
3770 mainHash = implicitProducerHash.load(std::memory_order_acquire);
3771 }
3772 }
3773
3774#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3775 void implicit_producer_thread_exited(ImplicitProducer* producer)
3776 {
3777 // Remove from hash
3778#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3779 debug::DebugLock lock(implicitProdMutex);
3780#endif
3781 auto hash = implicitProducerHash.load(std::memory_order_acquire);
3782 assert(hash !=
3783 nullptr); // The thread exit listener is only registered if we were added to a hash in the first place
3784 auto id = details::thread_id();
3785 auto hashedId = details::hash_thread_id(id);
3786 details::thread_id_t probedKey;
3787
3788 // We need to traverse all the hashes just in case other threads aren't on the current one yet and are
3789 // trying to add an entry thinking there's a free slot (because they reused a producer)
3790 for (; hash != nullptr; hash = hash->prev) {
3791 auto index = hashedId;
3792 do {
3793 index &= hash->capacity - 1u;
3794 probedKey = id;
3795 if (hash->entries[index].key.compare_exchange_strong(
3796 probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) {
3797 break;
3798 }
3799 ++index;
3800 } while (probedKey !=
3801 details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it
3802 // yet, or if we weren't added to this hash in the first place
3803 }
3804
3805 // Mark the queue as being recyclable
3806 producer->inactive.store(true, std::memory_order_release);
3807 }
3808
3809 static void implicit_producer_thread_exited_callback(void* userData)
3810 {
3811 auto producer = static_cast<ImplicitProducer*>(userData);
3812 auto queue = producer->parent;
3813 queue->implicit_producer_thread_exited(producer);
3814 }
3815#endif
3816
3817 //////////////////////////////////
3818 // Utility functions
3819 //////////////////////////////////
3820
3821 template <typename TAlign> static inline void* aligned_malloc(size_t size)
3822 {
3823 MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
3824 return (Traits::malloc)(size);
3825 else
3826 {
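// Over-allocate by (alignment - 1) bytes plus room for one pointer, and stash the original
// malloc'd pointer immediately before the aligned address so that aligned_free can recover it.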
3827 size_t alignment = std::alignment_of<TAlign>::value;
3828 void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));
3829 if (!raw)
3830 return nullptr;
3831 char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));
3832 *(reinterpret_cast<void**>(ptr) - 1) = raw;
3833 return ptr;
3834 }
3835 }
3836
3837 template <typename TAlign> static inline void aligned_free(void* ptr)
3838 {
3839 MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
3840 return (Traits::free)(ptr);
3841 else(Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);
3842 }
3843
3844 template <typename U> static inline U* create_array(size_t count)
3845 {
3846 assert(count > 0);
3847 U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
3848 if (p == nullptr)
3849 return nullptr;
3850
3851 for (size_t i = 0; i != count; ++i)
3852 new (p + i) U();
3853 return p;
3854 }
3855
3856 template <typename U> static inline void destroy_array(U* p, size_t count)
3857 {
3858 if (p != nullptr) {
3859 assert(count > 0);
3860 for (size_t i = count; i != 0;)
3861 (p + --i)->~U();
3862 }
3863 aligned_free<U>(p);
3864 }
3865
3866 template <typename U> static inline U* create()
3867 {
3868 void* p = aligned_malloc<U>(sizeof(U));
3869 return p != nullptr ? new (p) U : nullptr;
3870 }
3871
3872 template <typename U, typename A1> static inline U* create(A1&& a1)
3873 {
3874 void* p = aligned_malloc<U>(sizeof(U));
3875 return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
3876 }
3877
3878 template <typename U> static inline void destroy(U* p)
3879 {
3880 if (p != nullptr)
3881 p->~U();
3882 aligned_free<U>(p);
3883 }
3884
3885 private:
3886 std::atomic<ProducerBase*> producerListTail;
3887 std::atomic<std::uint32_t> producerCount;
3888
3889 std::atomic<size_t> initialBlockPoolIndex;
3890 Block* initialBlockPool;
3891 size_t initialBlockPoolSize;
3892
3893#ifndef MCDBGQ_USEDEBUGFREELIST
3894 FreeList<Block> freeList;
3895#else
3896 debug::DebugFreeList<Block> freeList;
3897#endif
3898
3899 std::atomic<ImplicitProducerHash*> implicitProducerHash;
3900 std::atomic<size_t> implicitProducerHashCount; // Number of slots logically used
3901 ImplicitProducerHash initialImplicitProducerHash;
3902 std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
3903 std::atomic_flag implicitProducerHashResizeInProgress;
3904
3905 std::atomic<std::uint32_t> nextExplicitConsumerId;
3906 std::atomic<std::uint32_t> globalExplicitConsumerOffset;
3907
3908#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3909 debug::DebugMutex implicitProdMutex;
3910#endif
3911
3912#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3913 std::atomic<ExplicitProducer*> explicitProducers;
3914 std::atomic<ImplicitProducer*> implicitProducers;
3915#endif
3916};
3917
3918template <typename T, typename Traits>
3919ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue)
3920 : producer(queue.recycle_or_create_producer(true))
3921{
3922 if (producer != nullptr) {
3923 producer->token = this;
3924 }
3925}
3926
3927template <typename T, typename Traits>
3928ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue)
3929 : producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true))
3930{
3931 if (producer != nullptr) {
3932 producer->token = this;
3933 }
3934}
3935
3936template <typename T, typename Traits>
3937ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
3938 : itemsConsumedFromCurrent(0)
3939 , currentProducer(nullptr)
3940 , desiredProducer(nullptr)
3941{
3942 initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
3943 lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
3944}
3945
3946template <typename T, typename Traits>
3947ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
3948 : itemsConsumedFromCurrent(0)
3949 , currentProducer(nullptr)
3950 , desiredProducer(nullptr)
3951{
3952 initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(
3953 1, std::memory_order_release);
3954 lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
3955}
3956
3957template <typename T, typename Traits>
3958inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
3959{
3960 a.swap(b);
3961}
3962
3963inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT
3964{
3965 a.swap(b);
3966}
3967
3968inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT
3969{
3970 a.swap(b);
3971}
3972
3973template <typename T, typename Traits>
3974inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a,
3975 typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT
3976{
3977 a.swap(b);
3978}
3979
3980} // namespace moodycamel
3981
3982#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
3983#pragma warning(pop)
3984#endif
3985
3986#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
3987#pragma GCC diagnostic pop
3988#endif
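// A minimal usage sketch (illustrative only, not part of the original header), assuming the public
// enqueue / try_dequeue overloads declared earlier in this file:
//
//     #include "concurrentqueue.h"
//
//     moodycamel::ConcurrentQueue<int> q;
//
//     // Implicit-producer path: each enqueuing thread gets a producer keyed by its thread ID.
//     q.enqueue(42);
//
//     // Token-based path: tokens bind to the explicit producer/consumer structures above.
//     moodycamel::ProducerToken ptok(q);
//     moodycamel::ConsumerToken ctok(q);
//     q.enqueue(ptok, 43);
//
//     int item;
//     while (q.try_dequeue(ctok, item)) {
//         // process item
//     }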