diff options
Diffstat (limited to 'gr')
25 files changed, 9 insertions, 7680 deletions
diff --git a/gr/include/gnuradio/buffer_cpu_host.h b/gr/include/gnuradio/buffer_cpu_host.h deleted file mode 100644 index fbc9a8429..000000000 --- a/gr/include/gnuradio/buffer_cpu_host.h +++ /dev/null @@ -1,142 +0,0 @@ -#pragma once - -#include <algorithm> -#include <cstdint> -#include <cstring> -#include <memory> -#include <vector> - -#include <gnuradio/buffer_sm.h> - -namespace gr { -enum class buffer_cpu_host_type { H2H, D2D, H2D, D2H, UNKNOWN }; -class buffer_cpu_host_reader; -class buffer_cpu_host : public buffer_sm -{ -private: - std::vector<uint8_t> _host_buffer; - std::vector<uint8_t> _device_buffer; - buffer_cpu_host_type _transfer_type = buffer_cpu_host_type::H2H; - -public: - buffer_cpu_host(size_t num_items, - size_t item_size, - buffer_cpu_host_type type, - std::shared_ptr<buffer_properties> buf_properties); - - static buffer_uptr make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties); - - void* read_ptr(size_t index) override; - void* write_ptr() override; - - void post_write(int num_items) override; - - buffer_cpu_host_type transfer_type() { return _transfer_type; } - - buffer_reader_uptr add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize) override; - - virtual bool output_blocked_callback(bool force = false) override - { - switch (_transfer_type) { - case buffer_cpu_host_type::H2H: - case buffer_cpu_host_type::H2D: - case buffer_cpu_host_type::D2D: - case buffer_cpu_host_type::D2H: - return output_blocked_callback_logic(force, std::memmove); - default: - return false; - } - } -}; - -class buffer_cpu_host_reader : public buffer_sm_reader -{ -private: - buffer_cpu_host* _buffer_cpu_host; - -public: - buffer_cpu_host_reader(buffer_cpu_host* bufp, - std::shared_ptr<buffer_properties> buf_props, - size_t itemsize, - size_t read_index = 0) - : buffer_sm_reader(bufp, itemsize, buf_props, read_index), _buffer_cpu_host(bufp) - { - } - - virtual bool input_blocked_callback(size_t items_required) override - { - // Only singly mapped buffers need to do anything with this callback - // std::scoped_lock guard(*(_buffer->mutex())); - std::lock_guard<std::mutex> guard(*(_buffer->mutex())); - - auto items_avail = items_available(); - - // GR_LOG_DEBUG(d_debug_logger, - // "input_blocked_callback: items_avail {}, _read_index {}, " - // "_write_index {}, items_required {}", - // items_avail, - // _read_index, - // _buffer->write_index(), - // items_required); - - // GR_LOG_DEBUG(d_debug_logger, - // "input_blocked_callback: total_written {}, total_read {}", - // _buffer->total_written(), - // total_read()); - - - // Maybe adjust read pointers from min read index? - // This would mean that *all* readers must be > (passed) the write index - if (items_avail < items_required && _buffer->write_index() < read_index()) { - // GR_LOG_DEBUG(d_debug_logger, "Calling adjust_buffer_data "); - - switch (_buffer_cpu_host->transfer_type()) { - case buffer_cpu_host_type::H2H: - case buffer_cpu_host_type::H2D: - case buffer_cpu_host_type::D2D: - case buffer_cpu_host_type::D2H: - return _buffer_cpu_host->adjust_buffer_data(std::memcpy, std::memmove); - default: - return false; - } - } - - return false; - } -}; - -class buffer_cpu_host_properties : public buffer_properties -{ -public: - // using std::shared_ptr<buffer_properties> = sptr; - buffer_cpu_host_properties(buffer_cpu_host_type buffer_type_) - : buffer_properties(), _buffer_type(buffer_type_) - { - _bff = buffer_cpu_host::make; - } - buffer_cpu_host_type buffer_type() { return _buffer_type; } - static std::shared_ptr<buffer_properties> - make(buffer_cpu_host_type buffer_type_ = buffer_cpu_host_type::H2H) - { - return std::static_pointer_cast<buffer_properties>( - std::make_shared<buffer_cpu_host_properties>(buffer_type_)); - } - -private: - buffer_cpu_host_type _buffer_type; -}; - -#define BUFFER_CPU_HOST_ARGS_H2H \ - buffer_cpu_host_properties::make(buffer_cpu_host_type::H2H) -#define BUFFER_CPU_HOST_ARGS_H2D \ - buffer_cpu_host_properties::make(buffer_cpu_host_type::H2D) -#define BUFFER_CPU_HOST_ARGS_D2H \ - buffer_cpu_host_properties::make(buffer_cpu_host_type::D2H) -#define BUFFER_CPU_HOST_ARGS_D2D \ - buffer_cpu_host_properties::make(buffer_cpu_host_type::D2D) - - -} // namespace gr diff --git a/gr/include/gnuradio/buffer_cuda.h b/gr/include/gnuradio/buffer_cuda.h deleted file mode 100644 index 7874716bd..000000000 --- a/gr/include/gnuradio/buffer_cuda.h +++ /dev/null @@ -1,84 +0,0 @@ -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <gnuradio/buffer.h> -#include <cuda.h> -#include <cuda_runtime.h> - -namespace gr { -enum class buffer_cuda_type { D2D, H2D, D2H, UNKNOWN }; - -class buffer_cuda : public buffer -{ -private: - uint8_t* _host_buffer; - uint8_t* _device_buffer; - buffer_cuda_type _type = buffer_cuda_type::UNKNOWN; - cudaStream_t stream; - -public: - using sptr = std::shared_ptr<buffer_cuda>; - buffer_cuda(size_t num_items, - size_t item_size, - buffer_cuda_type type, - std::shared_ptr<buffer_properties> buf_properties); - ~buffer_cuda(); - - static buffer_uptr make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties); - - void* read_ptr(size_t read_index); - void* write_ptr(); - - virtual void post_write(int num_items); - - virtual buffer_reader_uptr add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize); -}; - -class buffer_cuda_reader : public buffer_reader -{ -public: - buffer_cuda_reader(buffer* bufp, - std::shared_ptr<buffer_properties> buf_props, - size_t itemsize, - size_t read_index) - : buffer_reader(bufp, buf_props, itemsize, read_index) - { - } - - virtual void post_read(int num_items); -}; - -class buffer_cuda_properties : public buffer_properties -{ -public: - // using std::shared_ptr<buffer_properties> = sptr; - buffer_cuda_properties(buffer_cuda_type buffer_type_) - : buffer_properties(), _buffer_type(buffer_type_) - { - _bff = buffer_cuda::make; - } - buffer_cuda_type buffer_type() { return _buffer_type; } - static std::shared_ptr<buffer_properties> - make(buffer_cuda_type buffer_type_ = buffer_cuda_type::D2D) - { - return std::static_pointer_cast<buffer_properties>( - std::make_shared<buffer_cuda_properties>(buffer_type_)); - } - -private: - buffer_cuda_type _buffer_type; -}; - - -} // namespace gr - -#define CUDA_BUFFER_ARGS_H2D buffer_cuda_properties::make(buffer_cuda_type::H2D) -#define CUDA_BUFFER_ARGS_D2H buffer_cuda_properties::make(buffer_cuda_type::D2H) -#define CUDA_BUFFER_ARGS_D2D buffer_cuda_properties::make(buffer_cuda_type::D2D) diff --git a/gr/include/gnuradio/buffer_cuda_pinned.h b/gr/include/gnuradio/buffer_cuda_pinned.h deleted file mode 100644 index 809902b06..000000000 --- a/gr/include/gnuradio/buffer_cuda_pinned.h +++ /dev/null @@ -1,67 +0,0 @@ -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <gnuradio/buffer.h> - -namespace gr { - - -class buffer_cuda_pinned : public buffer -{ -private: - uint8_t* _pinned_buffer; - -public: - using sptr = std::shared_ptr<buffer_cuda_pinned>; - buffer_cuda_pinned(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buf_properties); - ~buffer_cuda_pinned(); - - static buffer_uptr make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties); - void* read_ptr(size_t index); - void* write_ptr(); - virtual void post_write(int num_items); - - virtual buffer_reader_uptr add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize); -}; -class buffer_cuda_pinned_reader : public buffer_reader -{ -public: - buffer_cuda_pinned_reader(buffer* bufp, - std::shared_ptr<buffer_properties> buf_props, - size_t itemsize, - size_t read_index) - : buffer_reader(bufp, buf_props, itemsize, read_index) - { - } - - virtual void post_read(int num_items); -}; - -class buffer_cuda_pinned_properties : public buffer_properties -{ -public: - // using std::shared_ptr<buffer_properties> = sptr; - buffer_cuda_pinned_properties() : buffer_properties() - { - _bff = buffer_cuda_pinned::make; - } - static std::shared_ptr<buffer_properties> make() - { - return std::dynamic_pointer_cast<buffer_properties>( - std::make_shared<buffer_cuda_pinned_properties>()); - } -}; - - -} // namespace gr - -#define CUDA_BUFFER_PINNED_ARGS buffer_cuda_pinned_properties::make() diff --git a/gr/include/gnuradio/buffer_cuda_sm.h b/gr/include/gnuradio/buffer_cuda_sm.h deleted file mode 100644 index e8a38b7f0..000000000 --- a/gr/include/gnuradio/buffer_cuda_sm.h +++ /dev/null @@ -1,151 +0,0 @@ -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <gnuradio/buffer_sm.h> -#include <cuda.h> -#include <cuda_runtime.h> - -namespace gr { -enum class buffer_cuda_sm_type { D2D, H2D, D2H, UNKNOWN }; - -class buffer_cuda_sm : public buffer_sm -{ -private: - uint8_t* _host_buffer; - uint8_t* _device_buffer; - buffer_cuda_sm_type _type = buffer_cuda_sm_type::UNKNOWN; - cudaStream_t stream; - -public: - using sptr = std::shared_ptr<buffer_cuda_sm>; - buffer_cuda_sm(size_t num_items, - size_t item_size, - buffer_cuda_sm_type type, - std::shared_ptr<buffer_properties> buf_properties); - ~buffer_cuda_sm(); - - static buffer_uptr make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties); - - void* read_ptr(size_t read_index); - void* write_ptr(); - buffer_cuda_sm_type type() { return _type; } - - virtual void post_write(int num_items); - - virtual buffer_reader_uptr add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize); - - static void* cuda_memcpy(void* dest, const void* src, std::size_t count); - static void* cuda_memmove(void* dest, const void* src, std::size_t count); - - virtual bool output_blocked_callback(bool force = false) override - { - switch (_type) { - case buffer_cuda_sm_type::H2D: - return output_blocked_callback_logic(force, std::memmove); - case buffer_cuda_sm_type::D2D: - case buffer_cuda_sm_type::D2H: - return output_blocked_callback_logic(force, cuda_memmove); - default: - return false; - } - } -}; - -class buffer_cuda_sm_reader : public buffer_sm_reader -{ -private: - // logger_ptr d_logger; - // logger_ptr d_debug_logger; - - buffer_cuda_sm* _buffer_cuda_sm; - -public: - buffer_cuda_sm_reader(buffer_cuda_sm* buffer, - std::shared_ptr<buffer_properties> buf_props, - size_t itemsize, - size_t read_index) - : buffer_sm_reader(buffer, itemsize, buf_props, read_index) - { - _buffer_cuda_sm = buffer; - // gr::configure_default_loggers(d_logger, d_debug_logger, "buffer_cuda_sm"); - } - - // virtual void post_read(int num_items); - - virtual bool input_blocked_callback(size_t items_required) override - { - // Only singly mapped buffers need to do anything with this callback - // std::scoped_lock guard(*(_buffer->mutex())); - std::lock_guard<std::mutex> guard(*(_buffer->mutex())); - - auto items_avail = items_available(); - - // GR_LOG_DEBUG(d_debug_logger, - // "input_blocked_callback: items_avail {}, _read_index {}, " - // "_write_index {}, items_required {}", - // items_avail, - // _read_index, - // _buffer->write_index(), - // items_required); - - // GR_LOG_DEBUG(d_debug_logger, - // "input_blocked_callback: total_written {}, total_read {}", - // _buffer->total_written(), - // total_read()); - - - // Maybe adjust read pointers from min read index? - // This would mean that *all* readers must be > (passed) the write index - if (items_avail < items_required && _buffer->write_index() < read_index()) { - // GR_LOG_DEBUG(d_debug_logger, "Calling adjust_buffer_data "); - - switch (_buffer_cuda_sm->type()) { - case buffer_cuda_sm_type::H2D: - case buffer_cuda_sm_type::D2D: - return _buffer_sm->adjust_buffer_data(buffer_cuda_sm::cuda_memcpy, - buffer_cuda_sm::cuda_memmove); - case buffer_cuda_sm_type::D2H: - return _buffer_sm->adjust_buffer_data(std::memcpy, std::memmove); - default: - return false; - } - } - - return false; - } -}; - -class buffer_cuda_sm_properties : public buffer_properties -{ -public: - // using std::shared_ptr<buffer_properties> = sptr; - buffer_cuda_sm_properties(buffer_cuda_sm_type buffer_type_) - : buffer_properties(), _buffer_type(buffer_type_) - { - _bff = buffer_cuda_sm::make; - } - buffer_cuda_sm_type buffer_type() { return _buffer_type; } - static std::shared_ptr<buffer_properties> - make(buffer_cuda_sm_type buffer_type_ = buffer_cuda_sm_type::D2D) - { - return std::static_pointer_cast<buffer_properties>( - std::make_shared<buffer_cuda_sm_properties>(buffer_type_)); - } - -private: - buffer_cuda_sm_type _buffer_type; -}; - - -} // namespace gr - -#define CUDA_BUFFER_SM_ARGS_H2D buffer_cuda_sm_properties::make(buffer_cuda_sm_type::H2D) -#define CUDA_BUFFER_SM_ARGS_D2H buffer_cuda_sm_properties::make(buffer_cuda_sm_type::D2H) -#define CUDA_BUFFER_SM_ARGS_D2D buffer_cuda_sm_properties::make(buffer_cuda_sm_type::D2D) diff --git a/gr/include/gnuradio/buffer_net_zmq.h b/gr/include/gnuradio/buffer_net_zmq.h deleted file mode 100644 index af31c6110..000000000 --- a/gr/include/gnuradio/buffer_net_zmq.h +++ /dev/null @@ -1,137 +0,0 @@ -#pragma once - -#include <algorithm> -#include <cstdint> -#include <cstring> -#include <memory> -#include <vector> - -#include <zmq.hpp> - -#include <gnuradio/buffer.h> - -namespace gr { - - -class buffer_net_zmq_reader; - -class buffer_net_zmq : public buffer -{ -private: - std::vector<uint8_t> _buffer; - - zmq::context_t _context; - zmq::socket_t _socket; - - -public: - using sptr = std::shared_ptr<buffer_net_zmq>; - buffer_net_zmq(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties, - int port); - ~buffer_net_zmq() override{}; - static buffer_uptr make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties); - - void* read_ptr(size_t index) override { return nullptr; } - size_t space_available() override { return _num_items; } - void* write_ptr() override { return _buffer.data(); } - - void post_write(int num_items) override - { - // send the data from buffer over the socket - d_debug_logger->debug("sending {} items", num_items); - auto res = _socket.send(zmq::buffer(write_ptr(), num_items * _item_size), - zmq::send_flags::none); - d_debug_logger->debug("send returned code {}", *res); - } - - buffer_reader_uptr add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize) override - { - // do nothing because readers will be added on zmq connect - return nullptr; - } -}; - -class buffer_net_zmq_reader : public buffer_reader -{ -private: - zmq::context_t _context; - zmq::socket_t _socket; - zmq::message_t _msg; - size_t _msg_idx = 0; - size_t _msg_size = 0; - - // Circular buffer for zmq to write into - gr::buffer_uptr _circbuf; - gr::buffer_reader_uptr _circbuf_rdr; - - logger_ptr d_logger; - logger_ptr d_debug_logger; - -public: - bool _recv_done = false; - static buffer_reader_uptr make(size_t itemsize, - std::shared_ptr<buffer_properties> buf_props); - buffer_net_zmq_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize, - const std::string& ipaddr, - int port); - - ~buffer_net_zmq_reader() override{}; - - bool read_info(buffer_info_t& info) override - { - auto ret = _circbuf_rdr->read_info(info); - return ret; - } - void* read_ptr() override { return _circbuf_rdr->read_ptr(); } - - // Tags not supported yet - const std::vector<tag_t>& tags() const override { return _circbuf->tags(); } - std::vector<tag_t> get_tags(size_t num_items) override { return {}; } - void post_read(int num_items) override - { - d_debug_logger->debug("post_read: {}", num_items); - _circbuf_rdr->post_read(num_items); - } -}; - - -class buffer_net_zmq_properties : public buffer_properties -{ -public: - // using std::shared_ptr<buffer_properties> = sptr; - buffer_net_zmq_properties(const std::string& ipaddr, int port) - : buffer_properties(), _ipaddr(ipaddr), _port(port) - { - _bff = buffer_net_zmq::make; - _brff = buffer_net_zmq_reader::make; - } - ~buffer_net_zmq_properties() override{}; - - static std::shared_ptr<buffer_properties> make(const std::string& ipaddr, - int port = 0) - { - return std::dynamic_pointer_cast<buffer_properties>( - std::make_shared<buffer_net_zmq_properties>(ipaddr, port)); - } - - static std::shared_ptr<buffer_properties> - make_from_params(const std::string& json_str); - - void set_port(int port) { _port = port; } - auto port() { return _port; } - auto ipaddr() { return _ipaddr; } - - std::string to_json() override; - -private: - std::string _ipaddr; - int _port; -}; - -} // namespace gr diff --git a/gr/include/gnuradio/buffer_sm.h b/gr/include/gnuradio/buffer_sm.h deleted file mode 100644 index 73a850eca..000000000 --- a/gr/include/gnuradio/buffer_sm.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once - -#include <algorithm> -#include <cstdint> -#include <cstring> -#include <memory> -#include <vector> - -#include <gnuradio/buffer.h> -// #include <gnuradio/logger.h> - -namespace gr { - -typedef void* (*memcpy_func_t)(void* dest, const void* src, std::size_t count); -typedef void* (*memmove_func_t)(void* dest, const void* src, std::size_t count); - -class buffer_sm_reader; -class buffer_sm : public buffer -{ -private: - std::vector<uint8_t> _buffer; - uint8_t* _raw_buffer; - - // logger_ptr d_logger; - // logger_ptr d_debug_logger; - -public: - using sptr = std::shared_ptr<buffer_sm>; - buffer_sm(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buf_properties); - - static buffer_uptr - make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties = nullptr); - - void* read_ptr(size_t index) override; - void* write_ptr() override; - - void post_write(int num_items) override; - - virtual bool - output_blocked_callback_logic(bool force = false, - memmove_func_t memmove_func = std::memmove); - - /*! - * \brief Return true if thread is ready to call the callback, false otherwise - */ - bool output_blkd_cb_ready(int output_multiple) override; - bool output_blocked_callback(bool force = false) override; - size_t space_available() override; - - bool write_info(buffer_info_t& info) override; - buffer_reader_uptr add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize) override; - - bool adjust_buffer_data(memcpy_func_t memcpy_func, memmove_func_t memmove_func); - - // Wipes away the default single mapped buffer and replaces it with something - // defined externally in the derived class - void set_bufp(uint8_t* ptr) - { - _raw_buffer = ptr; - _buffer.resize(0); - } -}; - -class buffer_sm_reader : public buffer_reader -{ -private: - logger_ptr d_logger; - logger_ptr d_debug_logger; - -protected: - buffer_sm* _buffer_sm; - -public: - buffer_sm_reader(buffer_sm* buffer, - size_t itemsize, - std::shared_ptr<buffer_properties> buf_props = nullptr, - size_t read_index = 0); - - void post_read(int num_items) override; - - /*! - * \brief Return true if thread is ready to call input_blocked_callback, - * false otherwise - */ - bool input_blkd_cb_ready(int items_required) override; - bool input_blocked_callback(size_t items_required) override; - size_t bytes_available() override; -}; - - -class buffer_sm_properties : public buffer_properties -{ -public: - buffer_sm_properties(); - - static std::shared_ptr<buffer_properties> make(); -}; - - -#define SM_BUFFER_ARGS buffer_sm_properties::make() - -} // namespace gr diff --git a/gr/include/gnuradio/meson.build b/gr/include/gnuradio/meson.build index 4c5e9d51f..64d995b5d 100644 --- a/gr/include/gnuradio/meson.build +++ b/gr/include/gnuradio/meson.build @@ -8,8 +8,6 @@ runtime_headers = [ 'block_group_properties.h', 'clonable_block.h', 'concurrent_queue.h', - 'buffer_cuda.h', - 'buffer_cuda_pinned.h', 'edge.h', 'flat_graph.h', 'flowgraph.h', @@ -43,12 +41,10 @@ runtime_headers = [ 'sptr_magic.h', 'realtime.h', 'runtime.h', - 'buffer_sm.h', 'executor.h', 'constants.h', 'registry.h', 'high_res_timer.h', - 'custom_lock.h' ] install_headers(runtime_headers, subdir : 'gnuradio') diff --git a/gr/include/moodycamel/LICENSE.md b/gr/include/moodycamel/LICENSE.md deleted file mode 100644 index 519338976..000000000 --- a/gr/include/moodycamel/LICENSE.md +++ /dev/null @@ -1,62 +0,0 @@ -This license file applies to everything in this repository except that which -is explicitly annotated as being written by other authors, i.e. the Boost -queue (included in the benchmarks for comparison), Intel's TBB library (ditto), -dlib::pipe (ditto), -the CDSChecker tool (used for verification), the Relacy model checker (ditto), -and Jeff Preshing's semaphore implementation (used in the blocking queue) which -has a zlib license (embedded in lightweightsempahore.h). - ---- - -Simplified BSD License: - -Copyright (c) 2013-2016, Cameron Desrochers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this list of -conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, this list of -conditions and the following disclaimer in the documentation and/or other materials -provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT -OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ---- - -I have also chosen to dual-license under the Boost Software License as an alternative to -the Simplified BSD license above: - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/gr/include/moodycamel/blockingconcurrentqueue.h b/gr/include/moodycamel/blockingconcurrentqueue.h deleted file mode 100644 index 7db0ef364..000000000 --- a/gr/include/moodycamel/blockingconcurrentqueue.h +++ /dev/null @@ -1,615 +0,0 @@ -// Provides an efficient blocking version of moodycamel::ConcurrentQueue. -// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified -// BSD license, available at the top of concurrentqueue.h. -// Also dual-licensed under the Boost Software License (see LICENSE.md) -// Uses Jeff Preshing's semaphore implementation (under the terms of its -// separate zlib license, see lightweightsemaphore.h). - -#pragma once - -#include "concurrentqueue.h" -#include "lightweightsemaphore.h" - -#include <type_traits> -#include <cerrno> -#include <chrono> -#include <ctime> -#include <memory> - -namespace moodycamel { -// This is a blocking version of the queue. It has an almost identical interface to -// the normal non-blocking version, with the addition of various wait_dequeue() methods -// and the removal of producer-specific dequeue methods. -template <typename T, typename Traits = ConcurrentQueueDefaultTraits> -class BlockingConcurrentQueue -{ -private: - typedef ::moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue; - typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; - -public: - typedef typename ConcurrentQueue::producer_token_t producer_token_t; - typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; - - typedef typename ConcurrentQueue::index_t index_t; - typedef typename ConcurrentQueue::size_t size_t; - typedef typename std::make_signed<size_t>::type ssize_t; - - static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = - ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = - ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = - ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = - ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = - ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; - static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; - -public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). - explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : inner(capacity), - sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), - &BlockingConcurrentQueue::template destroy<LightweightSemaphore>) - { - assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == - &((BlockingConcurrentQueue*)1)->inner && - "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - if (!sema) { - MOODYCAMEL_THROW(std::bad_alloc()); - } - } - - BlockingConcurrentQueue(size_t minCapacity, - size_t maxExplicitProducers, - size_t maxImplicitProducers) - : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), - sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), - &BlockingConcurrentQueue::template destroy<LightweightSemaphore>) - { - assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == - &((BlockingConcurrentQueue*)1)->inner && - "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - if (!sema) { - MOODYCAMEL_THROW(std::bad_alloc()); - } - } - - // Disable copying and copy assignment - BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - BlockingConcurrentQueue& - operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - - // Moving is supported, but note that it is *not* a thread-safe operation. - // Nobody can use the queue while it's being moved, and the memory effects - // of that move must be propagated to other threads before they can use it. - // Note: When a queue is moved, its tokens are still valid but can only be - // used with the destination queue (i.e. semantically they are moved along - // with the queue itself). - BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - : inner(std::move(other.inner)), - sema(std::move(other.sema)) - { - } - - inline BlockingConcurrentQueue& - operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - { - return swap_internal(other); - } - - // Swaps this queue's state with the other's. Not thread-safe. - // Swapping two queues does not invalidate their tokens, however - // the tokens that were created for one queue must be used with - // only the swapped queue (i.e. the tokens are tied to the - // queue's movable state, not the object itself). - inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT - { - swap_internal(other); - } - -private: - BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) - { - if (this == &other) { - return *this; - } - - inner.swap(other.inner); - sema.swap(other.sema); - return *this; - } - -public: - // Enqueues a single item (by copying it). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T const& item) - { - if ((details::likely)(inner.enqueue(item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T&& item) - { - if ((details::likely)(inner.enqueue(std::move(item)))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T const& item) - { - if ((details::likely)(inner.enqueue(token, item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer - // token. Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T&& item) - { - if ((details::likely)(inner.enqueue(token, std::move(item)))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues several items. - // Allocates memory if required. Only fails if memory allocation fails (or - // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved instead of - // copied. Thread-safe. - template <typename It> - inline bool enqueue_bulk(It itemFirst, size_t count) - { - if ((details::likely)(inner.enqueue_bulk(std::forward<It>(itemFirst), count))) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues several items using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails - // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template <typename It> - inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if ((details::likely)( - inner.enqueue_bulk(token, std::forward<It>(itemFirst), count))) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues a single item (by copying it). - // Does not allocate memory. Fails if not enough room to enqueue (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0). - // Thread-safe. - inline bool try_enqueue(T const& item) - { - if (inner.try_enqueue(item)) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible). - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Thread-safe. - inline bool try_enqueue(T&& item) - { - if (inner.try_enqueue(std::move(item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T const& item) - { - if (inner.try_enqueue(token, item)) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer - // token. Does not allocate memory. Fails if not enough room to enqueue. Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T&& item) - { - if (inner.try_enqueue(token, std::move(item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues several items. - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template <typename It> - inline bool try_enqueue_bulk(It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(std::forward<It>(itemFirst), count)) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues several items using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template <typename It> - inline bool - try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(token, std::forward<It>(itemFirst), count)) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename U> - inline bool try_dequeue(U& item) - { - if (sema->tryWait()) { - while (!inner.try_dequeue(item)) { - continue; - } - return true; - } - return false; - } - - // Attempts to dequeue from the queue using an explicit consumer token. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename U> - inline bool try_dequeue(consumer_token_t& token, U& item) - { - if (sema->tryWait()) { - while (!inner.try_dequeue(token, item)) { - continue; - } - return true; - } - return false; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename It> - inline size_t try_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer - // token. Returns the number of items actually dequeued. Returns 0 if all producer - // streams appeared empty at the time they were checked (so, the queue is likely but - // not guaranteed to be empty). Never allocates. Thread-safe. - template <typename It> - inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count); - } - return count; - } - - - // Blocks the current thread until there's something to dequeue, then - // dequeues it. - // Never allocates. Thread-safe. - template <typename U> - inline void wait_dequeue(U& item) - { - while (!sema->wait()) { - continue; - } - while (!inner.try_dequeue(item)) { - continue; - } - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout (specified in microseconds) expires. Returns false - // without setting `item` if the timeout expires, otherwise assigns - // to `item` and returns true. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue. - // Never allocates. Thread-safe. - template <typename U> - inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) - { - if (!sema->wait(timeout_usecs)) { - return false; - } - while (!inner.try_dequeue(item)) { - continue; - } - return true; - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout expires. Returns false without setting `item` if the - // timeout expires, otherwise assigns to `item` and returns true. - // Never allocates. Thread-safe. - template <typename U, typename Rep, typename Period> - inline bool wait_dequeue_timed(U& item, - std::chrono::duration<Rep, Period> const& timeout) - { - return wait_dequeue_timed( - item, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count()); - } - - // Blocks the current thread until there's something to dequeue, then - // dequeues it using an explicit consumer token. - // Never allocates. Thread-safe. - template <typename U> - inline void wait_dequeue(consumer_token_t& token, U& item) - { - while (!sema->wait()) { - continue; - } - while (!inner.try_dequeue(token, item)) { - continue; - } - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout (specified in microseconds) expires. Returns false - // without setting `item` if the timeout expires, otherwise assigns - // to `item` and returns true. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue. - // Never allocates. Thread-safe. - template <typename U> - inline bool - wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) - { - if (!sema->wait(timeout_usecs)) { - return false; - } - while (!inner.try_dequeue(token, item)) { - continue; - } - return true; - } - - // Blocks the current thread until either there's something to dequeue - // or the timeout expires. Returns false without setting `item` if the - // timeout expires, otherwise assigns to `item` and returns true. - // Never allocates. Thread-safe. - template <typename U, typename Rep, typename Period> - inline bool wait_dequeue_timed(consumer_token_t& token, - U& item, - std::chrono::duration<Rep, Period> const& timeout) - { - return wait_dequeue_timed( - token, - item, - std::chrono::duration_cast<std::chrono::microseconds>(timeout).count()); - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template <typename It> - inline size_t wait_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which can - // be 0 if the timeout expires while waiting for elements, - // and at most max. - // Using a negative timeout indicates an indefinite timeout, - // and is thus functionally equivalent to calling wait_dequeue_bulk. - // Never allocates. Thread-safe. - template <typename It> - inline size_t - wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, - timeout_usecs); - while (count != max) { - count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which can - // be 0 if the timeout expires while waiting for elements, - // and at most max. - // Never allocates. Thread-safe. - template <typename It, typename Rep, typename Period> - inline size_t wait_dequeue_bulk_timed( - It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout) - { - return wait_dequeue_bulk_timed<It&>( - itemFirst, - max, - std::chrono::duration_cast<std::chrono::microseconds>(timeout).count()); - } - - // Attempts to dequeue several elements from the queue using an explicit consumer - // token. Returns the number of items actually dequeued, which will always be at least - // one (this method blocks until the queue is non-empty) and at most max. Never - // allocates. Thread-safe. - template <typename It> - inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer - // token. Returns the number of items actually dequeued, which can be 0 if the timeout - // expires while waiting for elements, and at most max. Using a negative timeout - // indicates an indefinite timeout, and is thus functionally equivalent to calling - // wait_dequeue_bulk. Never allocates. Thread-safe. - template <typename It> - inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, - It itemFirst, - size_t max, - std::int64_t timeout_usecs) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, - timeout_usecs); - while (count != max) { - count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer - // token. Returns the number of items actually dequeued, which can be 0 if the timeout - // expires while waiting for elements, and at most max. Never allocates. Thread-safe. - template <typename It, typename Rep, typename Period> - inline size_t - wait_dequeue_bulk_timed(consumer_token_t& token, - It itemFirst, - size_t max, - std::chrono::duration<Rep, Period> const& timeout) - { - return wait_dequeue_bulk_timed<It&>( - token, - itemFirst, - max, - std::chrono::duration_cast<std::chrono::microseconds>(timeout).count()); - } - - - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is - // called (i.e. all enqueue and dequeue operations have completed and their memory - // effects are visible on the calling thread, and no further operations start while - // this method is being called). Thread-safe. - inline size_t size_approx() const { return (size_t)sema->availableApprox(); } - - - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. - static bool is_lock_free() { return ConcurrentQueue::is_lock_free(); } - - -private: - template <typename U, typename A1, typename A2> - static inline U* create(A1&& a1, A2&& a2) - { - void* p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U(std::forward<A1>(a1), std::forward<A2>(a2)) - : nullptr; - } - - template <typename U> - static inline void destroy(U* p) - { - if (p != nullptr) { - p->~U(); - } - (Traits::free)(p); - } - -private: - ConcurrentQueue inner; - std::unique_ptr<LightweightSemaphore, void (*)(LightweightSemaphore*)> sema; -}; - - -template <typename T, typename Traits> -inline void swap(BlockingConcurrentQueue<T, Traits>& a, - BlockingConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT -{ - a.swap(b); -} - -} // end namespace moodycamel diff --git a/gr/include/moodycamel/concurrentqueue.h b/gr/include/moodycamel/concurrentqueue.h deleted file mode 100644 index 2a5bbcd82..000000000 --- a/gr/include/moodycamel/concurrentqueue.h +++ /dev/null @@ -1,4506 +0,0 @@ -// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. -// An overview, including benchmark results, is provided here: -// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ -// The full design is also described in excruciating detail at: -// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue - -// Simplified BSD license: -// Copyright (c) 2013-2020, Cameron Desrochers. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, this list of -// conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, this list -// of conditions and the following disclaimer in the documentation and/or other materials -// provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Also dual-licensed under the Boost Software License (see LICENSE.md) - -#pragma once - -#if defined(__GNUC__) -// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and -// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings -// upon assigning any computed values) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" - -#ifdef MCDBGQ_USE_RELACY -#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" -#endif -#endif - -#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) -// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or -// higher does not support `if constexpr`, so we have no choice but to simply disable the -// warning -#pragma warning(push) -#pragma warning(disable : 4127) // conditional expression is constant -#endif - -#if defined(__APPLE__) -#include "TargetConditionals.h" -#endif - -#ifdef MCDBGQ_USE_RELACY -#include "relacy/relacy_std.hpp" -#include "relacy_shims.h" -// We only use malloc/free anyway, and the delete macro messes up `= delete` method -// declarations. We'll override the default trait malloc ourselves without a macro. -#undef new -#undef delete -#undef malloc -#undef free -#else -#include <atomic> // Requires C++11. Sorry VS2010. -#include <cassert> -#endif -#include <type_traits> -#include <algorithm> -#include <array> -#include <climits> // for CHAR_BIT -#include <cstddef> // for max_align_t -#include <cstdint> -#include <cstdlib> -#include <limits> -#include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading -#include <utility> - -// Platform-specific definitions of a numeric thread ID type and an invalid value -namespace moodycamel { -namespace details { -template <typename thread_id_t> -struct thread_id_converter { - typedef thread_id_t thread_id_numeric_size_t; - typedef thread_id_t thread_id_hash_t; - static thread_id_hash_t prehash(thread_id_t const& x) { return x; } -}; -} // namespace details -} // namespace moodycamel -#if defined(MCDBGQ_USE_RELACY) -namespace moodycamel { -namespace details { -typedef std::uint32_t thread_id_t; -static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; -static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; -static inline thread_id_t thread_id() { return rl::thread_index(); } -} // namespace details -} // namespace moodycamel -#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) -// No sense pulling in windows.h in a header, we'll manually declare the function -// we use and rely on backwards-compatibility for this not to break -extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); -namespace moodycamel { -namespace details { -static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), - "Expected size of unsigned long to be 32 bits on Windows"); -typedef std::uint32_t thread_id_t; -static const thread_id_t invalid_thread_id = - 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx -static const thread_id_t invalid_thread_id2 = - 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in - // practice. Note that all Win32 thread IDs are presently multiples of 4. -static inline thread_id_t thread_id() -{ - return static_cast<thread_id_t>(::GetCurrentThreadId()); -} -} // namespace details -} // namespace moodycamel -#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ - (defined(__APPLE__) && TARGET_OS_IPHONE) -namespace moodycamel { -namespace details { -static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, - "std::thread::id is expected to be either 4 or 8 bytes"); - -typedef std::thread::id thread_id_t; -static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID - -// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's -// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't -// be. -static inline thread_id_t thread_id() { return std::this_thread::get_id(); } - -template <std::size_t> -struct thread_id_size { -}; -template <> -struct thread_id_size<4> { - typedef std::uint32_t numeric_t; -}; -template <> -struct thread_id_size<8> { - typedef std::uint64_t numeric_t; -}; - -template <> -struct thread_id_converter<thread_id_t> { - typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t; -#ifndef __APPLE__ - typedef std::size_t thread_id_hash_t; -#else - typedef thread_id_numeric_size_t thread_id_hash_t; -#endif - - static thread_id_hash_t prehash(thread_id_t const& x) - { -#ifndef __APPLE__ - return std::hash<std::thread::id>()(x); -#else - return *reinterpret_cast<thread_id_hash_t const*>(&x); -#endif - } -}; -} // namespace details -} // namespace moodycamel -#else -// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 -// In order to get a numeric thread ID in a platform-independent way, we use a -// thread-local static variable's address as a thread identifier :-) -#if defined(__GNUC__) || defined(__INTEL_COMPILER) -#define MOODYCAMEL_THREADLOCAL __thread -#elif defined(_MSC_VER) -#define MOODYCAMEL_THREADLOCAL __declspec(thread) -#else -// Assume C++11 compliant compiler -#define MOODYCAMEL_THREADLOCAL thread_local -#endif -namespace moodycamel { -namespace details { -typedef std::uintptr_t thread_id_t; -static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr -static const thread_id_t invalid_thread_id2 = - 1; // Member accesses off a null pointer are also generally invalid. Plus it's not - // aligned. -inline thread_id_t thread_id() -{ - static MOODYCAMEL_THREADLOCAL int x; - return reinterpret_cast<thread_id_t>(&x); -} -} // namespace details -} // namespace moodycamel -#endif - -// Constexpr if -#ifndef MOODYCAMEL_CONSTEXPR_IF -#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L -#define MOODYCAMEL_CONSTEXPR_IF if constexpr -#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] -#else -#define MOODYCAMEL_CONSTEXPR_IF if -#define MOODYCAMEL_MAYBE_UNUSED -#endif -#endif - -// Exceptions -#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED -#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || \ - (defined(__GNUC__) && defined(__EXCEPTIONS)) || \ - (!defined(_MSC_VER) && !defined(__GNUC__)) -#define MOODYCAMEL_EXCEPTIONS_ENABLED -#endif -#endif -#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED -#define MOODYCAMEL_TRY try -#define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__) -#define MOODYCAMEL_RETHROW throw -#define MOODYCAMEL_THROW(expr) throw(expr) -#else -#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF(true) -#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF(false) -#define MOODYCAMEL_RETHROW -#define MOODYCAMEL_THROW(expr) -#endif - -#ifndef MOODYCAMEL_NOEXCEPT -#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) -#define MOODYCAMEL_NOEXCEPT -#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true -#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true -#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 -// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it -// shouldn't :-( We have to assume *all* non-trivial constructors may throw on VS2012! -#define MOODYCAMEL_NOEXCEPT _NOEXCEPT -#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \ - (std::is_rvalue_reference<valueType>::value && \ - std::is_move_constructible<type>::value \ - ? std::is_trivially_move_constructible<type>::value \ - : std::is_trivially_copy_constructible<type>::value) -#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \ - ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value \ - ? std::is_trivially_move_assignable<type>::value || \ - std::is_nothrow_move_assignable<type>::value \ - : std::is_trivially_copy_assignable<type>::value || \ - std::is_nothrow_copy_assignable<type>::value) && \ - MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) -#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 -#define MOODYCAMEL_NOEXCEPT _NOEXCEPT -#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \ - (std::is_rvalue_reference<valueType>::value && \ - std::is_move_constructible<type>::value \ - ? std::is_trivially_move_constructible<type>::value || \ - std::is_nothrow_move_constructible<type>::value \ - : std::is_trivially_copy_constructible<type>::value || \ - std::is_nothrow_copy_constructible<type>::value) -#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \ - ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value \ - ? std::is_trivially_move_assignable<type>::value || \ - std::is_nothrow_move_assignable<type>::value \ - : std::is_trivially_copy_assignable<type>::value || \ - std::is_nothrow_copy_assignable<type>::value) && \ - MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) -#else -#define MOODYCAMEL_NOEXCEPT noexcept -#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) -#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) -#endif -#endif - -#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED -#ifdef MCDBGQ_USE_RELACY -#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED -#else -// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling -// bug: http://sourceforge.net/p/mingw-w64/bugs/445 g++ <=4.7 doesn't support thread_local -// either. Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to -// compile but it's unconfirmed to actually work -#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ - (!defined(__MINGW32__) && !defined(__MINGW64__) || \ - !defined(__WINPTHREADS_VERSION)) && \ - (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && \ - (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && \ - !defined(_M_ARM) && !defined(__aarch64__) -// Assume `thread_local` is fully supported in all other C++11 compilers/platforms -//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since -// several users report having problems with it on -#endif -#endif -#endif - -// VS2012 doesn't support deleted functions. -// In this case, we declare the function normally but don't define it. A link error will -// be generated if the function is called. -#ifndef MOODYCAMEL_DELETE_FUNCTION -#if defined(_MSC_VER) && _MSC_VER < 1800 -#define MOODYCAMEL_DELETE_FUNCTION -#else -#define MOODYCAMEL_DELETE_FUNCTION = delete -#endif -#endif - -namespace moodycamel { -namespace details { -#ifndef MOODYCAMEL_ALIGNAS -// VS2013 doesn't support alignas or alignof, and align() requires a constant literal -#if defined(_MSC_VER) && _MSC_VER <= 1800 -#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) -#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) -#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \ - typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type -template <int Align, typename T> -struct Vs2013Aligned { -}; // default, unsupported alignment -template <typename T> -struct Vs2013Aligned<1, T> { - typedef __declspec(align(1)) T type; -}; -template <typename T> -struct Vs2013Aligned<2, T> { - typedef __declspec(align(2)) T type; -}; -template <typename T> -struct Vs2013Aligned<4, T> { - typedef __declspec(align(4)) T type; -}; -template <typename T> -struct Vs2013Aligned<8, T> { - typedef __declspec(align(8)) T type; -}; -template <typename T> -struct Vs2013Aligned<16, T> { - typedef __declspec(align(16)) T type; -}; -template <typename T> -struct Vs2013Aligned<32, T> { - typedef __declspec(align(32)) T type; -}; -template <typename T> -struct Vs2013Aligned<64, T> { - typedef __declspec(align(64)) T type; -}; -template <typename T> -struct Vs2013Aligned<128, T> { - typedef __declspec(align(128)) T type; -}; -template <typename T> -struct Vs2013Aligned<256, T> { - typedef __declspec(align(256)) T type; -}; -#else -template <typename T> -struct identity { - typedef T type; -}; -#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) -#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) -#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \ - alignas(alignof(obj)) typename details::identity<T>::type -#endif -#endif -} // namespace details -} // namespace moodycamel - - -// TSAN can false report races in lock-free code. To enable TSAN to be used from projects -// that use this one, we can apply per-function compile-time suppression. See -// https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer -#define MOODYCAMEL_NO_TSAN -#if defined(__has_feature) -#if __has_feature(thread_sanitizer) -#undef MOODYCAMEL_NO_TSAN -#define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) -#endif // TSAN -#endif // TSAN - -// Compiler-specific likely/unlikely hints -namespace moodycamel { -namespace details { -#if defined(__GNUC__) -static inline bool(likely)(bool x) { return __builtin_expect((x), true); } -static inline bool(unlikely)(bool x) { return __builtin_expect((x), false); } -#else -static inline bool(likely)(bool x) { return x; } -static inline bool(unlikely)(bool x) { return x; } -#endif -} // namespace details -} // namespace moodycamel - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG -#include "internal/concurrentqueue_internal_debug.h" -#endif - -namespace moodycamel { -namespace details { -template <typename T> -struct const_numeric_max { - static_assert(std::is_integral<T>::value, - "const_numeric_max can only be used with integers"); - static const T value = - std::numeric_limits<T>::is_signed - ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1) - : static_cast<T>(-1); -}; - -#if defined(__GLIBCXX__) -typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while -#else -typedef std::max_align_t - std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: -#endif - -// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even -// while supporting 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this -// with our own union. See issue #64. -typedef union { - std_max_align_t x; - long long y; - void* z; -} max_align_t; -} // namespace details - -// Default traits for the ConcurrentQueue. To change some of the -// traits without re-implementing all of them, inherit from this -// struct and shadow the declarations you wish to be different; -// since the traits are used as a template type parameter, the -// shadowed declarations will be used where defined, and the defaults -// otherwise. -struct ConcurrentQueueDefaultTraits { - // General-purpose size type. std::size_t is strongly recommended. - typedef std::size_t size_t; - - // The type used for the enqueue and dequeue indices. Must be at least as - // large as size_t. Should be significantly larger than the number of elements - // you expect to hold at once, especially if you have a high turnover rate; - // for example, on 32-bit x86, if you expect to have over a hundred million - // elements or pump several million elements through your queue in a very - // short space of time, using a 32-bit type *may* trigger a race condition. - // A 64-bit int type is recommended in that case, and in practice will - // prevent a race condition no matter the usage of the queue. Note that - // whether the queue is lock-free with a 64-int type depends on the whether - // std::atomic<std::uint64_t> is lock-free, which is platform-specific. - typedef std::size_t index_t; - - // Internally, all elements are enqueued and dequeued from multi-element - // blocks; this is the smallest controllable unit. If you expect few elements - // but many producers, a smaller block size should be favoured. For few producers - // and/or many elements, a larger block size is preferred. A sane default - // is provided. Must be a power of 2. - static const size_t BLOCK_SIZE = 32; - - // For explicit producers (i.e. when using a producer token), the block is - // checked for being empty by iterating through a list of flags, one per element. - // For large block sizes, this is too inefficient, and switching to an atomic - // counter-based approach is faster. The switch is made for block sizes strictly - // larger than this threshold. - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; - - // How many full blocks can be expected for a single explicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; - - // How many full blocks can be expected for a single implicit producer? This should - // reflect that number's maximum for optimal performance. Must be a power of 2. - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; - - // The initial size of the hash table mapping thread IDs to implicit producers. - // Note that the hash is resized every time it becomes half full. - // Must be a power of two, and either 0 or at least 1. If 0, implicit production - // (using the enqueue methods without an explicit producer token) is disabled. - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; - - // Controls the number of items that an explicit consumer (i.e. one with a token) - // must consume before it causes all consumers to rotate and move on to the next - // internal queue. - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; - - // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. - // Enqueue operations that would cause this limit to be surpassed will fail. Note - // that this limit is enforced at the block level (for performance reasons), i.e. - // it's rounded up to the nearest block size. - static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value; - - // The number of times to spin before sleeping when waiting on a semaphore. - // Recommended values are on the order of 1000-10000 unless the number of - // consumer threads exceeds the number of idle cores (in which case try 0-100). - // Only affects instances of the BlockingConcurrentQueue. - static const int MAX_SEMA_SPINS = 10000; - - -#ifndef MCDBGQ_USE_RELACY - // Memory allocation can be customized if needed. - // malloc should return nullptr on failure, and handle alignment like std::malloc. -#if defined(malloc) || defined(free) - // Gah, this is 2015, stop defining macros that break standard code already! - // Work around malloc/free being special macros: - static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } - static inline void WORKAROUND_free(void* ptr) { return free(ptr); } - static inline void*(malloc)(size_t size) { return WORKAROUND_malloc(size); } - static inline void(free)(void* ptr) { return WORKAROUND_free(ptr); } -#else - static inline void* malloc(size_t size) { return std::malloc(size); } - static inline void free(void* ptr) { return std::free(ptr); } -#endif -#else - // Debug versions when running under the Relacy race detector (ignore - // these in user code) - static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } - static inline void free(void* ptr) { return rl::rl_free(ptr, $); } -#endif -}; - - -// When producing or consuming many elements, the most efficient way is to: -// 1) Use one of the bulk-operation methods of the queue with a token -// 2) Failing that, use the bulk-operation methods without a token -// 3) Failing that, create a token and use that with the single-item methods -// 4) Failing that, use the single-parameter methods of the queue -// Having said that, don't create tokens willy-nilly -- ideally there should be -// a maximum of one token per thread (of each kind). -struct ProducerToken; -struct ConsumerToken; - -template <typename T, typename Traits> -class ConcurrentQueue; -template <typename T, typename Traits> -class BlockingConcurrentQueue; -class ConcurrentQueueTests; - - -namespace details { -struct ConcurrentQueueProducerTypelessBase { - ConcurrentQueueProducerTypelessBase* next; - std::atomic<bool> inactive; - ProducerToken* token; - - ConcurrentQueueProducerTypelessBase() : next(nullptr), inactive(false), token(nullptr) - { - } -}; - -template <bool use32> -struct _hash_32_or_64 { - static inline std::uint32_t hash(std::uint32_t h) - { - // MurmurHash3 finalizer -- see - // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp Since - // the thread ID is already unique, all we really want to do is propagate that - // uniqueness evenly across all the bits, so that we can use a subset of the bits - // while reducing collisions significantly - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - return h ^ (h >> 16); - } -}; -template <> -struct _hash_32_or_64<1> { - static inline std::uint64_t hash(std::uint64_t h) - { - h ^= h >> 33; - h *= 0xff51afd7ed558ccd; - h ^= h >> 33; - h *= 0xc4ceb9fe1a85ec53; - return h ^ (h >> 33); - } -}; -template <std::size_t size> -struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { -}; - -static inline size_t hash_thread_id(thread_id_t id) -{ - static_assert(sizeof(thread_id_t) <= 8, - "Expected a platform where thread IDs are at most 64-bit values"); - return static_cast<size_t>( - hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash( - thread_id_converter<thread_id_t>::prehash(id))); -} - -template <typename T> -static inline bool circular_less_than(T a, T b) -{ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4554) -#endif - static_assert( - std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, - "circular_less_than is intended to be used only with unsigned integer types"); - return static_cast<T>(a - b) > - static_cast<T>(static_cast<T>(1) << static_cast<T>(sizeof(T) * CHAR_BIT - 1)); -#ifdef _MSC_VER -#pragma warning(pop) -#endif -} - -template <typename U> -static inline char* align_for(char* ptr) -{ - const std::size_t alignment = std::alignment_of<U>::value; - return ptr + - (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment; -} - -template <typename T> -static inline T ceil_to_pow_2(T x) -{ - static_assert( - std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, - "ceil_to_pow_2 is intended to be used only with unsigned integer types"); - - // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - --x; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - for (std::size_t i = 1; i < sizeof(T); i <<= 1) { - x |= x >> (i << 3); - } - ++x; - return x; -} - -template <typename T> -static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right) -{ - T temp = std::move(left.load(std::memory_order_relaxed)); - left.store(std::move(right.load(std::memory_order_relaxed)), - std::memory_order_relaxed); - right.store(std::move(temp), std::memory_order_relaxed); -} - -template <typename T> -static inline T const& nomove(T const& x) -{ - return x; -} - -template <bool Enable> -struct nomove_if { - template <typename T> - static inline T const& eval(T const& x) - { - return x; - } -}; - -template <> -struct nomove_if<false> { - template <typename U> - static inline auto eval(U&& x) -> decltype(std::forward<U>(x)) - { - return std::forward<U>(x); - } -}; - -template <typename It> -static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) -{ - return *it; -} - -#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || \ - (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) -template <typename T> -struct is_trivially_destructible : std::is_trivially_destructible<T> { -}; -#else -template <typename T> -struct is_trivially_destructible : std::has_trivial_destructor<T> { -}; -#endif - -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED -#ifdef MCDBGQ_USE_RELACY -typedef RelacyThreadExitListener ThreadExitListener; -typedef RelacyThreadExitNotifier ThreadExitNotifier; -#else -struct ThreadExitListener { - typedef void (*callback_t)(void*); - callback_t callback; - void* userData; - - ThreadExitListener* next; // reserved for use by the ThreadExitNotifier -}; - - -class ThreadExitNotifier -{ -public: - static void subscribe(ThreadExitListener* listener) - { - auto& tlsInst = instance(); - listener->next = tlsInst.tail; - tlsInst.tail = listener; - } - - static void unsubscribe(ThreadExitListener* listener) - { - auto& tlsInst = instance(); - ThreadExitListener** prev = &tlsInst.tail; - for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { - if (ptr == listener) { - *prev = ptr->next; - break; - } - prev = &ptr->next; - } - } - -private: - ThreadExitNotifier() : tail(nullptr) {} - ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; - ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; - - ~ThreadExitNotifier() - { - // This thread is about to exit, let everyone know! - assert(this == &instance() && - "If this assert fails, you likely have a buggy compiler! Change the " - "preprocessor conditions such that " - "MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); - for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { - ptr->callback(ptr->userData); - } - } - - // Thread-local - static inline ThreadExitNotifier& instance() - { - static thread_local ThreadExitNotifier notifier; - return notifier; - } - -private: - ThreadExitListener* tail; -}; -#endif -#endif - -template <typename T> -struct static_is_lock_free_num { - enum { value = 0 }; -}; -template <> -struct static_is_lock_free_num<signed char> { - enum { value = ATOMIC_CHAR_LOCK_FREE }; -}; -template <> -struct static_is_lock_free_num<short> { - enum { value = ATOMIC_SHORT_LOCK_FREE }; -}; -template <> -struct static_is_lock_free_num<int> { - enum { value = ATOMIC_INT_LOCK_FREE }; -}; -template <> -struct static_is_lock_free_num<long> { - enum { value = ATOMIC_LONG_LOCK_FREE }; -}; -template <> -struct static_is_lock_free_num<long long> { - enum { value = ATOMIC_LLONG_LOCK_FREE }; -}; -template <typename T> -struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> { -}; -template <> -struct static_is_lock_free<bool> { - enum { value = ATOMIC_BOOL_LOCK_FREE }; -}; -template <typename U> -struct static_is_lock_free<U*> { - enum { value = ATOMIC_POINTER_LOCK_FREE }; -}; -} // namespace details - - -struct ProducerToken { - template <typename T, typename Traits> - explicit ProducerToken(ConcurrentQueue<T, Traits>& queue); - - template <typename T, typename Traits> - explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue); - - ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT : producer(other.producer) - { - other.producer = nullptr; - if (producer != nullptr) { - producer->token = this; - } - } - - inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT - { - swap(other); - return *this; - } - - void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT - { - std::swap(producer, other.producer); - if (producer != nullptr) { - producer->token = this; - } - if (other.producer != nullptr) { - other.producer->token = &other; - } - } - - // A token is always valid unless: - // 1) Memory allocation failed during construction - // 2) It was moved via the move constructor - // (Note: assignment does a swap, leaving both potentially valid) - // 3) The associated queue was destroyed - // Note that if valid() returns true, that only indicates - // that the token is valid for use with a specific queue, - // but not which one; that's up to the user to track. - inline bool valid() const { return producer != nullptr; } - - ~ProducerToken() - { - if (producer != nullptr) { - producer->token = nullptr; - producer->inactive.store(true, std::memory_order_release); - } - } - - // Disable copying and assignment - ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; - ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; - -private: - template <typename T, typename Traits> - friend class ConcurrentQueue; - friend class ConcurrentQueueTests; - -protected: - details::ConcurrentQueueProducerTypelessBase* producer; -}; - - -struct ConsumerToken { - template <typename T, typename Traits> - explicit ConsumerToken(ConcurrentQueue<T, Traits>& q); - - template <typename T, typename Traits> - explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q); - - ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT - : initialOffset(other.initialOffset), - lastKnownGlobalOffset(other.lastKnownGlobalOffset), - itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), - currentProducer(other.currentProducer), - desiredProducer(other.desiredProducer) - { - } - - inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT - { - swap(other); - return *this; - } - - void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT - { - std::swap(initialOffset, other.initialOffset); - std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); - std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); - std::swap(currentProducer, other.currentProducer); - std::swap(desiredProducer, other.desiredProducer); - } - - // Disable copying and assignment - ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; - ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; - -private: - template <typename T, typename Traits> - friend class ConcurrentQueue; - friend class ConcurrentQueueTests; - -private: // but shared with ConcurrentQueue - std::uint32_t initialOffset; - std::uint32_t lastKnownGlobalOffset; - std::uint32_t itemsConsumedFromCurrent; - details::ConcurrentQueueProducerTypelessBase* currentProducer; - details::ConcurrentQueueProducerTypelessBase* desiredProducer; -}; - -// Need to forward-declare this swap because it's in a namespace. -// See -// http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces -template <typename T, typename Traits> -inline void -swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, - typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; - - -template <typename T, typename Traits = ConcurrentQueueDefaultTraits> -class ConcurrentQueue -{ -public: - typedef ::moodycamel::ProducerToken producer_token_t; - typedef ::moodycamel::ConsumerToken consumer_token_t; - - typedef typename Traits::index_t index_t; - typedef typename Traits::size_t size_t; - - static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE); - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = - static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = - static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE); - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = - static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE); - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = - static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = - static_cast<std::uint32_t>( - Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4307) // + integral constant overflow (that's what the ternary - // expression is for!) -#pragma warning(disable : 4309) // static_cast: Truncation of constant value -#endif - static const size_t MAX_SUBQUEUE_SIZE = - (details::const_numeric_max<size_t>::value - - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < - BLOCK_SIZE) - ? details::const_numeric_max<size_t>::value - : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / - BLOCK_SIZE * BLOCK_SIZE); -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - static_assert(!std::numeric_limits<size_t>::is_signed && - std::is_integral<size_t>::value, - "Traits::size_t must be an unsigned integral type"); - static_assert(!std::numeric_limits<index_t>::is_signed && - std::is_integral<index_t>::value, - "Traits::index_t must be an unsigned integral type"); - static_assert(sizeof(index_t) >= sizeof(size_t), - "Traits::index_t must be at least as wide as Traits::size_t"); - static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), - "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); - static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && - !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & - (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), - "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 " - "(and greater than 1)"); - static_assert( - (EXPLICIT_INITIAL_INDEX_SIZE > 1) && - !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), - "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); - static_assert( - (IMPLICIT_INITIAL_INDEX_SIZE > 1) && - !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), - "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); - static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || - !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & - (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), - "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); - static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || - INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, - "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 " - "to disable implicit enqueueing)"); - -public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). - explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : producerListTail(nullptr), - producerCount(0), - initialBlockPoolIndex(0), - nextExplicitConsumerId(0), - globalExplicitConsumerOffset(0) - { - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - populate_initial_implicit_producer_hash(); - populate_initial_block_list(capacity / BLOCK_SIZE + - ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - // Track all the producers using a fully-resolved typed list for - // each kind; this makes it possible to debug them starting from - // the root queue object (otherwise wacky casts are needed that - // don't compile in the debugger's expression evaluator). - explicitProducers.store(nullptr, std::memory_order_relaxed); - implicitProducers.store(nullptr, std::memory_order_relaxed); -#endif - } - - // Computes the correct amount of pre-allocated blocks for you based - // on the minimum number of elements you want available at any given - // time, and the maximum concurrent number of each type of producer. - ConcurrentQueue(size_t minCapacity, - size_t maxExplicitProducers, - size_t maxImplicitProducers) - : producerListTail(nullptr), - producerCount(0), - initialBlockPoolIndex(0), - nextExplicitConsumerId(0), - globalExplicitConsumerOffset(0) - { - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - populate_initial_implicit_producer_hash(); - size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * - (maxExplicitProducers + 1) + - 2 * (maxExplicitProducers + maxImplicitProducers); - populate_initial_block_list(blocks); - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - explicitProducers.store(nullptr, std::memory_order_relaxed); - implicitProducers.store(nullptr, std::memory_order_relaxed); -#endif - } - - // Note: The queue should not be accessed concurrently while it's - // being deleted. It's up to the user to synchronize this. - // This method is not thread safe. - ~ConcurrentQueue() - { - // Destroy producers - auto ptr = producerListTail.load(std::memory_order_relaxed); - while (ptr != nullptr) { - auto next = ptr->next_prod(); - if (ptr->token != nullptr) { - ptr->token->producer = nullptr; - } - destroy(ptr); - ptr = next; - } - - // Destroy implicit producer hash tables - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) - { - auto hash = implicitProducerHash.load(std::memory_order_relaxed); - while (hash != nullptr) { - auto prev = hash->prev; - if (prev != nullptr) { // The last hash is part of this object and was not - // allocated dynamically - for (size_t i = 0; i != hash->capacity; ++i) { - hash->entries[i].~ImplicitProducerKVP(); - } - hash->~ImplicitProducerHash(); - (Traits::free)(hash); - } - hash = prev; - } - } - - // Destroy global free list - auto block = freeList.head_unsafe(); - while (block != nullptr) { - auto next = block->freeListNext.load(std::memory_order_relaxed); - if (block->dynamicallyAllocated) { - destroy(block); - } - block = next; - } - - // Destroy initial free list - destroy_array(initialBlockPool, initialBlockPoolSize); - } - - // Disable copying and copy assignment - ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - - // Moving is supported, but note that it is *not* a thread-safe operation. - // Nobody can use the queue while it's being moved, and the memory effects - // of that move must be propagated to other threads before they can use it. - // Note: When a queue is moved, its tokens are still valid but can only be - // used with the destination queue (i.e. semantically they are moved along - // with the queue itself). - ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), - producerCount(other.producerCount.load(std::memory_order_relaxed)), - initialBlockPoolIndex( - other.initialBlockPoolIndex.load(std::memory_order_relaxed)), - initialBlockPool(other.initialBlockPool), - initialBlockPoolSize(other.initialBlockPoolSize), - freeList(std::move(other.freeList)), - nextExplicitConsumerId( - other.nextExplicitConsumerId.load(std::memory_order_relaxed)), - globalExplicitConsumerOffset( - other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) - { - // Move the other one into this, and leave the other one as an empty queue - implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); - populate_initial_implicit_producer_hash(); - swap_implicit_producer_hashes(other); - - other.producerListTail.store(nullptr, std::memory_order_relaxed); - other.producerCount.store(0, std::memory_order_relaxed); - other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); - other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), - std::memory_order_relaxed); - other.explicitProducers.store(nullptr, std::memory_order_relaxed); - implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), - std::memory_order_relaxed); - other.implicitProducers.store(nullptr, std::memory_order_relaxed); -#endif - - other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); - other.initialBlockPoolSize = 0; - other.initialBlockPool = nullptr; - - reown_producers(); - } - - inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - { - return swap_internal(other); - } - - // Swaps this queue's state with the other's. Not thread-safe. - // Swapping two queues does not invalidate their tokens, however - // the tokens that were created for one queue must be used with - // only the swapped queue (i.e. the tokens are tied to the - // queue's movable state, not the object itself). - inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT { swap_internal(other); } - -private: - ConcurrentQueue& swap_internal(ConcurrentQueue& other) - { - if (this == &other) { - return *this; - } - - details::swap_relaxed(producerListTail, other.producerListTail); - details::swap_relaxed(producerCount, other.producerCount); - details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); - std::swap(initialBlockPool, other.initialBlockPool); - std::swap(initialBlockPoolSize, other.initialBlockPoolSize); - freeList.swap(other.freeList); - details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); - details::swap_relaxed(globalExplicitConsumerOffset, - other.globalExplicitConsumerOffset); - - swap_implicit_producer_hashes(other); - - reown_producers(); - other.reown_producers(); - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - details::swap_relaxed(explicitProducers, other.explicitProducers); - details::swap_relaxed(implicitProducers, other.implicitProducers); -#endif - - return *this; - } - -public: - // Enqueues a single item (by copying it). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T const& item) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - else return inner_enqueue<CanAlloc>(item); - } - - // Enqueues a single item (by moving it, if possible). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T&& item) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - else return inner_enqueue<CanAlloc>(std::move(item)); - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T const& item) - { - return inner_enqueue<CanAlloc>(token, item); - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer - // token. Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T&& item) - { - return inner_enqueue<CanAlloc>(token, std::move(item)); - } - - // Enqueues several items. - // Allocates memory if required. Only fails if memory allocation fails (or - // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved instead of - // copied. Thread-safe. - template <typename It> - bool enqueue_bulk(It itemFirst, size_t count) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - else return inner_enqueue_bulk<CanAlloc>(itemFirst, count); - } - - // Enqueues several items using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails - // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template <typename It> - bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count); - } - - // Enqueues a single item (by copying it). - // Does not allocate memory. Fails if not enough room to enqueue (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0). - // Thread-safe. - inline bool try_enqueue(T const& item) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - else return inner_enqueue<CannotAlloc>(item); - } - - // Enqueues a single item (by moving it, if possible). - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Thread-safe. - inline bool try_enqueue(T&& item) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - else return inner_enqueue<CannotAlloc>(std::move(item)); - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T const& item) - { - return inner_enqueue<CannotAlloc>(token, item); - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer - // token. Does not allocate memory. Fails if not enough room to enqueue. Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T&& item) - { - return inner_enqueue<CannotAlloc>(token, std::move(item)); - } - - // Enqueues several items. - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template <typename It> - bool try_enqueue_bulk(It itemFirst, size_t count) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count); - } - - // Enqueues several items using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template <typename It> - bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count); - } - - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename U> - bool try_dequeue(U& item) - { - // Instead of simply trying each producer in turn (which could cause needless - // contention on the first producer), we score them heuristically. - size_t nonEmptyCount = 0; - ProducerBase* best = nullptr; - size_t bestSize = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); - nonEmptyCount < 3 && ptr != nullptr; - ptr = ptr->next_prod()) { - auto size = ptr->size_approx(); - if (size > 0) { - if (size > bestSize) { - bestSize = size; - best = ptr; - } - ++nonEmptyCount; - } - } - - // If there was at least one non-empty queue but it appears empty at the time - // we try to dequeue from it, we need to make sure every queue's been tried - if (nonEmptyCount > 0) { - if ((details::likely)(best->dequeue(item))) { - return true; - } - for (auto ptr = producerListTail.load(std::memory_order_acquire); - ptr != nullptr; - ptr = ptr->next_prod()) { - if (ptr != best && ptr->dequeue(item)) { - return true; - } - } - } - return false; - } - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // This differs from the try_dequeue(item) method in that this one does - // not attempt to reduce contention by interleaving the order that producer - // streams are dequeued from. So, using this method can reduce overall throughput - // under contention, but will give more predictable results in single-threaded - // consumer scenarios. This is mostly only useful for internal unit tests. - // Never allocates. Thread-safe. - template <typename U> - bool try_dequeue_non_interleaved(U& item) - { - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; - ptr = ptr->next_prod()) { - if (ptr->dequeue(item)) { - return true; - } - } - return false; - } - - // Attempts to dequeue from the queue using an explicit consumer token. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename U> - bool try_dequeue(consumer_token_t& token, U& item) - { - // The idea is roughly as follows: - // Every 256 items from one producer, make everyone rotate (increase the global - // offset) -> this means the highest efficiency consumer dictates the rotation - // speed of everyone else, more or less If you see that the global offset has - // changed, you must reset your consumption counter and move to your designated - // place If there's no items where you're supposed to be, keep moving until you - // find a producer with some items If the global offset has not changed but you've - // run out of items to consume, move over from your current position until you - // find an producer with something in it - - if (token.desiredProducer == nullptr || - token.lastKnownGlobalOffset != - globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { - if (!update_current_producer_after_rotation(token)) { - return false; - } - } - - // If there was at least one non-empty queue but it appears empty at the time - // we try to dequeue from it, we need to make sure every queue's been tried - if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) { - if (++token.itemsConsumedFromCurrent == - EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { - globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); - } - return true; - } - - auto tail = producerListTail.load(std::memory_order_acquire); - auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod(); - if (ptr == nullptr) { - ptr = tail; - } - while (ptr != static_cast<ProducerBase*>(token.currentProducer)) { - if (ptr->dequeue(item)) { - token.currentProducer = ptr; - token.itemsConsumedFromCurrent = 1; - return true; - } - ptr = ptr->next_prod(); - if (ptr == nullptr) { - ptr = tail; - } - } - return false; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename It> - size_t try_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; - ptr = ptr->next_prod()) { - count += ptr->dequeue_bulk(itemFirst, max - count); - if (count == max) { - break; - } - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer - // token. Returns the number of items actually dequeued. Returns 0 if all producer - // streams appeared empty at the time they were checked (so, the queue is likely but - // not guaranteed to be empty). Never allocates. Thread-safe. - template <typename It> - size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - if (token.desiredProducer == nullptr || - token.lastKnownGlobalOffset != - globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { - if (!update_current_producer_after_rotation(token)) { - return 0; - } - } - - size_t count = static_cast<ProducerBase*>(token.currentProducer) - ->dequeue_bulk(itemFirst, max); - if (count == max) { - if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= - EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { - globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); - } - return max; - } - token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count); - max -= count; - - auto tail = producerListTail.load(std::memory_order_acquire); - auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod(); - if (ptr == nullptr) { - ptr = tail; - } - while (ptr != static_cast<ProducerBase*>(token.currentProducer)) { - auto dequeued = ptr->dequeue_bulk(itemFirst, max); - count += dequeued; - if (dequeued != 0) { - token.currentProducer = ptr; - token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued); - } - if (dequeued == max) { - break; - } - max -= dequeued; - ptr = ptr->next_prod(); - if (ptr == nullptr) { - ptr = tail; - } - } - return count; - } - - - // Attempts to dequeue from a specific producer's inner queue. - // If you happen to know which producer you want to dequeue from, this - // is significantly faster than using the general-case try_dequeue methods. - // Returns false if the producer's queue appeared empty at the time it - // was checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename U> - inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) - { - return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item); - } - - // Attempts to dequeue several elements from a specific producer's inner queue. - // Returns the number of items actually dequeued. - // If you happen to know which producer you want to dequeue from, this - // is significantly faster than using the general-case try_dequeue methods. - // Returns 0 if the producer's queue appeared empty at the time it - // was checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template <typename It> - inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, - It itemFirst, - size_t max) - { - return static_cast<ExplicitProducer*>(producer.producer) - ->dequeue_bulk(itemFirst, max); - } - - - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is - // called (i.e. all enqueue and dequeue operations have completed and their memory - // effects are visible on the calling thread, and no further operations start while - // this method is being called). Thread-safe. - size_t size_approx() const - { - size_t size = 0; - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; - ptr = ptr->next_prod()) { - size += ptr->size_approx(); - } - return size; - } - - - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. - static bool is_lock_free() - { - return details::static_is_lock_free<bool>::value == 2 && - details::static_is_lock_free<size_t>::value == 2 && - details::static_is_lock_free<std::uint32_t>::value == 2 && - details::static_is_lock_free<index_t>::value == 2 && - details::static_is_lock_free<void*>::value == 2 && - details::static_is_lock_free<typename details::thread_id_converter< - details::thread_id_t>::thread_id_numeric_size_t>::value == 2; - } - - -private: - friend struct ProducerToken; - friend struct ConsumerToken; - struct ExplicitProducer; - friend struct ExplicitProducer; - struct ImplicitProducer; - friend struct ImplicitProducer; - friend class ConcurrentQueueTests; - - enum AllocationMode { CanAlloc, CannotAlloc }; - - - /////////////////////////////// - // Queue methods - /////////////////////////////// - - template <AllocationMode canAlloc, typename U> - inline bool inner_enqueue(producer_token_t const& token, U&& element) - { - return static_cast<ExplicitProducer*>(token.producer) - ->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>( - std::forward<U>(element)); - } - - template <AllocationMode canAlloc, typename U> - inline bool inner_enqueue(U&& element) - { - auto producer = get_or_add_implicit_producer(); - return producer == nullptr - ? false - : producer - ->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>( - std::forward<U>(element)); - } - - template <AllocationMode canAlloc, typename It> - inline bool - inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - return static_cast<ExplicitProducer*>(token.producer) - ->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>( - itemFirst, count); - } - - template <AllocationMode canAlloc, typename It> - inline bool inner_enqueue_bulk(It itemFirst, size_t count) - { - auto producer = get_or_add_implicit_producer(); - return producer == nullptr - ? false - : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk< - canAlloc>(itemFirst, count); - } - - inline bool update_current_producer_after_rotation(consumer_token_t& token) - { - // Ah, there's been a rotation, figure out where we should be! - auto tail = producerListTail.load(std::memory_order_acquire); - if (token.desiredProducer == nullptr && tail == nullptr) { - return false; - } - auto prodCount = producerCount.load(std::memory_order_relaxed); - auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); - if ((details::unlikely)(token.desiredProducer == nullptr)) { - // Aha, first time we're dequeueing anything. - // Figure out our local position - // Note: offset is from start, not end, but we're traversing from end -- - // subtract from count first - std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); - token.desiredProducer = tail; - for (std::uint32_t i = 0; i != offset; ++i) { - token.desiredProducer = - static_cast<ProducerBase*>(token.desiredProducer)->next_prod(); - if (token.desiredProducer == nullptr) { - token.desiredProducer = tail; - } - } - } - - std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; - if (delta >= prodCount) { - delta = delta % prodCount; - } - for (std::uint32_t i = 0; i != delta; ++i) { - token.desiredProducer = - static_cast<ProducerBase*>(token.desiredProducer)->next_prod(); - if (token.desiredProducer == nullptr) { - token.desiredProducer = tail; - } - } - - token.lastKnownGlobalOffset = globalOffset; - token.currentProducer = token.desiredProducer; - token.itemsConsumedFromCurrent = 0; - return true; - } - - - /////////////////////////// - // Free list - /////////////////////////// - - template <typename N> - struct FreeListNode { - FreeListNode() : freeListRefs(0), freeListNext(nullptr) {} - - std::atomic<std::uint32_t> freeListRefs; - std::atomic<N*> freeListNext; - }; - - // A simple CAS-based lock-free free list. Not the fastest thing in the world under - // heavy contention, but simple and correct (assuming nodes are never freed until - // after the free list is destroyed), and fairly speedy under low contention. - template <typename N> // N must inherit FreeListNode or have the same fields (and - // initialization of them) - struct FreeList { - FreeList() : freeListHead(nullptr) {} - FreeList(FreeList&& other) - : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) - { - other.freeListHead.store(nullptr, std::memory_order_relaxed); - } - void swap(FreeList& other) - { - details::swap_relaxed(freeListHead, other.freeListHead); - } - - FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; - FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; - - inline void add(N* node) - { -#ifdef MCDBGQ_NOLOCKFREE_FREELIST - debug::DebugLock lock(mutex); -#endif - // We know that the should-be-on-freelist bit is 0 at this point, so it's safe - // to set it using a fetch_add - if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, - std::memory_order_acq_rel) == 0) { - // Oh look! We were the last ones referencing this node, and we know - // we want to add it to the free list, so let's do it! - add_knowing_refcount_is_zero(node); - } - } - - inline N* try_get() - { -#ifdef MCDBGQ_NOLOCKFREE_FREELIST - debug::DebugLock lock(mutex); -#endif - auto head = freeListHead.load(std::memory_order_acquire); - while (head != nullptr) { - auto prevHead = head; - auto refs = head->freeListRefs.load(std::memory_order_relaxed); - if ((refs & REFS_MASK) == 0 || - !head->freeListRefs.compare_exchange_strong( - refs, - refs + 1, - std::memory_order_acquire, - std::memory_order_relaxed)) { - head = freeListHead.load(std::memory_order_acquire); - continue; - } - - // Good, reference count has been incremented (it wasn't at zero), which - // means we can read the next and not worry about it changing between now - // and the time we do the CAS - auto next = head->freeListNext.load(std::memory_order_relaxed); - if (freeListHead.compare_exchange_strong(head, - next, - std::memory_order_acquire, - std::memory_order_relaxed)) { - // Yay, got the node. This means it was on the list, which means - // shouldBeOnFreeList must be false no matter the refcount (because - // nobody else knows it's been taken off yet, it can't have been put - // back on). - assert((head->freeListRefs.load(std::memory_order_relaxed) & - SHOULD_BE_ON_FREELIST) == 0); - - // Decrease refcount twice, once for our ref, and once for the list's - // ref - head->freeListRefs.fetch_sub(2, std::memory_order_release); - return head; - } - - // OK, the head must have changed on us, but we still need to decrease the - // refcount we increased. Note that we don't need to release any memory - // effects, but we do need to ensure that the reference count decrement - // happens-after the CAS on the head. - refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); - if (refs == SHOULD_BE_ON_FREELIST + 1) { - add_knowing_refcount_is_zero(prevHead); - } - } - - return nullptr; - } - - // Useful for traversing the list when there's no contention (e.g. to destroy - // remaining nodes) - N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } - - private: - inline void add_knowing_refcount_is_zero(N* node) - { - // Since the refcount is zero, and nobody can increase it once it's zero - // (except us, and we run only one copy of this method per node at a time, - // i.e. the single thread case), then we know we can safely change the next - // pointer of the node; however, once the refcount is back above zero, then - // other threads could increase it (happens under heavy contention, when the - // refcount goes to zero in between a load and a refcount increment of a node - // in try_get, then back up to something non-zero, then the refcount increment - // is done by the other thread) -- so, if the CAS to add the node to the - // actual list fails, decrease the refcount and leave the add operation to the - // next thread who puts the refcount back at zero (which could be us, hence - // the loop). - auto head = freeListHead.load(std::memory_order_relaxed); - while (true) { - node->freeListNext.store(head, std::memory_order_relaxed); - node->freeListRefs.store(1, std::memory_order_release); - if (!freeListHead.compare_exchange_strong(head, - node, - std::memory_order_release, - std::memory_order_relaxed)) { - // Hmm, the add failed, but we can only try again when the refcount - // goes back to zero - if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, - std::memory_order_release) == 1) { - continue; - } - } - return; - } - } - - private: - // Implemented like a stack, but where node order doesn't matter (nodes are - // inserted out of order under contention) - std::atomic<N*> freeListHead; - - static const std::uint32_t REFS_MASK = 0x7FFFFFFF; - static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; - -#ifdef MCDBGQ_NOLOCKFREE_FREELIST - debug::DebugMutex mutex; -#endif - }; - - - /////////////////////////// - // Block - /////////////////////////// - - enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; - - struct Block { - Block() - : next(nullptr), - elementsCompletelyDequeued(0), - freeListRefs(0), - freeListNext(nullptr), - shouldBeOnFreeList(false), - dynamicallyAllocated(true) - { -#ifdef MCDBGQ_TRACKMEM - owner = nullptr; -#endif - } - - template <InnerQueueContext context> - inline bool is_empty() const - { - MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && - BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) - { - // Check flags - for (size_t i = 0; i < BLOCK_SIZE; ++i) { - if (!emptyFlags[i].load(std::memory_order_relaxed)) { - return false; - } - } - - // Aha, empty; make sure we have all other memory effects that happened - // before the empty flags were set - std::atomic_thread_fence(std::memory_order_acquire); - return true; - } - else - { - // Check counter - if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == - BLOCK_SIZE) { - std::atomic_thread_fence(std::memory_order_acquire); - return true; - } - assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= - BLOCK_SIZE); - return false; - } - } - - // Returns true if the block is now empty (does not apply in explicit context) - template <InnerQueueContext context> - inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) - { - MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && - BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) - { - // Set flag - assert(!emptyFlags[BLOCK_SIZE - 1 - - static_cast<size_t>( - i & static_cast<index_t>(BLOCK_SIZE - 1))] - .load(std::memory_order_relaxed)); - emptyFlags[BLOCK_SIZE - 1 - - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))] - .store(true, std::memory_order_release); - return false; - } - else - { - // Increment counter - auto prevVal = - elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); - assert(prevVal < BLOCK_SIZE); - return prevVal == BLOCK_SIZE - 1; - } - } - - // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and - // count > 0). Returns true if the block is now empty (does not apply in explicit - // context). - template <InnerQueueContext context> - inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) - { - MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && - BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) - { - // Set flags - std::atomic_thread_fence(std::memory_order_release); - i = BLOCK_SIZE - 1 - - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - - count + 1; - for (size_t j = 0; j != count; ++j) { - assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); - emptyFlags[i + j].store(true, std::memory_order_relaxed); - } - return false; - } - else - { - // Increment counter - auto prevVal = elementsCompletelyDequeued.fetch_add( - count, std::memory_order_release); - assert(prevVal + count <= BLOCK_SIZE); - return prevVal + count == BLOCK_SIZE; - } - } - - template <InnerQueueContext context> - inline void set_all_empty() - { - MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && - BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) - { - // Set all flags - for (size_t i = 0; i != BLOCK_SIZE; ++i) { - emptyFlags[i].store(true, std::memory_order_relaxed); - } - } - else - { - // Reset counter - elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); - } - } - - template <InnerQueueContext context> - inline void reset_empty() - { - MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && - BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) - { - // Reset flags - for (size_t i = 0; i != BLOCK_SIZE; ++i) { - emptyFlags[i].store(false, std::memory_order_relaxed); - } - } - else - { - // Reset counter - elementsCompletelyDequeued.store(0, std::memory_order_relaxed); - } - } - - inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT - { - return static_cast<T*>(static_cast<void*>(elements)) + - static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); - } - inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT - { - return static_cast<T const*>(static_cast<void const*>(elements)) + - static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); - } - - private: - static_assert(std::alignment_of<T>::value <= sizeof(T), - "The queue does not support types with an alignment greater than " - "their size at this time"); - MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; - - public: - Block* next; - std::atomic<size_t> elementsCompletelyDequeued; - std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - ? BLOCK_SIZE - : 1]; - - public: - std::atomic<std::uint32_t> freeListRefs; - std::atomic<Block*> freeListNext; - std::atomic<bool> shouldBeOnFreeList; - bool dynamicallyAllocated; // Perhaps a better name for this would be - // 'isNotPartOfInitialBlockPool' - -#ifdef MCDBGQ_TRACKMEM - void* owner; -#endif - }; - static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, - "Internal error: Blocks must be at least as aligned as the type they " - "are wrapping"); - - -#ifdef MCDBGQ_TRACKMEM -public: - struct MemStats; - -private: -#endif - - /////////////////////////// - // Producer base - /////////////////////////// - - struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase { - ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) - : tailIndex(0), - headIndex(0), - dequeueOptimisticCount(0), - dequeueOvercommit(0), - tailBlock(nullptr), - isExplicit(isExplicit_), - parent(parent_) - { - } - - virtual ~ProducerBase() {} - - template <typename U> - inline bool dequeue(U& element) - { - if (isExplicit) { - return static_cast<ExplicitProducer*>(this)->dequeue(element); - } - else { - return static_cast<ImplicitProducer*>(this)->dequeue(element); - } - } - - template <typename It> - inline size_t dequeue_bulk(It& itemFirst, size_t max) - { - if (isExplicit) { - return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max); - } - else { - return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max); - } - } - - inline ProducerBase* next_prod() const - { - return static_cast<ProducerBase*>(next); - } - - inline size_t size_approx() const - { - auto tail = tailIndex.load(std::memory_order_relaxed); - auto head = headIndex.load(std::memory_order_relaxed); - return details::circular_less_than(head, tail) - ? static_cast<size_t>(tail - head) - : 0; - } - - inline index_t getTail() const - { - return tailIndex.load(std::memory_order_relaxed); - } - - protected: - std::atomic<index_t> tailIndex; // Where to enqueue to next - std::atomic<index_t> headIndex; // Where to dequeue from next - - std::atomic<index_t> dequeueOptimisticCount; - std::atomic<index_t> dequeueOvercommit; - - Block* tailBlock; - - public: - bool isExplicit; - ConcurrentQueue* parent; - - protected: -#ifdef MCDBGQ_TRACKMEM - friend struct MemStats; -#endif - }; - - - /////////////////////////// - // Explicit queue - /////////////////////////// - - struct ExplicitProducer : public ProducerBase { - explicit ExplicitProducer(ConcurrentQueue* parent_) - : ProducerBase(parent_, true), - blockIndex(nullptr), - pr_blockIndexSlotsUsed(0), - pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), - pr_blockIndexFront(0), - pr_blockIndexEntries(nullptr), - pr_blockIndexRaw(nullptr) - { - size_t poolBasedIndexSize = - details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; - if (poolBasedIndexSize > pr_blockIndexSize) { - pr_blockIndexSize = poolBasedIndexSize; - } - - new_block_index(0); // This creates an index with double the number of current - // entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE - } - - ~ExplicitProducer() override - { - // Destruct any elements not yet dequeued. - // Since we're in the destructor, we can assume all elements - // are either completely dequeued or completely not (no halfways). - if (this->tailBlock != - nullptr) { // Note this means there must be a block index too - // First find the block that's partially dequeued, if any - Block* halfDequeuedBlock = nullptr; - if ((this->headIndex.load(std::memory_order_relaxed) & - static_cast<index_t>(BLOCK_SIZE - 1)) != 0) { - // The head's not on a block boundary, meaning a block somewhere is - // partially dequeued (or the head block is the tail block and was - // fully dequeued, but the head/tail are still not on a boundary) - size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & - (pr_blockIndexSize - 1); - while (details::circular_less_than<index_t>( - pr_blockIndexEntries[i].base + BLOCK_SIZE, - this->headIndex.load(std::memory_order_relaxed))) { - i = (i + 1) & (pr_blockIndexSize - 1); - } - assert(details::circular_less_than<index_t>( - pr_blockIndexEntries[i].base, - this->headIndex.load(std::memory_order_relaxed))); - halfDequeuedBlock = pr_blockIndexEntries[i].block; - } - - // Start at the head block (note the first line in the loop gives us the - // head from the tail on the first iteration) - auto block = this->tailBlock; - do { - block = block->next; - if (block->ConcurrentQueue::Block::template is_empty< - explicit_context>()) { - continue; - } - - size_t i = 0; // Offset into block - if (block == halfDequeuedBlock) { - i = static_cast<size_t>( - this->headIndex.load(std::memory_order_relaxed) & - static_cast<index_t>(BLOCK_SIZE - 1)); - } - - // Walk through all the items in the block; if this is the tail block, - // we need to stop when we reach the tail index - auto lastValidIndex = - (this->tailIndex.load(std::memory_order_relaxed) & - static_cast<index_t>(BLOCK_SIZE - 1)) == 0 - ? BLOCK_SIZE - : static_cast<size_t>( - this->tailIndex.load(std::memory_order_relaxed) & - static_cast<index_t>(BLOCK_SIZE - 1)); - while (i != BLOCK_SIZE && - (block != this->tailBlock || i != lastValidIndex)) { - (*block)[i++]->~T(); - } - } while (block != this->tailBlock); - } - - // Destroy all blocks that we own - if (this->tailBlock != nullptr) { - auto block = this->tailBlock; - do { - auto nextBlock = block->next; - if (block->dynamicallyAllocated) { - destroy(block); - } - else { - this->parent->add_block_to_free_list(block); - } - block = nextBlock; - } while (block != this->tailBlock); - } - - // Destroy the block indices - auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw); - while (header != nullptr) { - auto prev = static_cast<BlockIndexHeader*>(header->prev); - header->~BlockIndexHeader(); - (Traits::free)(header); - header = prev; - } - } - - template <AllocationMode allocMode, typename U> - inline bool enqueue(U&& element) - { - index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); - index_t newTailIndex = 1 + currentTailIndex; - if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { - // We reached the end of a block, start a new one - auto startBlock = this->tailBlock; - auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; - if (this->tailBlock != nullptr && - this->tailBlock->next - ->ConcurrentQueue::Block::template is_empty<explicit_context>()) { - // We can re-use the block ahead of us, it's empty! - this->tailBlock = this->tailBlock->next; - this->tailBlock->ConcurrentQueue::Block::template reset_empty< - explicit_context>(); - - // We'll put the block on the block index (guaranteed to be room since - // we're conceptually removing the last block from it first -- except - // instead of removing then adding, we can just overwrite). Note that - // there must be a valid block index here, since even if allocation - // failed in the ctor, it would have been re-attempted when adding the - // first block to the queue; since there is such a block, a block - // index must have been successfully allocated. - } - else { - // Whatever head value we see here is >= the last value we saw here - // (relatively), and <= its current value. Since we have the most - // recent tail, the head must be - // <= to it. - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!details::circular_less_than<index_t>(currentTailIndex, head)); - if (!details::circular_less_than<index_t>( - head, currentTailIndex + BLOCK_SIZE) || - (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && - (MAX_SUBQUEUE_SIZE == 0 || - MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { - // We can't enqueue in another block because there's not enough - // leeway -- the tail could surpass the head by the time the block - // fills up! (Or we'll exceed the size limit, if the second part - // of the condition was true.) - return false; - } - // We're going to need a new block; check that the block index has - // room - if (pr_blockIndexRaw == nullptr || - pr_blockIndexSlotsUsed == pr_blockIndexSize) { - // Hmm, the circular block index is already full -- we'll need - // to allocate a new index. Note pr_blockIndexRaw can only be - // nullptr if the initial allocation failed in the constructor. - - MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) - { - return false; - } - else if (!new_block_index(pr_blockIndexSlotsUsed)) - { - return false; - } - } - - // Insert a new block in the circular linked list - auto newBlock = - this->parent - ->ConcurrentQueue::template requisition_block<allocMode>(); - if (newBlock == nullptr) { - return false; - } -#ifdef MCDBGQ_TRACKMEM - newBlock->owner = this; -#endif - newBlock->ConcurrentQueue::Block::template reset_empty< - explicit_context>(); - if (this->tailBlock == nullptr) { - newBlock->next = newBlock; - } - else { - newBlock->next = this->tailBlock->next; - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - ++pr_blockIndexSlotsUsed; - } - - MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( - T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) - { - // The constructor may throw. We want the element not to appear in the - // queue in that case (without corrupting the queue): - MOODYCAMEL_TRY - { - new ((*this->tailBlock)[currentTailIndex]) - T(std::forward<U>(element)); - } - MOODYCAMEL_CATCH(...) - { - // Revert change to the current block, but leave the new block - // available for next time - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = - startBlock == nullptr ? this->tailBlock : startBlock; - MOODYCAMEL_RETHROW; - } - } - else - { - (void)startBlock; - (void)originalBlockIndexSlotsUsed; - } - - // Add block to block index - auto& entry = blockIndex.load(std::memory_order_relaxed) - ->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - blockIndex.load(std::memory_order_relaxed) - ->front.store(pr_blockIndexFront, std::memory_order_release); - pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - - MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( - T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) - { - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - } - - // Enqueue - new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); - - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - - template <typename U> - bool dequeue(U& element) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - if (details::circular_less_than<index_t>( - this->dequeueOptimisticCount.load(std::memory_order_relaxed) - - overcommit, - tail)) { - // Might be something to dequeue, let's give it a try - - // Note that this if is purely for performance purposes in the common case - // when the queue is empty and the values are eventually consistent -- we - // may enter here spuriously. - - // Note that whatever the values of overcommit and tail are, they are not - // going to change (unless we change them) and must be the same value at - // this point (inside the if) as when the if condition was evaluated. - - // We insert an acquire fence here to synchronize-with the release upon - // incrementing dequeueOvercommit below. This ensures that whatever the - // value we got loaded into overcommit, the load of dequeueOptisticCount - // in the fetch_add below will result in a value at least as recent as - // that (and therefore at least as large). Note that I believe a compiler - // (signal) fence here would be sufficient due to the nature of fetch_add - // (all read-modify-write operations are guaranteed to work on the latest - // value in the modification order), but unfortunately that can't be shown - // to be correct using only the C++11 standard. See - // http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case - std::atomic_thread_fence(std::memory_order_acquire); - - // Increment optimistic counter, then check if it went over the boundary - auto myDequeueCount = - this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - - // Note that since dequeueOvercommit must be <= dequeueOptimisticCount - // (because dequeueOvercommit is only ever incremented after - // dequeueOptimisticCount -- this is enforced in the `else` block below), - // and since we now have a version of dequeueOptimisticCount that is at - // least as recent as overcommit (due to the release upon incrementing - // dequeueOvercommit and the acquire above that synchronizes with it), - // overcommit <= myDequeueCount. However, we can't assert this since both - // dequeueOptimisticCount and dequeueOvercommit may (independently) - // overflow; in such a case, though, the logic still holds since the - // difference between the two is maintained. - - // Note that we reload tail here in case it changed; it will be the same - // value as before or greater, since this load is sequenced after (happens - // after) the earlier load above. This is supported by read-read coherency - // (as defined in the standard), explained here: - // http://en.cppreference.com/w/cpp/atomic/memory_order - tail = this->tailIndex.load(std::memory_order_acquire); - if ((details::likely)(details::circular_less_than<index_t>( - myDequeueCount - overcommit, tail))) { - // Guaranteed to be at least one element to dequeue! - - // Get the index. Note that since there's guaranteed to be at least - // one element, this will never exceed tail. We need to do an - // acquire-release fence here since it's possible that whatever - // condition got us to this point was for an earlier enqueued element - // (that we already see the memory effects for), but that by the time - // we increment somebody else has incremented it, and we need to see - // the memory effects for *that* element, which is in such a case is - // necessarily visible on the thread that incremented it in the first - // place with the more current condition (they must have acquired a - // tail that is at least as recent). - auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); - - - // Determine which block the element is in - - auto localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto localBlockIndexHead = - localBlockIndex->front.load(std::memory_order_acquire); - - // We need to be careful here about subtracting and dividing because - // of index wrap-around. When an index wraps, we need to preserve the - // sign of the offset when dividing it by the block size (in order to - // get a correct signed block count offset in all cases): - auto headBase = localBlockIndex->entries[localBlockIndexHead].base; - auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1); - auto offset = static_cast<size_t>( - static_cast<typename std::make_signed<index_t>::type>( - blockBaseIndex - headBase) / - BLOCK_SIZE); - auto block = localBlockIndex - ->entries[(localBlockIndexHead + offset) & - (localBlockIndex->size - 1)] - .block; - - // Dequeue - auto& el = *((*block)[index]); - if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { - // Make sure the element is still fully dequeued and destroyed - // even if the assignment throws - struct Guard { - Block* block; - index_t index; - - ~Guard() - { - (*block)[index]->~T(); - block->ConcurrentQueue::Block::template set_empty< - explicit_context>(index); - } - } guard = { block, index }; - - element = std::move(el); // NOLINT - } - else { - element = std::move(el); // NOLINT - el.~T(); // NOLINT - block->ConcurrentQueue::Block::template set_empty< - explicit_context>(index); - } - - return true; - } - else { - // Wasn't anything to dequeue after all; make the effective dequeue - // count eventually consistent - this->dequeueOvercommit.fetch_add( - 1, std::memory_order_release); // Release so that the fetch_add on - // dequeueOptimisticCount is - // guaranteed to happen before this - // write - } - } - - return false; - } - - template <AllocationMode allocMode, typename It> - bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) - { - // First, we need to make sure we have enough room to enqueue all of the - // elements; this means pre-allocating blocks and putting them in the block - // index (but only if all the allocations succeeded). - index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); - auto startBlock = this->tailBlock; - auto originalBlockIndexFront = pr_blockIndexFront; - auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; - - Block* firstAllocatedBlock = nullptr; - - // Figure out how many blocks we'll need to allocate, and do so - size_t blockBaseDiff = - ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)); - index_t currentTailIndex = - (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); - if (blockBaseDiff > 0) { - // Allocate as many blocks as possible from ahead - while ( - blockBaseDiff > 0 && this->tailBlock != nullptr && - this->tailBlock->next != firstAllocatedBlock && - this->tailBlock->next - ->ConcurrentQueue::Block::template is_empty<explicit_context>()) { - blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); - currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - - this->tailBlock = this->tailBlock->next; - firstAllocatedBlock = firstAllocatedBlock == nullptr - ? this->tailBlock - : firstAllocatedBlock; - - auto& entry = blockIndex.load(std::memory_order_relaxed) - ->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - pr_blockIndexFront = - (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - } - - // Now allocate as many blocks as necessary from the block pool - while (blockBaseDiff > 0) { - blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); - currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!details::circular_less_than<index_t>(currentTailIndex, head)); - bool full = - !details::circular_less_than<index_t>( - head, currentTailIndex + BLOCK_SIZE) || - (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && - (MAX_SUBQUEUE_SIZE == 0 || - MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); - if (pr_blockIndexRaw == nullptr || - pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { - MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) - { - // Failed to allocate, undo changes (but keep injected blocks) - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = - startBlock == nullptr ? firstAllocatedBlock : startBlock; - return false; - } - else if (full || !new_block_index(originalBlockIndexSlotsUsed)) - { - // Failed to allocate, undo changes (but keep injected blocks) - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = - startBlock == nullptr ? firstAllocatedBlock : startBlock; - return false; - } - - // pr_blockIndexFront is updated inside new_block_index, so we - // need to update our fallback value too (since we keep the new - // index even if we later fail) - originalBlockIndexFront = originalBlockIndexSlotsUsed; - } - - // Insert a new block in the circular linked list - auto newBlock = - this->parent - ->ConcurrentQueue::template requisition_block<allocMode>(); - if (newBlock == nullptr) { - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = - startBlock == nullptr ? firstAllocatedBlock : startBlock; - return false; - } - -#ifdef MCDBGQ_TRACKMEM - newBlock->owner = this; -#endif - newBlock->ConcurrentQueue::Block::template set_all_empty< - explicit_context>(); - if (this->tailBlock == nullptr) { - newBlock->next = newBlock; - } - else { - newBlock->next = this->tailBlock->next; - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - firstAllocatedBlock = firstAllocatedBlock == nullptr - ? this->tailBlock - : firstAllocatedBlock; - - ++pr_blockIndexSlotsUsed; - - auto& entry = blockIndex.load(std::memory_order_relaxed) - ->entries[pr_blockIndexFront]; - entry.base = currentTailIndex; - entry.block = this->tailBlock; - pr_blockIndexFront = - (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - } - - // Excellent, all allocations succeeded. Reset each block's emptiness - // before we fill them up, and publish the new block index front - auto block = firstAllocatedBlock; - while (true) { - block->ConcurrentQueue::Block::template reset_empty< - explicit_context>(); - if (block == this->tailBlock) { - break; - } - block = block->next; - } - - MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR( - T, - decltype(*itemFirst), - new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) - { - blockIndex.load(std::memory_order_relaxed) - ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), - std::memory_order_release); - } - } - - // Enqueue, one block at a time - index_t newTailIndex = startTailIndex + static_cast<index_t>(count); - currentTailIndex = startTailIndex; - auto endBlock = this->tailBlock; - this->tailBlock = startBlock; - assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || - firstAllocatedBlock != nullptr || count == 0); - if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && - firstAllocatedBlock != nullptr) { - this->tailBlock = firstAllocatedBlock; - } - while (true) { - index_t stopIndex = - (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) { - stopIndex = newTailIndex; - } - MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR( - T, - decltype(*itemFirst), - new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) - { - while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); - } - } - else - { - MOODYCAMEL_TRY - { - while (currentTailIndex != stopIndex) { - // Must use copy constructor even if move constructor is - // available because we may have to revert if there's an - // exception. Sorry about the horrible templated next line, - // but it was the only way to disable moving *at compile - // time*, which is important because a type may only define a - // (noexcept) move constructor, and so calls to the cctor will - // not compile, even if they are in an if branch that will - // never be executed - new ((*this->tailBlock)[currentTailIndex]) - T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR( - T, - decltype(*itemFirst), - new (static_cast<T*>(nullptr)) - T(details::deref_noexcept( - itemFirst)))>::eval(*itemFirst)); - ++currentTailIndex; - ++itemFirst; - } - } - MOODYCAMEL_CATCH(...) - { - // Oh dear, an exception's been thrown -- destroy the elements - // that were enqueued so far and revert the entire bulk operation - // (we'll keep any allocated blocks in our linked list for later, - // though). - auto constructedStopIndex = currentTailIndex; - auto lastBlockEnqueued = this->tailBlock; - - pr_blockIndexFront = originalBlockIndexFront; - pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; - this->tailBlock = - startBlock == nullptr ? firstAllocatedBlock : startBlock; - - if (!details::is_trivially_destructible<T>::value) { - auto block = startBlock; - if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == - 0) { - block = firstAllocatedBlock; - } - currentTailIndex = startTailIndex; - while (true) { - stopIndex = (currentTailIndex & - ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - if (details::circular_less_than<index_t>( - constructedStopIndex, stopIndex)) { - stopIndex = constructedStopIndex; - } - while (currentTailIndex != stopIndex) { - (*block)[currentTailIndex++]->~T(); - } - if (block == lastBlockEnqueued) { - break; - } - block = block->next; - } - } - MOODYCAMEL_RETHROW; - } - } - - if (this->tailBlock == endBlock) { - assert(currentTailIndex == newTailIndex); - break; - } - this->tailBlock = this->tailBlock->next; - } - - MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( - T, - decltype(*itemFirst), - new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) - { - if (firstAllocatedBlock != nullptr) - blockIndex.load(std::memory_order_relaxed) - ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), - std::memory_order_release); - } - - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - - template <typename It> - size_t dequeue_bulk(It& itemFirst, size_t max) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - auto desiredCount = static_cast<size_t>( - tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - - overcommit)); - if (details::circular_less_than<size_t>(0, desiredCount)) { - desiredCount = desiredCount < max ? desiredCount : max; - std::atomic_thread_fence(std::memory_order_acquire); - - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add( - desiredCount, std::memory_order_relaxed); - - tail = this->tailIndex.load(std::memory_order_acquire); - auto actualCount = - static_cast<size_t>(tail - (myDequeueCount - overcommit)); - if (details::circular_less_than<size_t>(0, actualCount)) { - actualCount = desiredCount < actualCount ? desiredCount : actualCount; - if (actualCount < desiredCount) { - this->dequeueOvercommit.fetch_add(desiredCount - actualCount, - std::memory_order_release); - } - - // Get the first index. Note that since there's guaranteed to be at - // least actualCount elements, this will never exceed tail. - auto firstIndex = - this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - - // Determine which block the first element is in - auto localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto localBlockIndexHead = - localBlockIndex->front.load(std::memory_order_acquire); - - auto headBase = localBlockIndex->entries[localBlockIndexHead].base; - auto firstBlockBaseIndex = - firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1); - auto offset = static_cast<size_t>( - static_cast<typename std::make_signed<index_t>::type>( - firstBlockBaseIndex - headBase) / - BLOCK_SIZE); - auto indexIndex = - (localBlockIndexHead + offset) & (localBlockIndex->size - 1); - - // Iterate the blocks and dequeue - auto index = firstIndex; - do { - auto firstIndexInBlock = index; - index_t endIndex = - (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - endIndex = - details::circular_less_than<index_t>( - firstIndex + static_cast<index_t>(actualCount), endIndex) - ? firstIndex + static_cast<index_t>(actualCount) - : endIndex; - auto block = localBlockIndex->entries[indexIndex].block; - if (MOODYCAMEL_NOEXCEPT_ASSIGN( - T, - T&&, - details::deref_noexcept(itemFirst) = - std::move((*(*block)[index])))) { - while (index != endIndex) { - auto& el = *((*block)[index]); - *itemFirst++ = std::move(el); - el.~T(); - ++index; - } - } - else { - MOODYCAMEL_TRY - { - while (index != endIndex) { - auto& el = *((*block)[index]); - *itemFirst = std::move(el); - ++itemFirst; - el.~T(); - ++index; - } - } - MOODYCAMEL_CATCH(...) - { - // It's too late to revert the dequeue, but we can make - // sure that all the dequeued objects are properly - // destroyed and the block index (and empty count) are - // properly updated before we propagate the exception - do { - block = localBlockIndex->entries[indexIndex].block; - while (index != endIndex) { - (*block)[index++]->~T(); - } - block->ConcurrentQueue::Block:: - template set_many_empty<explicit_context>( - firstIndexInBlock, - static_cast<size_t>(endIndex - - firstIndexInBlock)); - indexIndex = - (indexIndex + 1) & (localBlockIndex->size - 1); - - firstIndexInBlock = index; - endIndex = - (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - endIndex = details::circular_less_than<index_t>( - firstIndex + - static_cast<index_t>(actualCount), - endIndex) - ? firstIndex + - static_cast<index_t>(actualCount) - : endIndex; - } while (index != firstIndex + actualCount); - - MOODYCAMEL_RETHROW; - } - } - block->ConcurrentQueue::Block::template set_many_empty< - explicit_context>( - firstIndexInBlock, - static_cast<size_t>(endIndex - firstIndexInBlock)); - indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); - } while (index != firstIndex + actualCount); - - return actualCount; - } - else { - // Wasn't anything to dequeue after all; make the effective dequeue - // count eventually consistent - this->dequeueOvercommit.fetch_add(desiredCount, - std::memory_order_release); - } - } - - return 0; - } - - private: - struct BlockIndexEntry { - index_t base; - Block* block; - }; - - struct BlockIndexHeader { - size_t size; - std::atomic<size_t> front; // Current slot (not next, like pr_blockIndexFront) - BlockIndexEntry* entries; - void* prev; - }; - - - bool new_block_index(size_t numberOfFilledSlotsToExpose) - { - auto prevBlockSizeMask = pr_blockIndexSize - 1; - - // Create the new block - pr_blockIndexSize <<= 1; - auto newRawPtr = static_cast<char*>((Traits::malloc)( - sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + - sizeof(BlockIndexEntry) * pr_blockIndexSize)); - if (newRawPtr == nullptr) { - pr_blockIndexSize >>= 1; // Reset to allow graceful retry - return false; - } - - auto newBlockIndexEntries = - reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>( - newRawPtr + sizeof(BlockIndexHeader))); - - // Copy in all the old indices, if any - size_t j = 0; - if (pr_blockIndexSlotsUsed != 0) { - auto i = - (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; - do { - newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; - i = (i + 1) & prevBlockSizeMask; - } while (i != pr_blockIndexFront); - } - - // Update everything - auto header = new (newRawPtr) BlockIndexHeader; - header->size = pr_blockIndexSize; - header->front.store(numberOfFilledSlotsToExpose - 1, - std::memory_order_relaxed); - header->entries = newBlockIndexEntries; - header->prev = pr_blockIndexRaw; // we link the new block to the old one so we - // can free it later - - pr_blockIndexFront = j; - pr_blockIndexEntries = newBlockIndexEntries; - pr_blockIndexRaw = newRawPtr; - blockIndex.store(header, std::memory_order_release); - - return true; - } - - private: - std::atomic<BlockIndexHeader*> blockIndex; - - // To be used by producer only -- consumer must use the ones in referenced by - // blockIndex - size_t pr_blockIndexSlotsUsed; - size_t pr_blockIndexSize; - size_t pr_blockIndexFront; // Next slot (not current) - BlockIndexEntry* pr_blockIndexEntries; - void* pr_blockIndexRaw; - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - public: - ExplicitProducer* nextExplicitProducer; - - private: -#endif - -#ifdef MCDBGQ_TRACKMEM - friend struct MemStats; -#endif - }; - - - ////////////////////////////////// - // Implicit queue - ////////////////////////////////// - - struct ImplicitProducer : public ProducerBase { - ImplicitProducer(ConcurrentQueue* parent_) - : ProducerBase(parent_, false), - nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), - blockIndex(nullptr) - { - new_block_index(); - } - - ~ImplicitProducer() override - { - // Note that since we're in the destructor we can assume that all - // enqueue/dequeue operations completed already; this means that all - // undequeued elements are placed contiguously across contiguous blocks, and - // that only the first and last remaining blocks can be only partially empty - // (all other remaining blocks must be completely full). - -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED - // Unregister ourselves for thread termination notification - if (!this->inactive.load(std::memory_order_relaxed)) { - details::ThreadExitNotifier::unsubscribe(&threadExitListener); - } -#endif - - // Destroy all remaining elements! - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto index = this->headIndex.load(std::memory_order_relaxed); - Block* block = nullptr; - assert(index == tail || details::circular_less_than(index, tail)); - bool forceFreeLastBlock = - index != tail; // If we enter the loop, then the last (tail) block will - // not be freed - while (index != tail) { - if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || - block == nullptr) { - if (block != nullptr) { - // Free the old block - this->parent->add_block_to_free_list(block); - } - - block = get_block_index_entry_for_index(index)->value.load( - std::memory_order_relaxed); - } - - ((*block)[index])->~T(); - ++index; - } - // Even if the queue is empty, there's still one block that's not on the free - // list (unless the head index reached the end of it, in which case the tail - // will be poised to create a new block). - if (this->tailBlock != nullptr && - (forceFreeLastBlock || - (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) { - this->parent->add_block_to_free_list(this->tailBlock); - } - - // Destroy block index - auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); - if (localBlockIndex != nullptr) { - for (size_t i = 0; i != localBlockIndex->capacity; ++i) { - localBlockIndex->index[i]->~BlockIndexEntry(); - } - do { - auto prev = localBlockIndex->prev; - localBlockIndex->~BlockIndexHeader(); - (Traits::free)(localBlockIndex); - localBlockIndex = prev; - } while (localBlockIndex != nullptr); - } - } - - template <AllocationMode allocMode, typename U> - inline bool enqueue(U&& element) - { - index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); - index_t newTailIndex = 1 + currentTailIndex; - if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) { - // We reached the end of a block, start a new one - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!details::circular_less_than<index_t>(currentTailIndex, head)); - if (!details::circular_less_than<index_t>( - head, currentTailIndex + BLOCK_SIZE) || - (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && - (MAX_SUBQUEUE_SIZE == 0 || - MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { - return false; - } -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - debug::DebugLock lock(mutex); -#endif - // Find out where we'll be inserting this block in the block index - BlockIndexEntry* idxEntry; - if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) { - return false; - } - - // Get ahold of a new block - auto newBlock = - this->parent - ->ConcurrentQueue::template requisition_block<allocMode>(); - if (newBlock == nullptr) { - rewind_block_index_tail(); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - return false; - } -#ifdef MCDBGQ_TRACKMEM - newBlock->owner = this; -#endif - newBlock - ->ConcurrentQueue::Block::template reset_empty<implicit_context>(); - - MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( - T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) - { - // May throw, try to insert now before we publish the fact that we - // have this new block - MOODYCAMEL_TRY - { - new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element)); - } - MOODYCAMEL_CATCH(...) - { - rewind_block_index_tail(); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - this->parent->add_block_to_free_list(newBlock); - MOODYCAMEL_RETHROW; - } - } - - // Insert the new block into the index - idxEntry->value.store(newBlock, std::memory_order_relaxed); - - this->tailBlock = newBlock; - - MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR( - T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) - { - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - } - - // Enqueue - new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element)); - - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } - - template <typename U> - bool dequeue(U& element) - { - // See ExplicitProducer::dequeue for rationale and explanation - index_t tail = this->tailIndex.load(std::memory_order_relaxed); - index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - if (details::circular_less_than<index_t>( - this->dequeueOptimisticCount.load(std::memory_order_relaxed) - - overcommit, - tail)) { - std::atomic_thread_fence(std::memory_order_acquire); - - index_t myDequeueCount = - this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); - tail = this->tailIndex.load(std::memory_order_acquire); - if ((details::likely)(details::circular_less_than<index_t>( - myDequeueCount - overcommit, tail))) { - index_t index = - this->headIndex.fetch_add(1, std::memory_order_acq_rel); - - // Determine which block the element is in - auto entry = get_block_index_entry_for_index(index); - - // Dequeue - auto block = entry->value.load(std::memory_order_relaxed); - auto& el = *((*block)[index]); - - if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - // Note: Acquiring the mutex with every dequeue instead of only - // when a block is released is very sub-optimal, but it is, after - // all, purely debug code. - debug::DebugLock lock(producer->mutex); -#endif - struct Guard { - Block* block; - index_t index; - BlockIndexEntry* entry; - ConcurrentQueue* parent; - - ~Guard() - { - (*block)[index]->~T(); - if (block->ConcurrentQueue::Block::template set_empty< - implicit_context>(index)) { - entry->value.store(nullptr, - std::memory_order_relaxed); - parent->add_block_to_free_list(block); - } - } - } guard = { block, index, entry, this->parent }; - - element = std::move(el); // NOLINT - } - else { - element = std::move(el); // NOLINT - el.~T(); // NOLINT - - if (block->ConcurrentQueue::Block::template set_empty< - implicit_context>(index)) { - { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - debug::DebugLock lock(mutex); -#endif - // Add the block back into the global free pool (and - // remove from block index) - entry->value.store(nullptr, std::memory_order_relaxed); - } - this->parent->add_block_to_free_list( - block); // releases the above store - } - } - - return true; - } - else { - this->dequeueOvercommit.fetch_add(1, std::memory_order_release); - } - } - - return false; - } - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4706) // assignment within conditional expression -#endif - template <AllocationMode allocMode, typename It> - bool enqueue_bulk(It itemFirst, size_t count) - { - // First, we need to make sure we have enough room to enqueue all of the - // elements; this means pre-allocating blocks and putting them in the block - // index (but only if all the allocations succeeded). - - // Note that the tailBlock we start off with may not be owned by us any more; - // this happens if it was filled up exactly to the top (setting tailIndex to - // the first index of the next block which is not yet allocated), then - // dequeued completely (putting it on the free list) before we enqueue again. - - index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); - auto startBlock = this->tailBlock; - Block* firstAllocatedBlock = nullptr; - auto endBlock = this->tailBlock; - - // Figure out how many blocks we'll need to allocate, and do so - size_t blockBaseDiff = - ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)); - index_t currentTailIndex = - (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); - if (blockBaseDiff > 0) { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - debug::DebugLock lock(mutex); -#endif - do { - blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE); - currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - - // Find out where we'll be inserting this block in the block index - BlockIndexEntry* idxEntry = - nullptr; // initialization here unnecessary but compiler can't - // always tell - Block* newBlock; - bool indexInserted = false; - auto head = this->headIndex.load(std::memory_order_relaxed); - assert(!details::circular_less_than<index_t>(currentTailIndex, head)); - bool full = - !details::circular_less_than<index_t>( - head, currentTailIndex + BLOCK_SIZE) || - (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && - (MAX_SUBQUEUE_SIZE == 0 || - MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); - - if (full || - !(indexInserted = insert_block_index_entry<allocMode>( - idxEntry, currentTailIndex)) || - (newBlock = - this->parent->ConcurrentQueue::template requisition_block< - allocMode>()) == nullptr) { - // Index allocation or block allocation failed; revert any other - // allocations and index insertions done so far for this operation - if (indexInserted) { - rewind_block_index_tail(); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - } - currentTailIndex = - (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); - for (auto block = firstAllocatedBlock; block != nullptr; - block = block->next) { - currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - idxEntry = get_block_index_entry_for_index(currentTailIndex); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - rewind_block_index_tail(); - } - this->parent->add_blocks_to_free_list(firstAllocatedBlock); - this->tailBlock = startBlock; - - return false; - } - -#ifdef MCDBGQ_TRACKMEM - newBlock->owner = this; -#endif - newBlock->ConcurrentQueue::Block::template reset_empty< - implicit_context>(); - newBlock->next = nullptr; - - // Insert the new block into the index - idxEntry->value.store(newBlock, std::memory_order_relaxed); - - // Store the chain of blocks so that we can undo if later allocations - // fail, and so that we can find the blocks when we do the actual - // enqueueing - if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || - firstAllocatedBlock != nullptr) { - assert(this->tailBlock != nullptr); - this->tailBlock->next = newBlock; - } - this->tailBlock = newBlock; - endBlock = newBlock; - firstAllocatedBlock = - firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock; - } while (blockBaseDiff > 0); - } - - // Enqueue, one block at a time - index_t newTailIndex = startTailIndex + static_cast<index_t>(count); - currentTailIndex = startTailIndex; - this->tailBlock = startBlock; - assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || - firstAllocatedBlock != nullptr || count == 0); - if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && - firstAllocatedBlock != nullptr) { - this->tailBlock = firstAllocatedBlock; - } - while (true) { - index_t stopIndex = - (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) { - stopIndex = newTailIndex; - } - MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR( - T, - decltype(*itemFirst), - new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) - { - while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); - } - } - else - { - MOODYCAMEL_TRY - { - while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex]) - T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR( - T, - decltype(*itemFirst), - new (static_cast<T*>(nullptr)) - T(details::deref_noexcept( - itemFirst)))>::eval(*itemFirst)); - ++currentTailIndex; - ++itemFirst; - } - } - MOODYCAMEL_CATCH(...) - { - auto constructedStopIndex = currentTailIndex; - auto lastBlockEnqueued = this->tailBlock; - - if (!details::is_trivially_destructible<T>::value) { - auto block = startBlock; - if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == - 0) { - block = firstAllocatedBlock; - } - currentTailIndex = startTailIndex; - while (true) { - stopIndex = (currentTailIndex & - ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - if (details::circular_less_than<index_t>( - constructedStopIndex, stopIndex)) { - stopIndex = constructedStopIndex; - } - while (currentTailIndex != stopIndex) { - (*block)[currentTailIndex++]->~T(); - } - if (block == lastBlockEnqueued) { - break; - } - block = block->next; - } - } - - currentTailIndex = - (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1); - for (auto block = firstAllocatedBlock; block != nullptr; - block = block->next) { - currentTailIndex += static_cast<index_t>(BLOCK_SIZE); - auto idxEntry = - get_block_index_entry_for_index(currentTailIndex); - idxEntry->value.store(nullptr, std::memory_order_relaxed); - rewind_block_index_tail(); - } - this->parent->add_blocks_to_free_list(firstAllocatedBlock); - this->tailBlock = startBlock; - MOODYCAMEL_RETHROW; - } - } - - if (this->tailBlock == endBlock) { - assert(currentTailIndex == newTailIndex); - break; - } - this->tailBlock = this->tailBlock->next; - } - this->tailIndex.store(newTailIndex, std::memory_order_release); - return true; - } -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - template <typename It> - size_t dequeue_bulk(It& itemFirst, size_t max) - { - auto tail = this->tailIndex.load(std::memory_order_relaxed); - auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); - auto desiredCount = static_cast<size_t>( - tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - - overcommit)); - if (details::circular_less_than<size_t>(0, desiredCount)) { - desiredCount = desiredCount < max ? desiredCount : max; - std::atomic_thread_fence(std::memory_order_acquire); - - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add( - desiredCount, std::memory_order_relaxed); - - tail = this->tailIndex.load(std::memory_order_acquire); - auto actualCount = - static_cast<size_t>(tail - (myDequeueCount - overcommit)); - if (details::circular_less_than<size_t>(0, actualCount)) { - actualCount = desiredCount < actualCount ? desiredCount : actualCount; - if (actualCount < desiredCount) { - this->dequeueOvercommit.fetch_add(desiredCount - actualCount, - std::memory_order_release); - } - - // Get the first index. Note that since there's guaranteed to be at - // least actualCount elements, this will never exceed tail. - auto firstIndex = - this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); - - // Iterate the blocks and dequeue - auto index = firstIndex; - BlockIndexHeader* localBlockIndex; - auto indexIndex = - get_block_index_index_for_index(index, localBlockIndex); - do { - auto blockStartIndex = index; - index_t endIndex = - (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - endIndex = - details::circular_less_than<index_t>( - firstIndex + static_cast<index_t>(actualCount), endIndex) - ? firstIndex + static_cast<index_t>(actualCount) - : endIndex; - - auto entry = localBlockIndex->index[indexIndex]; - auto block = entry->value.load(std::memory_order_relaxed); - if (MOODYCAMEL_NOEXCEPT_ASSIGN( - T, - T&&, - details::deref_noexcept(itemFirst) = - std::move((*(*block)[index])))) { - while (index != endIndex) { - auto& el = *((*block)[index]); - *itemFirst++ = std::move(el); - el.~T(); - ++index; - } - } - else { - MOODYCAMEL_TRY - { - while (index != endIndex) { - auto& el = *((*block)[index]); - *itemFirst = std::move(el); - ++itemFirst; - el.~T(); - ++index; - } - } - MOODYCAMEL_CATCH(...) - { - do { - entry = localBlockIndex->index[indexIndex]; - block = entry->value.load(std::memory_order_relaxed); - while (index != endIndex) { - (*block)[index++]->~T(); - } - - if (block->ConcurrentQueue::Block:: - template set_many_empty<implicit_context>( - blockStartIndex, - static_cast<size_t>(endIndex - - blockStartIndex))) { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - debug::DebugLock lock(mutex); -#endif - entry->value.store(nullptr, - std::memory_order_relaxed); - this->parent->add_block_to_free_list(block); - } - indexIndex = (indexIndex + 1) & - (localBlockIndex->capacity - 1); - - blockStartIndex = index; - endIndex = - (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + - static_cast<index_t>(BLOCK_SIZE); - endIndex = details::circular_less_than<index_t>( - firstIndex + - static_cast<index_t>(actualCount), - endIndex) - ? firstIndex + - static_cast<index_t>(actualCount) - : endIndex; - } while (index != firstIndex + actualCount); - - MOODYCAMEL_RETHROW; - } - } - if (block->ConcurrentQueue::Block::template set_many_empty< - implicit_context>( - blockStartIndex, - static_cast<size_t>(endIndex - blockStartIndex))) { - { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - debug::DebugLock lock(mutex); -#endif - // Note that the set_many_empty above did a release, - // meaning that anybody who acquires the block we're about - // to free can use it safely since our writes (and reads!) - // will have happened-before then. - entry->value.store(nullptr, std::memory_order_relaxed); - } - this->parent->add_block_to_free_list( - block); // releases the above store - } - indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); - } while (index != firstIndex + actualCount); - - return actualCount; - } - else { - this->dequeueOvercommit.fetch_add(desiredCount, - std::memory_order_release); - } - } - - return 0; - } - - private: - // The block size must be > 1, so any number with the low bit set is an invalid - // block base index - static const index_t INVALID_BLOCK_BASE = 1; - - struct BlockIndexEntry { - std::atomic<index_t> key; - std::atomic<Block*> value; - }; - - struct BlockIndexHeader { - size_t capacity; - std::atomic<size_t> tail; - BlockIndexEntry* entries; - BlockIndexEntry** index; - BlockIndexHeader* prev; - }; - - template <AllocationMode allocMode> - inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, - index_t blockStartIndex) - { - auto localBlockIndex = blockIndex.load( - std::memory_order_relaxed); // We're the only writer thread, relaxed is OK - if (localBlockIndex == nullptr) { - return false; // this can happen if new_block_index failed in the - // constructor - } - size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & - (localBlockIndex->capacity - 1); - idxEntry = localBlockIndex->index[newTail]; - if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || - idxEntry->value.load(std::memory_order_relaxed) == nullptr) { - - idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); - localBlockIndex->tail.store(newTail, std::memory_order_release); - return true; - } - - // No room in the old block index, try to allocate another one! - MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { return false; } - else if (!new_block_index()) { return false; } - localBlockIndex = blockIndex.load(std::memory_order_relaxed); - newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & - (localBlockIndex->capacity - 1); - idxEntry = localBlockIndex->index[newTail]; - assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); - idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); - localBlockIndex->tail.store(newTail, std::memory_order_release); - return true; - } - - inline void rewind_block_index_tail() - { - auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); - localBlockIndex->tail.store( - (localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & - (localBlockIndex->capacity - 1), - std::memory_order_relaxed); - } - - inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const - { - BlockIndexHeader* localBlockIndex; - auto idx = get_block_index_index_for_index(index, localBlockIndex); - return localBlockIndex->index[idx]; - } - - inline size_t - get_block_index_index_for_index(index_t index, - BlockIndexHeader*& localBlockIndex) const - { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - debug::DebugLock lock(mutex); -#endif - index &= ~static_cast<index_t>(BLOCK_SIZE - 1); - localBlockIndex = blockIndex.load(std::memory_order_acquire); - auto tail = localBlockIndex->tail.load(std::memory_order_acquire); - auto tailBase = - localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); - assert(tailBase != INVALID_BLOCK_BASE); - // Note: Must use division instead of shift because the index may wrap around, - // causing a negative offset, whose negativity we want to preserve - auto offset = static_cast<size_t>( - static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / - BLOCK_SIZE); - size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); - assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == - index && - localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != - nullptr); - return idx; - } - - bool new_block_index() - { - auto prev = blockIndex.load(std::memory_order_relaxed); - size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; - auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; - auto raw = static_cast<char*>((Traits::malloc)( - sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + - sizeof(BlockIndexEntry) * entryCount + - std::alignment_of<BlockIndexEntry*>::value - 1 + - sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); - if (raw == nullptr) { - return false; - } - - auto header = new (raw) BlockIndexHeader; - auto entries = reinterpret_cast<BlockIndexEntry*>( - details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader))); - auto index = - reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>( - reinterpret_cast<char*>(entries) + - sizeof(BlockIndexEntry) * entryCount)); - if (prev != nullptr) { - auto prevTail = prev->tail.load(std::memory_order_relaxed); - auto prevPos = prevTail; - size_t i = 0; - do { - prevPos = (prevPos + 1) & (prev->capacity - 1); - index[i++] = prev->index[prevPos]; - } while (prevPos != prevTail); - assert(i == prevCapacity); - } - for (size_t i = 0; i != entryCount; ++i) { - new (entries + i) BlockIndexEntry; - entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); - index[prevCapacity + i] = entries + i; - } - header->prev = prev; - header->entries = entries; - header->index = index; - header->capacity = nextBlockIndexCapacity; - header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), - std::memory_order_relaxed); - - blockIndex.store(header, std::memory_order_release); - - nextBlockIndexCapacity <<= 1; - - return true; - } - - private: - size_t nextBlockIndexCapacity; - std::atomic<BlockIndexHeader*> blockIndex; - -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED - public: - details::ThreadExitListener threadExitListener; - - private: -#endif - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - public: - ImplicitProducer* nextImplicitProducer; - - private: -#endif - -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX - mutable debug::DebugMutex mutex; -#endif -#ifdef MCDBGQ_TRACKMEM - friend struct MemStats; -#endif - }; - - - ////////////////////////////////// - // Block pool manipulation - ////////////////////////////////// - - void populate_initial_block_list(size_t blockCount) - { - initialBlockPoolSize = blockCount; - if (initialBlockPoolSize == 0) { - initialBlockPool = nullptr; - return; - } - - initialBlockPool = create_array<Block>(blockCount); - if (initialBlockPool == nullptr) { - initialBlockPoolSize = 0; - } - for (size_t i = 0; i < initialBlockPoolSize; ++i) { - initialBlockPool[i].dynamicallyAllocated = false; - } - } - - inline Block* try_get_block_from_initial_pool() - { - if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= - initialBlockPoolSize) { - return nullptr; - } - - auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); - - return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr; - } - - inline void add_block_to_free_list(Block* block) - { -#ifdef MCDBGQ_TRACKMEM - block->owner = nullptr; -#endif - freeList.add(block); - } - - inline void add_blocks_to_free_list(Block* block) - { - while (block != nullptr) { - auto next = block->next; - add_block_to_free_list(block); - block = next; - } - } - - inline Block* try_get_block_from_free_list() { return freeList.try_get(); } - - // Gets a free block from one of the memory pools, or allocates a new one (if - // applicable) - template <AllocationMode canAlloc> - Block* requisition_block() - { - auto block = try_get_block_from_initial_pool(); - if (block != nullptr) { - return block; - } - - block = try_get_block_from_free_list(); - if (block != nullptr) { - return block; - } - - MOODYCAMEL_CONSTEXPR_IF(canAlloc == CanAlloc) { return create<Block>(); } - else { return nullptr; } - } - - -#ifdef MCDBGQ_TRACKMEM -public: - struct MemStats { - size_t allocatedBlocks; - size_t usedBlocks; - size_t freeBlocks; - size_t ownedBlocksExplicit; - size_t ownedBlocksImplicit; - size_t implicitProducers; - size_t explicitProducers; - size_t elementsEnqueued; - size_t blockClassBytes; - size_t queueClassBytes; - size_t implicitBlockIndexBytes; - size_t explicitBlockIndexBytes; - - friend class ConcurrentQueue; - - private: - static MemStats getFor(ConcurrentQueue* q) - { - MemStats stats = { 0 }; - - stats.elementsEnqueued = q->size_approx(); - - auto block = q->freeList.head_unsafe(); - while (block != nullptr) { - ++stats.allocatedBlocks; - ++stats.freeBlocks; - block = block->freeListNext.load(std::memory_order_relaxed); - } - - for (auto ptr = q->producerListTail.load(std::memory_order_acquire); - ptr != nullptr; - ptr = ptr->next_prod()) { - bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr; - stats.implicitProducers += implicit ? 1 : 0; - stats.explicitProducers += implicit ? 0 : 1; - - if (implicit) { - auto prod = static_cast<ImplicitProducer*>(ptr); - stats.queueClassBytes += sizeof(ImplicitProducer); - auto head = prod->headIndex.load(std::memory_order_relaxed); - auto tail = prod->tailIndex.load(std::memory_order_relaxed); - auto hash = prod->blockIndex.load(std::memory_order_relaxed); - if (hash != nullptr) { - for (size_t i = 0; i != hash->capacity; ++i) { - if (hash->index[i]->key.load(std::memory_order_relaxed) != - ImplicitProducer::INVALID_BLOCK_BASE && - hash->index[i]->value.load(std::memory_order_relaxed) != - nullptr) { - ++stats.allocatedBlocks; - ++stats.ownedBlocksImplicit; - } - } - stats.implicitBlockIndexBytes += - hash->capacity * - sizeof(typename ImplicitProducer::BlockIndexEntry); - for (; hash != nullptr; hash = hash->prev) { - stats.implicitBlockIndexBytes += - sizeof(typename ImplicitProducer::BlockIndexHeader) + - hash->capacity * - sizeof(typename ImplicitProducer::BlockIndexEntry*); - } - } - for (; details::circular_less_than<index_t>(head, tail); - head += BLOCK_SIZE) { - // auto block = prod->get_block_index_entry_for_index(head); - ++stats.usedBlocks; - } - } - else { - auto prod = static_cast<ExplicitProducer*>(ptr); - stats.queueClassBytes += sizeof(ExplicitProducer); - auto tailBlock = prod->tailBlock; - bool wasNonEmpty = false; - if (tailBlock != nullptr) { - auto block = tailBlock; - do { - ++stats.allocatedBlocks; - if (!block->ConcurrentQueue::Block::template is_empty< - explicit_context>() || - wasNonEmpty) { - ++stats.usedBlocks; - wasNonEmpty = wasNonEmpty || block != tailBlock; - } - ++stats.ownedBlocksExplicit; - block = block->next; - } while (block != tailBlock); - } - auto index = prod->blockIndex.load(std::memory_order_relaxed); - while (index != nullptr) { - stats.explicitBlockIndexBytes += - sizeof(typename ExplicitProducer::BlockIndexHeader) + - index->size * - sizeof(typename ExplicitProducer::BlockIndexEntry); - index = static_cast<typename ExplicitProducer::BlockIndexHeader*>( - index->prev); - } - } - } - - auto freeOnInitialPool = - q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= - q->initialBlockPoolSize - ? 0 - : q->initialBlockPoolSize - - q->initialBlockPoolIndex.load(std::memory_order_relaxed); - stats.allocatedBlocks += freeOnInitialPool; - stats.freeBlocks += freeOnInitialPool; - - stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; - stats.queueClassBytes += sizeof(ConcurrentQueue); - - return stats; - } - }; - - // For debugging only. Not thread-safe. - MemStats getMemStats() { return MemStats::getFor(this); } - -private: - friend struct MemStats; -#endif - - - ////////////////////////////////// - // Producer list manipulation - ////////////////////////////////// - - ProducerBase* recycle_or_create_producer(bool isExplicit) - { - bool recycled; - return recycle_or_create_producer(isExplicit, recycled); - } - - ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) - { -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH - debug::DebugLock lock(implicitProdMutex); -#endif - // Try to re-use one first - for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; - ptr = ptr->next_prod()) { - if (ptr->inactive.load(std::memory_order_relaxed) && - ptr->isExplicit == isExplicit) { - bool expected = true; - if (ptr->inactive.compare_exchange_strong(expected, - /* desired */ false, - std::memory_order_acquire, - std::memory_order_relaxed)) { - // We caught one! It's been marked as activated, the caller can have - // it - recycled = true; - return ptr; - } - } - } - - recycled = false; - return add_producer( - isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) - : create<ImplicitProducer>(this)); - } - - ProducerBase* add_producer(ProducerBase* producer) - { - // Handle failed memory allocation - if (producer == nullptr) { - return nullptr; - } - - producerCount.fetch_add(1, std::memory_order_relaxed); - - // Add it to the lock-free list - auto prevTail = producerListTail.load(std::memory_order_relaxed); - do { - producer->next = prevTail; - } while (!producerListTail.compare_exchange_weak( - prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - if (producer->isExplicit) { - auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); - do { - static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = - prevTailExplicit; - } while (!explicitProducers.compare_exchange_weak( - prevTailExplicit, - static_cast<ExplicitProducer*>(producer), - std::memory_order_release, - std::memory_order_relaxed)); - } - else { - auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); - do { - static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = - prevTailImplicit; - } while (!implicitProducers.compare_exchange_weak( - prevTailImplicit, - static_cast<ImplicitProducer*>(producer), - std::memory_order_release, - std::memory_order_relaxed)); - } -#endif - - return producer; - } - - void reown_producers() - { - // After another instance is moved-into/swapped-with this one, all the - // producers we stole still think their parents are the other queue. - // So fix them up! - for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; - ptr = ptr->next_prod()) { - ptr->parent = this; - } - } - - - ////////////////////////////////// - // Implicit producer hash - ////////////////////////////////// - - struct ImplicitProducerKVP { - std::atomic<details::thread_id_t> key; - ImplicitProducer* value; // No need for atomicity since it's only read by the - // thread that sets it in the first place - - ImplicitProducerKVP() : value(nullptr) {} - - ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT - { - key.store(other.key.load(std::memory_order_relaxed), - std::memory_order_relaxed); - value = other.value; - } - - inline ImplicitProducerKVP& - operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT - { - swap(other); - return *this; - } - - inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT - { - if (this != &other) { - details::swap_relaxed(key, other.key); - std::swap(value, other.value); - } - } - }; - - template <typename XT, typename XTraits> - friend void moodycamel::swap( - typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, - typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; - - struct ImplicitProducerHash { - size_t capacity; - ImplicitProducerKVP* entries; - ImplicitProducerHash* prev; - }; - - inline void populate_initial_implicit_producer_hash() - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { return; } - else - { - implicitProducerHashCount.store(0, std::memory_order_relaxed); - auto hash = &initialImplicitProducerHash; - hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - hash->entries = &initialImplicitProducerHashEntries[0]; - for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { - initialImplicitProducerHashEntries[i].key.store( - details::invalid_thread_id, std::memory_order_relaxed); - } - hash->prev = nullptr; - implicitProducerHash.store(hash, std::memory_order_relaxed); - } - } - - void swap_implicit_producer_hashes(ConcurrentQueue& other) - { - MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { return; } - else - { - // Swap (assumes our implicit producer hash is initialized) - initialImplicitProducerHashEntries.swap( - other.initialImplicitProducerHashEntries); - initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; - other.initialImplicitProducerHash.entries = - &other.initialImplicitProducerHashEntries[0]; - - details::swap_relaxed(implicitProducerHashCount, - other.implicitProducerHashCount); - - details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); - if (implicitProducerHash.load(std::memory_order_relaxed) == - &other.initialImplicitProducerHash) { - implicitProducerHash.store(&initialImplicitProducerHash, - std::memory_order_relaxed); - } - else { - ImplicitProducerHash* hash; - for (hash = implicitProducerHash.load(std::memory_order_relaxed); - hash->prev != &other.initialImplicitProducerHash; - hash = hash->prev) { - continue; - } - hash->prev = &initialImplicitProducerHash; - } - if (other.implicitProducerHash.load(std::memory_order_relaxed) == - &initialImplicitProducerHash) { - other.implicitProducerHash.store(&other.initialImplicitProducerHash, - std::memory_order_relaxed); - } - else { - ImplicitProducerHash* hash; - for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); - hash->prev != &initialImplicitProducerHash; - hash = hash->prev) { - continue; - } - hash->prev = &other.initialImplicitProducerHash; - } - } - } - - // Only fails (returns nullptr) if memory allocation fails - ImplicitProducer* get_or_add_implicit_producer() - { - // Note that since the data is essentially thread-local (key is thread ID), - // there's a reduced need for fences (memory ordering is already consistent - // for any individual thread), except for the current table itself. - - // Start by looking for the thread ID in the current and all previous hash tables. - // If it's not found, it must not be in there yet, since this same thread would - // have added it previously to one of the tables that we traversed. - - // Code and algorithm adapted from - // http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table - -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH - debug::DebugLock lock(implicitProdMutex); -#endif - - auto id = details::thread_id(); - auto hashedId = details::hash_thread_id(id); - - auto mainHash = implicitProducerHash.load(std::memory_order_acquire); - assert(mainHash != - nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) - for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { - // Look for the id in this hash - auto index = hashedId; - while (true) { // Not an infinite loop because at least one slot is free in - // the hash table - index &= hash->capacity - 1; - - auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); - if (probedKey == id) { - // Found it! If we had to search several hashes deep, though, we - // should lazily add it to the current main hash table to avoid the - // extended search next time. Note there's guaranteed to be room in - // the current hash table since every subsequent table implicitly - // reserves space for all previous tables (there's only one - // implicitProducerHashCount). - auto value = hash->entries[index].value; - if (hash != mainHash) { - index = hashedId; - while (true) { - index &= mainHash->capacity - 1; - probedKey = mainHash->entries[index].key.load( - std::memory_order_relaxed); - auto empty = details::invalid_thread_id; -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED - auto reusable = details::invalid_thread_id2; - if ((probedKey == empty && - mainHash->entries[index].key.compare_exchange_strong( - empty, - id, - std::memory_order_relaxed, - std::memory_order_relaxed)) || - (probedKey == reusable && - mainHash->entries[index].key.compare_exchange_strong( - reusable, - id, - std::memory_order_acquire, - std::memory_order_acquire))) { -#else - if ((probedKey == empty && - mainHash->entries[index].key.compare_exchange_strong( - empty, - id, - std::memory_order_relaxed, - std::memory_order_relaxed))) { -#endif - mainHash->entries[index].value = value; - break; - } - ++index; - } - } - - return value; - } - if (probedKey == details::invalid_thread_id) { - break; // Not in this hash table - } - ++index; - } - } - - // Insert! - auto newCount = - 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); - while (true) { - // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) - if (newCount >= (mainHash->capacity >> 1) && - !implicitProducerHashResizeInProgress.test_and_set( - std::memory_order_acquire)) { - // We've acquired the resize lock, try to allocate a bigger hash table. - // Note the acquire fence synchronizes with the release fence at the end - // of this block, and hence when we reload implicitProducerHash it must be - // the most recent version (it only gets changed within this locked - // block). - mainHash = implicitProducerHash.load(std::memory_order_acquire); - if (newCount >= (mainHash->capacity >> 1)) { - auto newCapacity = mainHash->capacity << 1; - while (newCount >= (newCapacity >> 1)) { - newCapacity <<= 1; - } - auto raw = static_cast<char*>( - (Traits::malloc)(sizeof(ImplicitProducerHash) + - std::alignment_of<ImplicitProducerKVP>::value - - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); - if (raw == nullptr) { - // Allocation failed - implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); - implicitProducerHashResizeInProgress.clear( - std::memory_order_relaxed); - return nullptr; - } - - auto newHash = new (raw) ImplicitProducerHash; - newHash->capacity = static_cast<size_t>(newCapacity); - newHash->entries = reinterpret_cast<ImplicitProducerKVP*>( - details::align_for<ImplicitProducerKVP>( - raw + sizeof(ImplicitProducerHash))); - for (size_t i = 0; i != newCapacity; ++i) { - new (newHash->entries + i) ImplicitProducerKVP; - newHash->entries[i].key.store(details::invalid_thread_id, - std::memory_order_relaxed); - } - newHash->prev = mainHash; - implicitProducerHash.store(newHash, std::memory_order_release); - implicitProducerHashResizeInProgress.clear(std::memory_order_release); - mainHash = newHash; - } - else { - implicitProducerHashResizeInProgress.clear(std::memory_order_release); - } - } - - // If it's < three-quarters full, add to the old one anyway so that we don't - // have to wait for the next table to finish being allocated by another thread - // (and if we just finished allocating above, the condition will always be - // true) - if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { - bool recycled; - auto producer = static_cast<ImplicitProducer*>( - recycle_or_create_producer(false, recycled)); - if (producer == nullptr) { - implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); - return nullptr; - } - if (recycled) { - implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); - } - -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED - producer->threadExitListener.callback = - &ConcurrentQueue::implicit_producer_thread_exited_callback; - producer->threadExitListener.userData = producer; - details::ThreadExitNotifier::subscribe(&producer->threadExitListener); -#endif - - auto index = hashedId; - while (true) { - index &= mainHash->capacity - 1; - auto probedKey = - mainHash->entries[index].key.load(std::memory_order_relaxed); - - auto empty = details::invalid_thread_id; -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED - auto reusable = details::invalid_thread_id2; - if ((probedKey == empty && - mainHash->entries[index].key.compare_exchange_strong( - empty, - id, - std::memory_order_relaxed, - std::memory_order_relaxed)) || - (probedKey == reusable && - mainHash->entries[index].key.compare_exchange_strong( - reusable, - id, - std::memory_order_acquire, - std::memory_order_acquire))) { -#else - if ((probedKey == empty && - mainHash->entries[index].key.compare_exchange_strong( - empty, - id, - std::memory_order_relaxed, - std::memory_order_relaxed))) { -#endif - mainHash->entries[index].value = producer; - break; - } - ++index; - } - return producer; - } - - // Hmm, the old hash is quite full and somebody else is busy allocating a new - // one. We need to wait for the allocating thread to finish (if it succeeds, - // we add, if not, we try to allocate ourselves). - mainHash = implicitProducerHash.load(std::memory_order_acquire); - } - } // namespace moodycamel - -#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED - void implicit_producer_thread_exited(ImplicitProducer* producer) - { - // Remove from thread exit listeners - details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); - - // Remove from hash -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH - debug::DebugLock lock(implicitProdMutex); -#endif - auto hash = implicitProducerHash.load(std::memory_order_acquire); - assert(hash != nullptr); // The thread exit listener is only registered if we were - // added to a hash in the first place - auto id = details::thread_id(); - auto hashedId = details::hash_thread_id(id); - details::thread_id_t probedKey; - - // We need to traverse all the hashes just in case other threads aren't on the - // current one yet and are trying to add an entry thinking there's a free slot - // (because they reused a producer) - for (; hash != nullptr; hash = hash->prev) { - auto index = hashedId; - do { - index &= hash->capacity - 1; - probedKey = hash->entries[index].key.load(std::memory_order_relaxed); - if (probedKey == id) { - hash->entries[index].key.store(details::invalid_thread_id2, - std::memory_order_release); - break; - } - ++index; - } while (probedKey != - details::invalid_thread_id); // Can happen if the hash has changed - // but we weren't put back in it yet, or - // if we weren't added to this hash in - // the first place - } - - // Mark the queue as being recyclable - producer->inactive.store(true, std::memory_order_release); - } - - static void implicit_producer_thread_exited_callback(void* userData) - { - auto producer = static_cast<ImplicitProducer*>(userData); - auto queue = producer->parent; - queue->implicit_producer_thread_exited(producer); - } -#endif - - ////////////////////////////////// - // Utility functions - ////////////////////////////////// - - template <typename TAlign> - static inline void* aligned_malloc(size_t size) - { - MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <= - std::alignment_of<details::max_align_t>::value) - return (Traits::malloc)(size); - else - { - size_t alignment = std::alignment_of<TAlign>::value; - void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); - if (!raw) - return nullptr; - char* ptr = - details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*)); - *(reinterpret_cast<void**>(ptr) - 1) = raw; - return ptr; - } - } - - template <typename TAlign> - static inline void aligned_free(void* ptr) - { - MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <= - std::alignment_of<details::max_align_t>::value) - return (Traits::free)(ptr); - else(Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr); - } - - template <typename U> - static inline U* create_array(size_t count) - { - assert(count > 0); - U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count)); - if (p == nullptr) - return nullptr; - - for (size_t i = 0; i != count; ++i) - new (p + i) U(); - return p; - } - - template <typename U> - static inline void destroy_array(U* p, size_t count) - { - if (p != nullptr) { - assert(count > 0); - for (size_t i = count; i != 0;) - (p + --i)->~U(); - } - aligned_free<U>(p); - } - - template <typename U> - static inline U* create() - { - void* p = aligned_malloc<U>(sizeof(U)); - return p != nullptr ? new (p) U : nullptr; - } - - template <typename U, typename A1> - static inline U* create(A1&& a1) - { - void* p = aligned_malloc<U>(sizeof(U)); - return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr; - } - - template <typename U> - static inline void destroy(U* p) - { - if (p != nullptr) - p->~U(); - aligned_free<U>(p); - } - -private: - std::atomic<ProducerBase*> producerListTail; - std::atomic<std::uint32_t> producerCount; - - std::atomic<size_t> initialBlockPoolIndex; - Block* initialBlockPool; - size_t initialBlockPoolSize; - -#ifndef MCDBGQ_USEDEBUGFREELIST - FreeList<Block> freeList; -#else - debug::DebugFreeList<Block> freeList; -#endif - - std::atomic<ImplicitProducerHash*> implicitProducerHash; - std::atomic<size_t> implicitProducerHashCount; // Number of slots logically used - ImplicitProducerHash initialImplicitProducerHash; - std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> - initialImplicitProducerHashEntries; - std::atomic_flag implicitProducerHashResizeInProgress; - - std::atomic<std::uint32_t> nextExplicitConsumerId; - std::atomic<std::uint32_t> globalExplicitConsumerOffset; - -#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH - debug::DebugMutex implicitProdMutex; -#endif - -#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG - std::atomic<ExplicitProducer*> explicitProducers; - std::atomic<ImplicitProducer*> implicitProducers; -#endif -}; - - -template <typename T, typename Traits> -ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue) - : producer(queue.recycle_or_create_producer(true)) -{ - if (producer != nullptr) { - producer->token = this; - } -} - -template <typename T, typename Traits> -ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue) - : producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue) - ->recycle_or_create_producer(true)) -{ - if (producer != nullptr) { - producer->token = this; - } -} - -template <typename T, typename Traits> -ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue) - : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) -{ - initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); -} - -template <typename T, typename Traits> -ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue) - : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) -{ - initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue) - ->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); -} - -template <typename T, typename Traits> -inline void swap(ConcurrentQueue<T, Traits>& a, - ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT -{ - a.swap(b); -} - -inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT { a.swap(b); } - -inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT { a.swap(b); } - -template <typename T, typename Traits> -inline void -swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, - typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT -{ - a.swap(b); -} - -} // namespace moodycamel - -#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) -#pragma warning(pop) -#endif - -#if defined(__GNUC__) -#pragma GCC diagnostic pop -#endif diff --git a/gr/include/moodycamel/lightweightsemaphore.h b/gr/include/moodycamel/lightweightsemaphore.h deleted file mode 100644 index 72658b026..000000000 --- a/gr/include/moodycamel/lightweightsemaphore.h +++ /dev/null @@ -1,415 +0,0 @@ -// Provides an efficient implementation of a semaphore (LightweightSemaphore). -// This is an extension of Jeff Preshing's sempahore implementation (licensed -// under the terms of its separate zlib license) that has been adapted and -// extended by Cameron Desrochers. - -#pragma once - -#include <type_traits> // For std::make_signed<T> -#include <atomic> -#include <cstddef> // For std::size_t - -#if defined(_WIN32) -// Avoid including windows.h in a header; we only need a handful of -// items, so we'll redeclare them here (this is relatively safe since -// the API generally has to remain stable between Windows versions). -// I know this is an ugly hack but it still beats polluting the global -// namespace with thousands of generic names or adding a .cpp for nothing. -extern "C" { -struct _SECURITY_ATTRIBUTES; -__declspec(dllimport) void* __stdcall CreateSemaphoreW( - _SECURITY_ATTRIBUTES* lpSemaphoreAttributes, - long lInitialCount, - long lMaximumCount, - const wchar_t* lpName); -__declspec(dllimport) int __stdcall CloseHandle(void* hObject); -__declspec(dllimport) unsigned long __stdcall WaitForSingleObject( - void* hHandle, unsigned long dwMilliseconds); -__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, - long lReleaseCount, - long* lpPreviousCount); -} -#elif defined(__MACH__) -#include <mach/mach.h> -#elif defined(__unix__) -#include <semaphore.h> -#endif - -namespace moodycamel { -namespace details { - -// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's -// portable + lightweight semaphore implementations, originally from -// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h -// LICENSE: -// Copyright (c) 2015 Jeff Preshing -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgement in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. -#if defined(_WIN32) -class Semaphore -{ -private: - void* m_hSema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - -public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - const long maxLong = 0x7fffffff; - m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); - assert(m_hSema); - } - - ~Semaphore() { CloseHandle(m_hSema); } - - bool wait() - { - const unsigned long infinite = 0xffffffff; - return WaitForSingleObject(m_hSema, infinite) == 0; - } - - bool try_wait() { return WaitForSingleObject(m_hSema, 0) == 0; } - - bool timed_wait(std::uint64_t usecs) - { - return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; - } - - void signal(int count = 1) - { - while (!ReleaseSemaphore(m_hSema, count, nullptr)) - ; - } -}; -#elif defined(__MACH__) -//--------------------------------------------------------- -// Semaphore (Apple iOS and OSX) -// Can't use POSIX semaphores due to -// http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html -//--------------------------------------------------------- -class Semaphore -{ -private: - semaphore_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - -public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - kern_return_t rc = - semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); - assert(rc == KERN_SUCCESS); - (void)rc; - } - - ~Semaphore() { semaphore_destroy(mach_task_self(), m_sema); } - - bool wait() { return semaphore_wait(m_sema) == KERN_SUCCESS; } - - bool try_wait() { return timed_wait(0); } - - bool timed_wait(std::uint64_t timeout_usecs) - { - mach_timespec_t ts; - ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000); - ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000); - - // added in OSX 10.10: - // https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html - kern_return_t rc = semaphore_timedwait(m_sema, ts); - return rc == KERN_SUCCESS; - } - - void signal() - { - while (semaphore_signal(m_sema) != KERN_SUCCESS) - ; - } - - void signal(int count) - { - while (count-- > 0) { - while (semaphore_signal(m_sema) != KERN_SUCCESS) - ; - } - } -}; -#elif defined(__unix__) -//--------------------------------------------------------- -// Semaphore (POSIX, Linux) -//--------------------------------------------------------- -class Semaphore -{ -private: - sem_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - -public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount)); - assert(rc == 0); - (void)rc; - } - - ~Semaphore() { sem_destroy(&m_sema); } - - bool wait() - { - // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error - int rc; - do { - rc = sem_wait(&m_sema); - } while (rc == -1 && errno == EINTR); - return rc == 0; - } - - bool try_wait() - { - int rc; - do { - rc = sem_trywait(&m_sema); - } while (rc == -1 && errno == EINTR); - return rc == 0; - } - - bool timed_wait(std::uint64_t usecs) - { - struct timespec ts; - const int usecs_in_1_sec = 1000000; - const int nsecs_in_1_sec = 1000000000; - clock_gettime(CLOCK_REALTIME, &ts); - ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); - ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; - // sem_timedwait bombs if you have more than 1e9 in tv_nsec - // so we have to clean things up before passing it in - if (ts.tv_nsec >= nsecs_in_1_sec) { - ts.tv_nsec -= nsecs_in_1_sec; - ++ts.tv_sec; - } - - int rc; - do { - rc = sem_timedwait(&m_sema, &ts); - } while (rc == -1 && errno == EINTR); - return rc == 0; - } - - void signal() - { - while (sem_post(&m_sema) == -1) - ; - } - - void signal(int count) - { - while (count-- > 0) { - while (sem_post(&m_sema) == -1) - ; - } - } -}; -#else -#error Unsupported platform! (No semaphore wrapper available) -#endif - -} // end namespace details - - -//--------------------------------------------------------- -// LightweightSemaphore -//--------------------------------------------------------- -class LightweightSemaphore -{ -public: - typedef std::make_signed<std::size_t>::type ssize_t; - -private: - std::atomic<ssize_t> m_count; - details::Semaphore m_sema; - int m_maxSpins; - - bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) - { - ssize_t oldCount; - int spin = m_maxSpins; - while (--spin >= 0) { - oldCount = m_count.load(std::memory_order_relaxed); - if ((oldCount > 0) && - m_count.compare_exchange_strong(oldCount, - oldCount - 1, - std::memory_order_acquire, - std::memory_order_relaxed)) - return true; - std::atomic_signal_fence( - std::memory_order_acquire); // Prevent the compiler from collapsing the - // loop. - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount > 0) - return true; - if (timeout_usecs < 0) { - if (m_sema.wait()) - return true; - } - if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) - return true; - // At this point, we've timed out waiting for the semaphore, but the - // count is still decremented indicating we may still be waiting on - // it. So we have to re-adjust the count, but only if the semaphore - // wasn't signaled enough times for us too since then. If it was, we - // need to release the semaphore too. - while (true) { - oldCount = m_count.load(std::memory_order_acquire); - if (oldCount >= 0 && m_sema.try_wait()) - return true; - if (oldCount < 0 && - m_count.compare_exchange_strong(oldCount, - oldCount + 1, - std::memory_order_relaxed, - std::memory_order_relaxed)) - return false; - } - } - - ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) - { - assert(max > 0); - ssize_t oldCount; - int spin = m_maxSpins; - while (--spin >= 0) { - oldCount = m_count.load(std::memory_order_relaxed); - if (oldCount > 0) { - ssize_t newCount = oldCount > max ? oldCount - max : 0; - if (m_count.compare_exchange_strong(oldCount, - newCount, - std::memory_order_acquire, - std::memory_order_relaxed)) - return oldCount - newCount; - } - std::atomic_signal_fence(std::memory_order_acquire); - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) { - if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || - (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) { - while (true) { - oldCount = m_count.load(std::memory_order_acquire); - if (oldCount >= 0 && m_sema.try_wait()) - break; - if (oldCount < 0 && - m_count.compare_exchange_strong(oldCount, - oldCount + 1, - std::memory_order_relaxed, - std::memory_order_relaxed)) - return 0; - } - } - } - if (max > 1) - return 1 + tryWaitMany(max - 1); - return 1; - } - -public: - LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) - : m_count(initialCount), m_maxSpins(maxSpins) - { - assert(initialCount >= 0); - assert(maxSpins >= 0); - } - - bool tryWait() - { - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) { - if (m_count.compare_exchange_weak(oldCount, - oldCount - 1, - std::memory_order_acquire, - std::memory_order_relaxed)) - return true; - } - return false; - } - - bool wait() { return tryWait() || waitWithPartialSpinning(); } - - bool wait(std::int64_t timeout_usecs) - { - return tryWait() || waitWithPartialSpinning(timeout_usecs); - } - - // Acquires between 0 and (greedily) max, inclusive - ssize_t tryWaitMany(ssize_t max) - { - assert(max >= 0); - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) { - ssize_t newCount = oldCount > max ? oldCount - max : 0; - if (m_count.compare_exchange_weak(oldCount, - newCount, - std::memory_order_acquire, - std::memory_order_relaxed)) - return oldCount - newCount; - } - return 0; - } - - // Acquires at least one, and (greedily) at most max - ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) - { - assert(max >= 0); - ssize_t result = tryWaitMany(max); - if (result == 0 && max > 0) - result = waitManyWithPartialSpinning(max, timeout_usecs); - return result; - } - - ssize_t waitMany(ssize_t max) - { - ssize_t result = waitMany(max, -1); - assert(result > 0); - return result; - } - - void signal(ssize_t count = 1) - { - assert(count >= 0); - ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); - ssize_t toRelease = -oldCount < count ? -oldCount : count; - if (toRelease > 0) { - m_sema.signal((int)toRelease); - } - } - - std::size_t availableApprox() const - { - ssize_t count = m_count.load(std::memory_order_relaxed); - return count > 0 ? static_cast<std::size_t>(count) : 0; - } -}; - -} // end namespace moodycamel diff --git a/gr/include/moodycamel/meson.build b/gr/include/moodycamel/meson.build deleted file mode 100644 index 12a34e6ad..000000000 --- a/gr/include/moodycamel/meson.build +++ /dev/null @@ -1,5 +0,0 @@ -install_headers([ - 'blockingconcurrentqueue.h', - 'concurrentqueue.h', - 'lightweightsemaphore.h' -], subdir : 'moodycamel') diff --git a/gr/lib/buffer_cpu_host.cc b/gr/lib/buffer_cpu_host.cc deleted file mode 100644 index 0623d166f..000000000 --- a/gr/lib/buffer_cpu_host.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <gnuradio/buffer_cpu_host.h> - -namespace gr { -buffer_cpu_host::buffer_cpu_host(size_t num_items, - size_t item_size, - buffer_cpu_host_type type, - std::shared_ptr<buffer_properties> buf_properties) - : gr::buffer_sm(num_items, item_size, buf_properties), _transfer_type(type) -{ - _host_buffer.resize(_buf_size); - _device_buffer.resize(_buf_size); - - set_type("buffer_cpu_host_" + std::to_string((int)_transfer_type)); -} - -buffer_uptr buffer_cpu_host::make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties) -{ - auto cbp = std::static_pointer_cast<buffer_cpu_host_properties>(buffer_properties); - if (cbp != nullptr) { - return buffer_uptr(new buffer_cpu_host( - num_items, item_size, cbp->buffer_type(), buffer_properties)); - } - else { - throw std::runtime_error( - "Failed to cast buffer properties to buffer_cpu_host_properties"); - } -} - -void* buffer_cpu_host::read_ptr(size_t index) -{ - if (_transfer_type == buffer_cpu_host_type::D2H || - _transfer_type == buffer_cpu_host_type::H2H) { - return (void*)&_host_buffer[index]; - } - else { - return (void*)&_device_buffer[index]; - } -} -void* buffer_cpu_host::write_ptr() -{ - if (_transfer_type == buffer_cpu_host_type::H2D || - _transfer_type == buffer_cpu_host_type::H2H) { - return (void*)&_host_buffer[_write_index]; - } - else { - return (void*)&_device_buffer[_write_index]; - } -} - -void buffer_cpu_host::post_write(int num_items) -{ - std::lock_guard<std::mutex> guard(_buf_mutex); - - size_t bytes_written = num_items * _item_size; - size_t wi1 = _write_index; - - // num_items were written to the buffer - - if (_transfer_type == buffer_cpu_host_type::H2D) { - memcpy(&_device_buffer[wi1], &_host_buffer[wi1], bytes_written); - } - else if (_transfer_type == buffer_cpu_host_type::D2H) { - memcpy(&_host_buffer[wi1], &_device_buffer[wi1], bytes_written); - } - - // advance the write pointer - _write_index += bytes_written; - if (_write_index == _buf_size) { - _write_index = 0; - } - if (_write_index > _buf_size) { - throw std::runtime_error("buffer_sm: Wrote too far into buffer"); - } - _total_written += num_items; -} - -buffer_reader_uptr -buffer_cpu_host::add_reader(std::shared_ptr<buffer_properties> buf_props, size_t itemsize) -{ - auto r = - std::make_unique<buffer_cpu_host_reader>(this, buf_props, itemsize, _write_index); - _readers.push_back(r.get()); - return r; -} - - -} // namespace gr diff --git a/gr/lib/buffer_cuda.cc b/gr/lib/buffer_cuda.cc deleted file mode 100644 index 73a1e61f4..000000000 --- a/gr/lib/buffer_cuda.cc +++ /dev/null @@ -1,162 +0,0 @@ -#include <cuda.h> -#include <cuda_runtime.h> -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <gnuradio/buffer_cuda.h> - - -namespace gr { -buffer_cuda::buffer_cuda(size_t num_items, - size_t item_size, - buffer_cuda_type type, - std::shared_ptr<buffer_properties> buf_properties) - : gr::buffer(num_items, item_size, buf_properties), _type(type) -{ - // _host_buffer.resize(_buf_size * 2); // double circular buffer - cudaMallocHost( - &_host_buffer, - _buf_size * - 2); // double circular buffer - should do something more intelligent here - cudaMalloc( - &_device_buffer, - _buf_size * - 2); // double circular buffer - should do something more intelligent here - set_type("buffer_cuda_" + std::to_string((int)_type)); - - cudaStreamCreate(&stream); -} -buffer_cuda::~buffer_cuda() -{ - cudaFree(_device_buffer); - cudaFree(_host_buffer); -} - -buffer_uptr buffer_cuda::make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties) -{ - auto cbp = std::static_pointer_cast<buffer_cuda_properties>(buffer_properties); - if (cbp != nullptr) { - return buffer_uptr( - new buffer_cuda(num_items, item_size, cbp->buffer_type(), buffer_properties)); - } - else { - throw std::runtime_error( - "Failed to cast buffer properties to buffer_cuda_properties"); - } -} - -void* buffer_cuda::read_ptr(size_t index) -{ - if (_type == buffer_cuda_type::D2H) { - return (void*)&_host_buffer[index]; - } - else { - return (void*)&_device_buffer[index]; - } -} -void* buffer_cuda::write_ptr() -{ - if (_type == buffer_cuda_type::H2D) { - return (void*)&_host_buffer[_write_index]; - } - else { - return (void*)&_device_buffer[_write_index]; - } -} - -void buffer_cuda_reader::post_read(int num_items) -{ - std::lock_guard<std::mutex> guard(_rdr_mutex); - // advance the read pointer - _read_index += num_items * _itemsize; - if (_read_index >= _buffer->buf_size()) { - _read_index -= _buffer->buf_size(); - } - _total_read += num_items; -} -void buffer_cuda::post_write(int num_items) -{ - std::lock_guard<std::mutex> guard(_buf_mutex); - - size_t bytes_written = num_items * _item_size; - size_t wi1 = _write_index; - size_t wi2 = _write_index + _buf_size; - // num_items were written to the buffer - // copy the data to the second half of the buffer - - size_t num_bytes_1 = std::min(_buf_size - wi1, bytes_written); - size_t num_bytes_2 = bytes_written - num_bytes_1; - - if (_type == buffer_cuda_type::H2D) { - cudaMemcpyAsync(&_device_buffer[wi1], - &_host_buffer[wi1], - bytes_written, - cudaMemcpyHostToDevice, - stream); - - // memcpy(&_host_buffer[wi2], &_host_buffer[wi1], num_bytes_1); - cudaMemcpyAsync(&_device_buffer[wi2], - &_device_buffer[wi1], - num_bytes_1, - cudaMemcpyDeviceToDevice, - stream); - if (num_bytes_2) { - // memcpy(&_host_buffer[0], &_host_buffer[_buf_size], num_bytes_2); - cudaMemcpyAsync(&_device_buffer[0], - &_device_buffer[_buf_size], - num_bytes_2, - cudaMemcpyDeviceToDevice, - stream); - } - } - else if (_type == buffer_cuda_type::D2H) { - cudaMemcpyAsync(&_host_buffer[wi1], - &_device_buffer[wi1], - bytes_written, - cudaMemcpyDeviceToHost, - stream); - - memcpy(&_host_buffer[wi2], &_host_buffer[wi1], num_bytes_1); - - if (num_bytes_2) { - memcpy(&_host_buffer[0], &_host_buffer[_buf_size], num_bytes_2); - } - } - else // D2D - { - cudaMemcpyAsync(&_device_buffer[wi2], - &_device_buffer[wi1], - num_bytes_1, - cudaMemcpyDeviceToDevice); - if (num_bytes_2) - cudaMemcpyAsync(&_device_buffer[0], - &_device_buffer[_buf_size], - num_bytes_2, - cudaMemcpyDeviceToDevice, - stream); - } - // advance the write pointer - _write_index += bytes_written; - if (_write_index >= _buf_size) { - _write_index -= _buf_size; - } - _total_written += num_items; - cudaStreamSynchronize(stream); -} - -buffer_reader_uptr buffer_cuda::add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize) -{ - auto r = - std::make_unique<buffer_cuda_reader>(this, buf_props, itemsize, _write_index); - _readers.push_back(r.get()); - return r; -} - -} // namespace gr diff --git a/gr/lib/buffer_cuda_pinned.cc b/gr/lib/buffer_cuda_pinned.cc deleted file mode 100644 index 3a1de05de..000000000 --- a/gr/lib/buffer_cuda_pinned.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <cuda.h> -#include <cuda_runtime.h> - -#include <gnuradio/buffer_cuda_pinned.h> - -namespace gr { -buffer_cuda_pinned::buffer_cuda_pinned(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buf_properties) - : buffer(num_items, item_size, buf_properties) -{ - if (!cudaHostAlloc((void**)&_pinned_buffer, _buf_size * 2, 0) == cudaSuccess) { - throw std::runtime_error("Failed to allocate CUDA pinned memory"); - } -} -buffer_cuda_pinned::~buffer_cuda_pinned() { cudaFree(_pinned_buffer); } - -buffer_uptr buffer_cuda_pinned::make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties) -{ - return buffer_uptr(new buffer_cuda_pinned(num_items, item_size, buffer_properties)); -} - -void* buffer_cuda_pinned::read_ptr(size_t index) { return (void*)&_pinned_buffer[index]; } -void* buffer_cuda_pinned::write_ptr() { return (void*)&_pinned_buffer[_write_index]; } - -void buffer_cuda_pinned_reader::post_read(int num_items) -{ - std::lock_guard<std::mutex> guard(_rdr_mutex); - // advance the read pointer - _read_index += num_items * _itemsize; - if (_read_index >= _buffer->buf_size()) { - _read_index -= _buffer->buf_size(); - } - _total_read += num_items; -} - -void buffer_cuda_pinned::post_write(int num_items) -{ - std::lock_guard<std::mutex> guard(_buf_mutex); - - size_t bytes_written = num_items * _item_size; - size_t wi1 = _write_index; - size_t wi2 = _write_index + _buf_size; - // num_items were written to the buffer - // copy the data to the second half of the buffer - - size_t num_bytes_1 = std::min(_buf_size - wi1, bytes_written); - size_t num_bytes_2 = bytes_written - num_bytes_1; - - memcpy(&_pinned_buffer[wi2], &_pinned_buffer[wi1], num_bytes_1); - if (num_bytes_2) - memcpy(&_pinned_buffer[0], &_pinned_buffer[_buf_size], num_bytes_2); - - // advance the write pointer - _write_index += bytes_written; - if (_write_index >= _buf_size) { - _write_index -= _buf_size; - } -} - -buffer_reader_uptr -buffer_cuda_pinned::add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize) -{ - auto r = std::make_unique<buffer_cuda_pinned_reader>( - this, buf_props, itemsize, _write_index); - _readers.push_back(r.get()); - return r; -} - -} // namespace gr diff --git a/gr/lib/buffer_cuda_sm.cc b/gr/lib/buffer_cuda_sm.cc deleted file mode 100644 index 7e8bc88d9..000000000 --- a/gr/lib/buffer_cuda_sm.cc +++ /dev/null @@ -1,164 +0,0 @@ -#include <cuda.h> -#include <cuda_runtime.h> -#include <string.h> -#include <algorithm> -#include <cstdint> -#include <memory> -#include <mutex> -#include <vector> - -#include <gnuradio/buffer_cuda_sm.h> - -namespace gr { -buffer_cuda_sm::buffer_cuda_sm(size_t num_items, - size_t item_size, - buffer_cuda_sm_type type, - std::shared_ptr<buffer_properties> buf_properties) - : gr::buffer_sm(num_items, item_size, buf_properties), _type(type) -{ - // _host_buffer.resize(_buf_size * 2); // double circular buffer - cudaMallocHost(&_host_buffer, _buf_size); - cudaMalloc(&_device_buffer, _buf_size); - set_type("buffer_cuda_sm_" + std::to_string((int)_type)); - - cudaStreamCreate(&stream); -} -buffer_cuda_sm::~buffer_cuda_sm() -{ - cudaFree(_device_buffer); - cudaFree(_host_buffer); -} - -buffer_uptr buffer_cuda_sm::make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties) -{ - auto cbp = std::static_pointer_cast<buffer_cuda_sm_properties>(buffer_properties); - if (cbp != nullptr) { - return buffer_uptr(new buffer_cuda_sm( - num_items, item_size, cbp->buffer_type(), buffer_properties)); - } - else { - throw std::runtime_error( - "Failed to cast buffer properties to buffer_cuda_sm_properties"); - } -} - -void* buffer_cuda_sm::read_ptr(size_t index) -{ - if (_type == buffer_cuda_sm_type::D2H) { - return (void*)&_host_buffer[index]; - } - else { - return (void*)&_device_buffer[index]; - } -} -void* buffer_cuda_sm::write_ptr() -{ - if (_type == buffer_cuda_sm_type::H2D) { - return (void*)&_host_buffer[_write_index]; - } - else { - return (void*)&_device_buffer[_write_index]; - } -} - -void buffer_cuda_sm::post_write(int num_items) -{ - std::lock_guard<std::mutex> guard(_buf_mutex); - - size_t bytes_written = num_items * _item_size; - size_t wi1 = _write_index; - - // num_items were written to the buffer - - if (_type == buffer_cuda_sm_type::H2D) { - cudaMemcpyAsync(&_device_buffer[wi1], - &_host_buffer[wi1], - bytes_written, - cudaMemcpyHostToDevice, - stream); - } - else if (_type == buffer_cuda_sm_type::D2H) { - cudaMemcpyAsync(&_host_buffer[wi1], - &_device_buffer[wi1], - bytes_written, - cudaMemcpyDeviceToHost, - stream); - } - - // advance the write pointer - _write_index += bytes_written; - if (_write_index == _buf_size) { - _write_index = 0; - } - if (_write_index > _buf_size) { - throw std::runtime_error("buffer_sm: Wrote too far into buffer"); - } - _total_written += num_items; - cudaStreamSynchronize(stream); -} - -buffer_reader_uptr -buffer_cuda_sm::add_reader(std::shared_ptr<buffer_properties> buf_props, size_t itemsize) -{ - auto r = - std::make_unique<buffer_cuda_sm_reader>(this, buf_props, itemsize, _write_index); - _readers.push_back(r.get()); - return r; -} - -void* buffer_cuda_sm::cuda_memcpy(void* dest, const void* src, std::size_t count) -{ - cudaError_t rc = cudaSuccess; - rc = cudaMemcpy(dest, src, count, cudaMemcpyDeviceToDevice); - if (rc) { - std::ostringstream msg; - msg << "Error performing cudaMemcpy: " << cudaGetErrorName(rc) << " -- " - << cudaGetErrorString(rc); - throw std::runtime_error(msg.str()); - } - - return dest; -} - -void* buffer_cuda_sm::cuda_memmove(void* dest, const void* src, std::size_t count) -{ - // Would a kernel that checks for overlap and then copies front-to-back or - // back-to-front be faster than using cudaMemcpy with a temp buffer? - - // Allocate temp buffer - void* tempBuffer = nullptr; - cudaError_t rc = cudaSuccess; - rc = cudaMalloc((void**)&tempBuffer, count); - if (rc) { - std::ostringstream msg; - msg << "Error allocating device buffer: " << cudaGetErrorName(rc) << " -- " - << cudaGetErrorString(rc); - throw std::runtime_error(msg.str()); - } - - // First copy data from source to temp buffer - rc = cudaMemcpy(tempBuffer, src, count, cudaMemcpyDeviceToDevice); - if (rc) { - std::ostringstream msg; - msg << "Error performing cudaMemcpy: " << cudaGetErrorName(rc) << " -- " - << cudaGetErrorString(rc); - throw std::runtime_error(msg.str()); - } - - // Then copy data from temp buffer to destination to avoid overlap - rc = cudaMemcpy(dest, tempBuffer, count, cudaMemcpyDeviceToDevice); - if (rc) { - std::ostringstream msg; - msg << "Error performing cudaMemcpy: " << cudaGetErrorName(rc) << " -- " - << cudaGetErrorString(rc); - throw std::runtime_error(msg.str()); - } - - cudaFree(tempBuffer); - - return dest; -} - -} // namespace gr diff --git a/gr/lib/buffer_net_zmq.cc b/gr/lib/buffer_net_zmq.cc deleted file mode 100644 index 0236e028a..000000000 --- a/gr/lib/buffer_net_zmq.cc +++ /dev/null @@ -1,175 +0,0 @@ -#include <gnuradio/buffer_cpu_vmcirc.h> -#include <gnuradio/buffer_net_zmq.h> -#include <nlohmann/json.hpp> -#include <chrono> -#include <thread> -namespace gr { - - -std::shared_ptr<buffer_properties> -buffer_net_zmq_properties::make_from_params(const std::string& json_str) -{ - auto json_obj = nlohmann::json::parse(json_str); - return make(json_obj["ipaddr"], json_obj["port"]); -} - -buffer_uptr buffer_net_zmq::make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties) -{ - - auto zbp = std::static_pointer_cast<buffer_net_zmq_properties>(buffer_properties); - if (zbp != nullptr) { - return buffer_uptr( - new buffer_net_zmq(num_items, item_size, buffer_properties, zbp->port())); - } - else { - throw std::runtime_error( - "Failed to cast buffer properties to buffer_net_zmq_properties"); - } -} - -buffer_net_zmq::buffer_net_zmq(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buf_properties, - int port) - : buffer(num_items, item_size, buf_properties), - _context(1), - _socket(_context, zmq::socket_type::push) -{ - gr::configure_default_loggers(d_logger, d_debug_logger, "buffer_net_zmq"); - set_type("buffer_net_zmq"); - _buffer.resize(_buf_size); - _socket.set(zmq::sockopt::sndhwm, 1); - _socket.set(zmq::sockopt::rcvhwm, 1); - std::string endpoint = "tcp://*:" + std::to_string(port); - std::cout << "snd_endpoint: " << endpoint << std::endl; - _socket.bind(endpoint); -} - - -/****************************************************************************/ -/* READER METHODS */ -/****************************************************************************/ - - -buffer_reader_uptr -buffer_net_zmq_reader::make(size_t itemsize, std::shared_ptr<buffer_properties> buf_props) -{ - auto zbp = std::static_pointer_cast<buffer_net_zmq_properties>(buf_props); - if (zbp != nullptr) { - return std::make_unique<buffer_net_zmq_reader>( - buf_props, itemsize, zbp->ipaddr(), zbp->port()); - } - else { - throw std::runtime_error( - "Failed to cast buffer properties to buffer_net_zmq_properties"); - } -} - -buffer_net_zmq_reader::buffer_net_zmq_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize, - const std::string& ipaddr, - int port) - : buffer_reader(nullptr, buf_props, itemsize, 0), - _context(1), - _socket(_context, zmq::socket_type::pull) -{ - gr::configure_default_loggers(d_logger, d_debug_logger, "buffer_net_zmq_reader"); - auto bufprops = std::make_shared<buffer_cpu_vmcirc_properties>(); - _circbuf = gr::buffer_cpu_vmcirc::make( - 8192, - itemsize, - bufprops); // FIXME - make nitems a buffer reader factory parameter - _circbuf_rdr = _circbuf->add_reader(bufprops, itemsize); - - // auto b = (float *)_circbuf->write_ptr(); - - // for (int i=0; i<8192; i++) - // { - // b[i] = i; - // } - // _circbuf->post_write(8192); - - // buffer_info_t info; - // _circbuf_rdr->read_info(info); - // auto br = (float *)_circbuf_rdr->read_ptr(); - - - // _circbuf_rdr->post_read(4096); - // br = (float *) _circbuf_rdr->read_ptr(); - - - std::string endpoint = "tcp://" + ipaddr + ":" + std::to_string(port); - d_debug_logger->debug("rcv_endpoint: {}", endpoint); - _socket.set(zmq::sockopt::sndhwm, 1); - _socket.set(zmq::sockopt::rcvhwm, 1); - // _socket.setsockopt(ZMQ_SUBSCRIBE, "", 0); - _socket.connect(endpoint); - d_debug_logger->debug(" ... connected"); - - std::thread t([this]() { - while (!this->_recv_done) { - // zmq::message_t msg{}; - // See how much room we have in the circular buffer - buffer_info_t wi; - _circbuf->write_info(wi); - - auto n_bytes_left_in_msg = _msg.size() - _msg_idx; - auto n_bytes_in_circbuf = wi.n_items * wi.item_size; - auto bytes_to_write = std::min(n_bytes_in_circbuf, n_bytes_left_in_msg); - auto items_to_write = bytes_to_write / wi.item_size; - bytes_to_write = items_to_write * wi.item_size; - - if (n_bytes_in_circbuf <= 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; - } - - if (bytes_to_write > 0) { - memcpy(wi.ptr, (uint8_t*)_msg.data() + _msg_idx, bytes_to_write); - d_debug_logger->debug("copied {} items", bytes_to_write / wi.item_size); - _msg_idx += bytes_to_write; - n_bytes_left_in_msg = _msg.size() - _msg_idx; - _circbuf->post_write(items_to_write); - notify_scheduler(); - } - - if (n_bytes_left_in_msg == 0) { - _msg.rebuild(); - d_debug_logger->debug("going into recv"); - auto r = _socket.recv(_msg, zmq::recv_flags::none); - if (r) { - d_debug_logger->debug("received msg with size {} items", - _msg.size() / wi.item_size); - _msg_idx = 0; - } - } - // GR_LOG_DEBUG(d_debug_logger, "recv: {}", wi.n_items); - // auto ret = this->_socket.recv( - // zmq::mutable_buffer(_circbuf->write_ptr(), wi.n_items * wi.item_size), - // zmq::recv_flags::none); - - // _circbuf->post_write(wi.n_items); - // notify_scheduler(); - // auto recbuf = *ret; - // assert(recbuf.size == wi.n_items * wi.item_size); - - // GR_LOG_DEBUG(d_debug_logger, "nbytesrcv: {}", recbuf.size); - // std::cout << " ---> msg received " << msg.size() << " bytes" << - // std::endl; - } - }); - - t.detach(); -} - -std::string buffer_net_zmq_properties::to_json() -{ - nlohmann::json j = { { "id", "buffer_net_zmq_properties" }, - { "parameters", { { "ipaddr", _ipaddr }, { "port", _port } } } }; - - return j.dump(); -} - -} // namespace gr diff --git a/gr/lib/buffer_sm.cc b/gr/lib/buffer_sm.cc deleted file mode 100644 index 6766b1062..000000000 --- a/gr/lib/buffer_sm.cc +++ /dev/null @@ -1,374 +0,0 @@ -#include <gnuradio/buffer_sm.h> - -#include <gnuradio/logger.h> - -namespace gr { - -buffer_reader_uptr buffer_sm::add_reader(std::shared_ptr<buffer_properties> buf_props, - size_t itemsize) -{ - auto r = std::make_unique<buffer_sm_reader>(this, itemsize, buf_props, _write_index); - _readers.push_back(r.get()); - return r; -} - -buffer_sm::buffer_sm(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buf_properties) - : buffer(num_items, item_size, buf_properties) -{ - _buffer.resize(_buf_size); // singly mapped buffer - _raw_buffer = _buffer.data(); - _write_index = 0; - - set_type("buffer_sm"); - - - gr::configure_default_loggers(d_logger, d_debug_logger, _type); -} - -buffer_uptr buffer_sm::make(size_t num_items, - size_t item_size, - std::shared_ptr<buffer_properties> buffer_properties) -{ - return buffer_uptr(new buffer_sm(num_items, item_size, buffer_properties)); -} - -void* buffer_sm::read_ptr(size_t index) { return (void*)&_raw_buffer[index]; } -void* buffer_sm::write_ptr() { return (void*)&_raw_buffer[_write_index]; } - -void buffer_sm::post_write(int num_items) -{ - std::scoped_lock guard(_buf_mutex); - - size_t bytes_written = num_items * _item_size; - // num_items were written to the buffer - - // advance the write pointer - _write_index += bytes_written; - if (_write_index == _buf_size) { - _write_index = 0; - } - if (_write_index > _buf_size) { - throw std::runtime_error("buffer_sm: Wrote too far into buffer"); - } - _total_written += num_items; -} - -bool buffer_sm::output_blkd_cb_ready(int output_multiple) -{ - uint32_t space_avail = 0; - { - std::unique_lock<std::mutex>(*this->mutex()); - space_avail = space_available(); - } - return ((space_avail > 0) && - ((space_avail / output_multiple) * output_multiple == 0)); -} - -bool buffer_sm::output_blocked_callback_logic(bool force, memmove_func_t memmove_func) -{ - auto space_avail = space_available(); - - // if (((space_avail > 0) && ((space_avail / output_multiple) * output_multiple == - // 0)) || - if ((space_avail > 0) || force) { - // Find reader with the smallest read index - uint32_t min_read_idx = _readers[0]->read_index(); - uint64_t min_read_idx_nitems = _readers[0]->total_read(); - for (size_t idx = 1; idx < _readers.size(); ++idx) { - // Record index of reader with minimum read-index - if (_readers[idx]->read_index() < min_read_idx) { - min_read_idx = _readers[idx]->read_index(); - min_read_idx_nitems = _readers[idx]->total_read(); - } - } - - d_debug_logger->debug("output_blocked_callback, space_avail {}, min_read_idx {}, " - "_write_index {}", - space_avail, - min_read_idx, - _write_index); - - // Make sure we have enough room to start writing back at the beginning - if ((min_read_idx == 0) || (min_read_idx > _write_index) || - (min_read_idx == _write_index && min_read_idx_nitems != total_written())) { - return false; - } - - // Determine how much "to be read" data needs to be moved - auto to_move_bytes = _write_index - min_read_idx; - - if (to_move_bytes > min_read_idx) { - return false; - } - - - d_debug_logger->debug("output_blocked_callback, moving {} bytes", to_move_bytes); - - // Shift "to be read" data back to the beginning of the buffer - std::memmove(_raw_buffer, _raw_buffer + (min_read_idx), to_move_bytes); - - // Adjust write index and each reader index - _write_index -= min_read_idx; - - for (size_t idx = 0; idx < _readers.size(); ++idx) { - d_debug_logger->debug("output_blocked_callback,setting _read_index to {}", - _readers[idx]->read_index() - min_read_idx); - _readers[idx]->set_read_index(_readers[idx]->read_index() - min_read_idx); - } - - return true; - } - - return false; -} - -bool buffer_sm::output_blocked_callback(bool force) -{ - std::scoped_lock guard(_buf_mutex); - - return output_blocked_callback_logic(force, std::memmove); -} - -size_t buffer_sm::space_available() -{ - // Find the max number of items available across readers - - uint64_t min_items_read = std::numeric_limits<uint64_t>::max(); - size_t min_read_idx = 0; - for (size_t idx = 0; idx < _readers.size(); idx++) { - std::scoped_lock lck{ *(_readers[idx]->mutex()) }; - auto total_read = _readers[idx]->total_read(); - if (total_read < min_items_read) { - min_items_read = total_read; - min_read_idx = _readers[idx]->read_index(); - } - } - - size_t space = (_buf_size - _write_index) / _item_size; - - if (min_read_idx == _write_index) { - // If the (min) read index and write index are equal then the buffer - // is either completely empty or completely full depending on if - // the number of items read matches the number written - if (min_items_read != total_written()) { - space = 0; - } - } - else if (min_read_idx > _write_index) { - space = (min_read_idx - _write_index) / _item_size; - } - - if (space == 0) - return space; - // Only half fill the buffer - // Leave extra space in case the reader gets stuck and needs realignment - - space = std::min(space, _num_items); - - return space; -} - -bool buffer_sm::write_info(buffer_info_t& info) -{ - std::scoped_lock guard(_buf_mutex); - - info.ptr = write_ptr(); - info.n_items = space_available(); - if (info.n_items < 0) - info.n_items = 0; - info.item_size = _item_size; - info.total_items = _total_written; - - return true; -} - -bool buffer_sm::adjust_buffer_data(memcpy_func_t memcpy_func, memmove_func_t memmove_func) -{ - - // Find reader with the smallest read index that is greater than the - // write index - // auto min_reader_index = std::numeric_limits<size_t>::max(); - auto min_read_idx = std::numeric_limits<size_t>::max(); - for (size_t idx = 0; idx < _readers.size(); ++idx) { - if (_readers[idx]->read_index() > write_index()) { - // Record index of reader with minimum read-index - // FIXME: What if one of the readers has wrapped back around? - // -- in that case this should use items_available() callback - if (_readers[idx]->read_index() < min_read_idx) { - min_read_idx = _readers[idx]->read_index(); - // min_reader_index = idx; - } - } - } - - // Note items_avail might be zero, that's okay. - auto max_bytes_avail = _buf_size - min_read_idx; - auto max_items_avail = max_bytes_avail / _item_size; - auto gap = min_read_idx - _write_index; - if (_write_index > min_read_idx || max_bytes_avail > gap) { - return false; - } - - // GR_LOG_DEBUG(d_debug_logger, - // "adust_buffer_data: max_bytes_avail {}, gap {}", - // max_bytes_avail, - // gap); - - - // Shift existing data down to make room for blocked data at end of buffer - auto move_data_size = _write_index; - auto dest = _raw_buffer + max_bytes_avail; - memmove_func(dest, _raw_buffer, move_data_size); - - // Next copy the data from the end of the buffer back to the beginning - auto avail_data_size = max_bytes_avail; - auto src = _raw_buffer + min_read_idx; - memcpy_func(_raw_buffer, src, avail_data_size); - - // Finally adjust all reader pointers - for (size_t idx = 0; idx < _readers.size(); ++idx) { - // GR_LOG_DEBUG(d_debug_logger, - // "adjust_buffer_data,setting _read_index to {}", - // _readers[idx]->read_index() - min_read_idx); - _readers[idx]->set_read_index(max_items_avail - _readers[idx]->items_available()); - } - - // Now adjust write pointer - _write_index += max_items_avail; - - return true; -} - - -buffer_sm_reader::buffer_sm_reader(buffer_sm* bufp, - size_t itemsize, - std::shared_ptr<buffer_properties> buf_props, - size_t read_index) - : buffer_reader(bufp, buf_props, itemsize, read_index), _buffer_sm(bufp) -{ - gr::configure_default_loggers(d_logger, d_debug_logger, "buffer_sm_reader"); -} - -void buffer_sm_reader::post_read(int num_items) -{ - std::scoped_lock guard(_rdr_mutex); - - // GR_LOG_DEBUG( - // d_debug_logger, "post_read: _read_index {}, num_items {}", _read_index, num_items); - - // advance the read pointer - _read_index += num_items * _itemsize; //_buffer->item_size(); - _total_read += num_items; - if (_read_index == _buffer->buf_size()) { - _read_index = 0; - } - if (_read_index > _buffer->buf_size()) { - // GR_LOG_INFO(d_logger, - // "too far: num_items {}, prev_index {}, post_index {}", - // num_items, - // _read_index - num_items * _buffer->item_size(), - // _read_index); - - // // throw std::runtime_error("buffer_sm_reader: Wrote too far into buffer"); - } - - // GR_LOG_DEBUG(d_debug_logger, "post_read: _read_index {}", _read_index); -} - -bool buffer_sm_reader::input_blocked_callback(size_t items_required) -{ - // Only singly mapped buffers need to do anything with this callback - std::scoped_lock guard(*(_buffer->mutex())); - - auto items_avail = items_available(); - - // GR_LOG_DEBUG(d_debug_logger, - // "input_blocked_callback: items_avail {}, _read_index {}, " - // "_write_index {}, items_required {}", - // items_avail, - // _read_index, - // _buffer->write_index(), - // items_required); - - // GR_LOG_DEBUG(d_debug_logger, - // "input_blocked_callback: total_written {}, total_read {}", - // _buffer->total_written(), - // total_read()); - - - // Maybe adjust read pointers from min read index? - // This would mean that *all* readers must be > (passed) the write index - if (items_avail < items_required && _buffer->write_index() < read_index()) { - // GR_LOG_DEBUG(d_debug_logger, "Calling adjust_buffer_data "); - return _buffer_sm->adjust_buffer_data(std::memcpy, std::memmove); - } - - return false; -} - -size_t buffer_sm_reader::bytes_available() -{ - // Can only read up to to the write_index, or the end of the buffer - // there is no wraparound - - size_t ret = 0; - - size_t w = _buffer->write_index(); - size_t r = _read_index; - - if (w < r) { - ret = (_buffer->buf_size() - r); - } - else if (w == r && total_read() < _buffer->total_written()) { - ret = (_buffer->buf_size() - r); - } - else { - ret = (w - r); - } - - // return ret; - - // GR_LOG_DEBUG(d_debug_logger, - // "items_available: write_index {}, read_index {}, ret {}, total_read " - // "{}, total_written {}", - // w, - // r, - // ret, - // total_read(), - // _buffer->total_written()); - - // if (_itemsize*(_buffer->total_written() - total_read()) < ret) { - // d_debug_logger->debug( - // "check_math {} {} {} {}", - // _buffer->total_written() - total_read(), - // ret, - // total_read(), - // _buffer->total_written()); - // } - - return ret; // in bytes -} - -bool buffer_sm_reader::input_blkd_cb_ready(int items_required) -{ - std::unique_lock<std::mutex>(*_buffer->mutex()); - - return ( - ((_buffer->buf_size() * _itemsize - _read_index) < (uint32_t)items_required) && - (_buffer->write_index() < _read_index)); -} - -buffer_sm_properties::buffer_sm_properties() : buffer_properties() -{ - _bff = buffer_sm::make; -} - -std::shared_ptr<buffer_properties> buffer_sm_properties::make() -{ - return std::static_pointer_cast<buffer_properties>( - std::make_shared<buffer_sm_properties>()); -} - -} // namespace gr diff --git a/gr/lib/meson.build b/gr/lib/meson.build index a809cea2e..dbee9c491 100644 --- a/gr/lib/meson.build +++ b/gr/lib/meson.build @@ -32,7 +32,6 @@ runtime_sources = [ 'block.cc', 'port.cc', 'buffer.cc', - 'buffer_sm.cc', 'buffer_management.cc', 'buffer_cpu_simple.cc', 'realtime.cc', @@ -56,13 +55,11 @@ runtime_sources = [ 'buffer_cpu_vmcirc_sysv_shm.cc', # mmap requires librt - FIXME - handle this a conditional dependency 'buffer_cpu_vmcirc_mmap_shm_open.cc', - 'buffer_net_zmq.cc', 'sptr_magic.cc', 'hier_block.cc', 'prefs.cc', 'tag.cc', 'registry.cc', - 'buffer_cpu_host.cc' ] if IMPLEMENT_CUDA diff --git a/gr/meson.build b/gr/meson.build index 2627dfa6b..62ed6711a 100644 --- a/gr/meson.build +++ b/gr/meson.build @@ -1,5 +1,4 @@ subdir('include/gnuradio') -subdir('include/moodycamel') subdir('lib') if (get_option('enable_python')) diff --git a/gr/python/gnuradio/gr/bindings/buffer_net_zmq_pybind.cc b/gr/python/gnuradio/gr/bindings/buffer_net_zmq_pybind.cc deleted file mode 100644 index 3effa205c..000000000 --- a/gr/python/gnuradio/gr/bindings/buffer_net_zmq_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2020 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * SPDX-License-Identifier: GPL-3.0-or-later - * - */ - -#include <pybind11/complex.h> -#include <pybind11/pybind11.h> -#include <pybind11/stl.h> - -namespace py = pybind11; - -#include <gnuradio/buffer_net_zmq.h> -// pydoc.h is automatically generated in the build directory -// #include <edge_pydoc.h> - -void bind_buffer_net_zmq(py::module& m) -{ - using buffer_net_zmq_properties = ::gr::buffer_net_zmq_properties; - - py::class_<buffer_net_zmq_properties, - gr::buffer_properties, - std::shared_ptr<buffer_net_zmq_properties>>(m, "buffer_net_zmq_properties") - // .def(py::init( - // [](const std::string& ipaddr, int port) { - // return ::gr::buffer_net_zmq_properties::make(ipaddr, port); - // }), py::arg("ipaddr"), py::arg("port")) - .def_static( - "make", &buffer_net_zmq_properties::make, py::arg("ipaddr"), py::arg("port")) - .def_static("make_from_params", - &buffer_net_zmq_properties::make_from_params, - py::arg("json_str")) - .def("to_json", &buffer_net_zmq_properties::to_json); -} diff --git a/gr/python/gnuradio/gr/bindings/gr_pybind.cc b/gr/python/gnuradio/gr/bindings/gr_pybind.cc index 1ec5bc33d..8a43a6c1d 100644 --- a/gr/python/gnuradio/gr/bindings/gr_pybind.cc +++ b/gr/python/gnuradio/gr/bindings/gr_pybind.cc @@ -35,7 +35,6 @@ void bind_buffer(py::module&); void bind_vmcircbuf(py::module&); void bind_constants(py::module&); void bind_python_block(py::module&); -void bind_buffer_net_zmq(py::module& m); void bind_runtime(py::module&); void bind_runtime_proxy(py::module&); void bind_graph_utils(py::module&); @@ -77,7 +76,6 @@ PYBIND11_MODULE(gr_python, m) bind_flowgraph(m); bind_scheduler(m); bind_buffer(m); - bind_buffer_net_zmq(m); bind_vmcircbuf(m); bind_constants(m); bind_python_block(m); diff --git a/gr/python/gnuradio/gr/bindings/meson.build b/gr/python/gnuradio/gr/bindings/meson.build index 23421526b..55ad0ec83 100644 --- a/gr/python/gnuradio/gr/bindings/meson.build +++ b/gr/python/gnuradio/gr/bindings/meson.build @@ -13,7 +13,6 @@ runtime_pybind_sources = files([ 'scheduler_pybind.cc', 'buffer_pybind.cc', 'buffer_cpu_vmcirc_pybind.cc', - 'buffer_net_zmq_pybind.cc', 'constants_pybind.cc', 'python_block_pybind.cc', 'runtime_pybind.cc', diff --git a/gr/test/meson.build b/gr/test/meson.build index 86be691d4..9c3f5fa78 100644 --- a/gr/test/meson.build +++ b/gr/test/meson.build @@ -5,12 +5,12 @@ qa_srcs = ['qa_host_buffer', deps = [gnuradio_gr_dep, gtest_dep,] -foreach qa : qa_srcs - e = executable(qa, - qa + '.cc', - include_directories : incdir, - link_language : 'cpp', - dependencies: deps, - install : false) - test(qa, e, env: TEST_ENV) -endforeach
\ No newline at end of file +# foreach qa : qa_srcs +# e = executable(qa, +# qa + '.cc', +# include_directories : incdir, +# link_language : 'cpp', +# dependencies: deps, +# install : false) +# test(qa, e, env: TEST_ENV) +# endforeach
\ No newline at end of file diff --git a/gr/test/qa_host_buffer.cc b/gr/test/qa_host_buffer.cc deleted file mode 100644 index 326611f29..000000000 --- a/gr/test/qa_host_buffer.cc +++ /dev/null @@ -1,281 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright 2021 BlackLynx Inc. - * Copyright 2022 Josh Morman - * - * This file is part of GNU Radio - * - * SPDX-License-Identifier: GPL-3.0-or-later - * - */ - -#include <gtest/gtest.h> - -#include <gnuradio/block.h> -#include <gnuradio/buffer_cpu_host.h> - -#include <cstdlib> -#include <iostream> - - -using namespace gr; - -// ---------------------------------------------------------------------------- -// Basic checks for buffer_single_mapped using the buffer_cpu_host implementation -// of the interface for testing. -// ---------------------------------------------------------------------------- -TEST(HostBuffer, t0) -{ - size_t nitems = 65536 / sizeof(int); - - auto buf_props = BUFFER_CPU_HOST_ARGS_H2D; - auto buf = buf_props->factory()(nitems, 1, buf_props); - auto rdr1 = buf->add_reader(buf_props, 1); - - EXPECT_TRUE(buf->space_available() == nitems); - EXPECT_TRUE(rdr1->items_available() == 0); - - for (size_t idx = 1; idx <= 16; ++idx) { - buf->post_write(1000); - EXPECT_TRUE(buf->space_available() == (nitems - (idx * 1000))); - - EXPECT_TRUE(rdr1->items_available() == (idx * 1000)); - } - - EXPECT_TRUE(buf->space_available() == 384); - - buf->post_write(buf->space_available()); - EXPECT_TRUE(buf->space_available() == 0); - EXPECT_TRUE(rdr1->items_available() == nitems); - EXPECT_TRUE(buf->space_available() == 0); -} - -// ---------------------------------------------------------------------------- -// Basic checks for buffer_single_mapped using the buffer_cpu_host implementation -// of the interface for testing. -// ---------------------------------------------------------------------------- -TEST(HostBuffer, t1) -{ - size_t nitems = 65536 / sizeof(int); - - auto buf_props = BUFFER_CPU_HOST_ARGS_H2D; - auto buf = buf_props->factory()(nitems, 1, buf_props); - auto rdr1 = buf->add_reader(buf_props, 1); - - size_t space = buf->space_available(); - EXPECT_TRUE(nitems == space); - - EXPECT_TRUE(rdr1->items_available() == 0); - - buf->post_write(nitems); - EXPECT_TRUE(buf->space_available() == 0); - EXPECT_TRUE(rdr1->items_available() == nitems); - - for (size_t idx = 1; idx <= 16; ++idx) { - rdr1->post_read(1000); - EXPECT_TRUE(rdr1->items_available() == (nitems - (idx * 1000))); - - space = buf->space_available(); - EXPECT_TRUE(space == (idx * 1000)); - } - - EXPECT_TRUE(rdr1->items_available() == 384); - rdr1->post_read(384); - EXPECT_TRUE(rdr1->items_available() == 0); -} - -// ---------------------------------------------------------------------------- -// Basic check reader/write wrapping of buffer_single_mapped with 1 reader. -// ---------------------------------------------------------------------------- -TEST(HostBuffer, t2) -{ - size_t nitems = 65536 / sizeof(int); - - auto buf_props = BUFFER_CPU_HOST_ARGS_H2D; - auto buf = buf_props->factory()(nitems, 1, buf_props); - auto rdr1 = buf->add_reader(buf_props, 1); - - size_t space = buf->space_available(); - EXPECT_TRUE(nitems == space); - EXPECT_TRUE(rdr1->items_available() == 0); - - buf->post_write(nitems); - EXPECT_TRUE(buf->space_available() == 0); - - for (size_t idx = 1; idx <= 16; ++idx) { - rdr1->post_read(1000); - EXPECT_TRUE(rdr1->items_available() == (nitems - (idx * 1000))); - - space = buf->space_available(); - - if (idx <= 9) - EXPECT_TRUE(space == (idx * 1000)); - else - EXPECT_TRUE(space == ((idx * 1000) - (nitems / 2))); - - if (idx == 9) { - buf->post_write(nitems / 2); - } - } - - // At this point we can only read up until the end of the buffer even though - // additional data is available at the beginning of the buffer - EXPECT_TRUE(rdr1->items_available() == 384); - rdr1->post_read(384); - - // Now the (nitems / 2) at the beginning of the buffer should be available - EXPECT_TRUE(rdr1->items_available() == (nitems / 2)); - - for (size_t idx = 0; idx < 4; ++idx) - rdr1->post_read(1024); - - EXPECT_TRUE(buf->space_available() == (nitems / 2)); - EXPECT_TRUE(rdr1->items_available() == (nitems / 4)); - - for (size_t idx = 0; idx < 4; ++idx) - rdr1->post_read(1000); - - EXPECT_TRUE(buf->space_available() == (nitems / 2)); - EXPECT_TRUE(rdr1->items_available() == 96); - - rdr1->post_read(96); - EXPECT_TRUE(rdr1->items_available() == 0); - - EXPECT_TRUE(buf->space_available() == (nitems / 2)); -} - -// ---------------------------------------------------------------------------- -// Basic check reader/write wrapping of buffer_single_mapped with 2 readers. -// ---------------------------------------------------------------------------- -TEST(HostBuffer, t3) -{ - size_t nitems = 65536 / sizeof(int); - - auto buf_props = BUFFER_CPU_HOST_ARGS_H2D; - auto buf = buf_props->factory()(nitems, 1, buf_props); - auto rdr1 = buf->add_reader(buf_props, 1); - auto rdr2 = buf->add_reader(buf_props, 1); - - size_t space = buf->space_available(); - EXPECT_TRUE(nitems == space); - EXPECT_TRUE(rdr1->items_available() == 0); - EXPECT_TRUE(rdr2->items_available() == 0); - - buf->post_write(nitems); - EXPECT_TRUE(buf->space_available() == 0); - EXPECT_TRUE(rdr1->items_available() == nitems); - EXPECT_TRUE(rdr2->items_available() == nitems); - - for (size_t idx = 1; idx <= 16; ++idx) { - rdr1->post_read(1000); - EXPECT_TRUE(rdr1->items_available() == (nitems - (idx * 1000))); - - // Reader 2 hasn't read anything so space available should remain 0 - EXPECT_TRUE(buf->space_available() == 0); - } - - size_t last_rdr1_available = rdr1->items_available(); - size_t increment = last_rdr1_available / 4; - - for (size_t idx = 1; idx <= 16; ++idx) { - rdr2->post_read(1000); - EXPECT_TRUE(rdr2->items_available() == (nitems - (idx * 1000))); - - EXPECT_TRUE(rdr1->items_available() == last_rdr1_available); - if (idx % 4 == 0) { - rdr1->post_read(increment); - EXPECT_TRUE(rdr1->items_available() == (last_rdr1_available - increment)); - last_rdr1_available = rdr1->items_available(); - } - - EXPECT_TRUE(buf->space_available() == (idx * 1000)); - } -} - -// ---------------------------------------------------------------------------- -// Basic check of output blocked callback -// ---------------------------------------------------------------------------- -TEST(HostBuffer, t4) -{ - size_t nitems = 65536 / sizeof(int); - - auto buf_props = BUFFER_CPU_HOST_ARGS_H2D; - auto buf = buf_props->factory()(nitems, 1, buf_props); - auto rdr1 = buf->add_reader(buf_props, 1); - - EXPECT_TRUE(nitems == buf->space_available()); - EXPECT_TRUE(rdr1->items_available() == 0); - - buf->post_write(nitems / 2); - EXPECT_TRUE(buf->space_available() == (nitems / 2)); - EXPECT_TRUE(rdr1->items_available() == (nitems / 2)); - - rdr1->post_read(nitems / 2); - EXPECT_TRUE(buf->space_available() == (nitems / 2)); - EXPECT_TRUE(rdr1->items_available() == 0); - - buf->post_write(8000); - EXPECT_TRUE(buf->space_available() == 192); - - bool ready = buf->output_blkd_cb_ready(200); - EXPECT_TRUE(ready == true); - - bool success = buf->output_blocked_callback(200); - EXPECT_TRUE(success == true); - EXPECT_TRUE(buf->space_available() == 8384); - EXPECT_TRUE(rdr1->items_available() == 8000); - - rdr1->post_read(4000); - EXPECT_TRUE(buf->space_available() == 8384); - EXPECT_TRUE(rdr1->items_available() == 4000); - - buf->post_write(4000); - EXPECT_TRUE(buf->space_available() == 4384); - EXPECT_TRUE(rdr1->items_available() == 8000); - - rdr1->post_read(8000); - EXPECT_TRUE(buf->space_available() == 4384); - EXPECT_TRUE(rdr1->items_available() == 0); -} - -// ---------------------------------------------------------------------------- -// Basic check of input blocked callback -// ---------------------------------------------------------------------------- -TEST(HostBuffer, t5) -{ - size_t nitems = 65536 / sizeof(int); - - auto buf_props = BUFFER_CPU_HOST_ARGS_H2D; - auto buf = buf_props->factory()(nitems, 1, buf_props); - auto rdr1 = buf->add_reader(buf_props, 1); - - EXPECT_TRUE(nitems == buf->space_available()); - EXPECT_TRUE(rdr1->items_available() == 0); - - buf->post_write(16000); - EXPECT_TRUE(buf->space_available() == 384); - EXPECT_TRUE(rdr1->items_available() == 16000); - - rdr1->post_read(16000); - EXPECT_TRUE(buf->space_available() == 384); - EXPECT_TRUE(rdr1->items_available() == 0); - - buf->post_write(384); - EXPECT_TRUE(buf->space_available() == 16000); - EXPECT_TRUE(rdr1->items_available() == 384); - - buf->post_write(116); - EXPECT_TRUE(buf->space_available() == 15884); - EXPECT_TRUE(rdr1->items_available() == 384); - - bool ready = rdr1->input_blkd_cb_ready(400); - EXPECT_TRUE(ready == true); - - bool success = rdr1->input_blocked_callback(400); - EXPECT_TRUE(success == true); - EXPECT_TRUE(rdr1->items_available() == 500); - - rdr1->post_read(500); - EXPECT_TRUE(buf->space_available() == 15884); - EXPECT_TRUE(rdr1->items_available() == 0); -} |