From 2e59f643e9925a25fe9e2ffbfaf2db80727ecae7 Mon Sep 17 00:00:00 2001 From: Anders Cedronius Date: Thu, 29 Dec 2022 13:16:53 +0100 Subject: [PATCH] add readme --- .../{mac_boost_x86.yml => mac_boost_x64.yml} | 2 +- .../workflows/{mac_x86.yml => mac_x64.yml} | 2 +- ...ntu_boost_x86.yml => ubuntu_boost_x64.yml} | 2 +- .../{ubuntu_x86.yml => ubuntu_x64.yml} | 2 +- .../{win_boost_x86.yml => win_boost_x64.yml} | 2 +- .../workflows/{win_x86.yml => win_x64.yml} | 2 +- FastQueueCompare.cpp | 2 - README.md | 103 +++++++++++++++++- 8 files changed, 107 insertions(+), 10 deletions(-) rename .github/workflows/{mac_boost_x86.yml => mac_boost_x64.yml} (95%) rename .github/workflows/{mac_x86.yml => mac_x64.yml} (95%) rename .github/workflows/{ubuntu_boost_x86.yml => ubuntu_boost_x64.yml} (94%) rename .github/workflows/{ubuntu_x86.yml => ubuntu_x64.yml} (94%) rename .github/workflows/{win_boost_x86.yml => win_boost_x64.yml} (96%) rename .github/workflows/{win_x86.yml => win_x64.yml} (95%) diff --git a/.github/workflows/mac_boost_x86.yml b/.github/workflows/mac_boost_x64.yml similarity index 95% rename from .github/workflows/mac_boost_x86.yml rename to .github/workflows/mac_boost_x64.yml index f6718da..2615505 100644 --- a/.github/workflows/mac_boost_x86.yml +++ b/.github/workflows/mac_boost_x64.yml @@ -1,4 +1,4 @@ -name: mac_boost_x86 +name: mac_boost_x64 on: push: diff --git a/.github/workflows/mac_x86.yml b/.github/workflows/mac_x64.yml similarity index 95% rename from .github/workflows/mac_x86.yml rename to .github/workflows/mac_x64.yml index a5863a7..e173eee 100644 --- a/.github/workflows/mac_x86.yml +++ b/.github/workflows/mac_x64.yml @@ -1,4 +1,4 @@ -name: mac_x86 +name: mac_x64 on: push: diff --git a/.github/workflows/ubuntu_boost_x86.yml b/.github/workflows/ubuntu_boost_x64.yml similarity index 94% rename from .github/workflows/ubuntu_boost_x86.yml rename to .github/workflows/ubuntu_boost_x64.yml index ec3a50c..8f11c05 100644 --- a/.github/workflows/ubuntu_boost_x86.yml 
+++ b/.github/workflows/ubuntu_boost_x64.yml @@ -1,4 +1,4 @@ -name: ubuntu_boost_x86 +name: ubuntu_boost_x64 on: push: diff --git a/.github/workflows/ubuntu_x86.yml b/.github/workflows/ubuntu_x64.yml similarity index 94% rename from .github/workflows/ubuntu_x86.yml rename to .github/workflows/ubuntu_x64.yml index 2fc21d1..904f337 100644 --- a/.github/workflows/ubuntu_x86.yml +++ b/.github/workflows/ubuntu_x64.yml @@ -1,4 +1,4 @@ -name: ubuntu_x86 +name: ubuntu_x64 on: push: diff --git a/.github/workflows/win_boost_x86.yml b/.github/workflows/win_boost_x64.yml similarity index 96% rename from .github/workflows/win_boost_x86.yml rename to .github/workflows/win_boost_x64.yml index 0960fd1..6b9844a 100644 --- a/.github/workflows/win_boost_x86.yml +++ b/.github/workflows/win_boost_x64.yml @@ -1,4 +1,4 @@ -name: win_boost_x86 +name: win_boost_x64 on: push: diff --git a/.github/workflows/win_x86.yml b/.github/workflows/win_x64.yml similarity index 95% rename from .github/workflows/win_x86.yml rename to .github/workflows/win_x64.yml index b2c5e26..99b8943 100644 --- a/.github/workflows/win_x86.yml +++ b/.github/workflows/win_x64.yml @@ -1,4 +1,4 @@ -name: win_x86 +name: win_x64 on: push: diff --git a/FastQueueCompare.cpp b/FastQueueCompare.cpp index 083ef80..600844b 100644 --- a/FastQueueCompare.cpp +++ b/FastQueueCompare.cpp @@ -404,13 +404,11 @@ int main() { std::thread([pQueue] { fastQueueASMConsumer(pQueue, 1); - std::cout << "exit consumer" << std::endl; }).detach(); std::thread([pQueue] { fastQueueASMProducer(pQueue, 2); - std::cout << "exit producer" << std::endl; }).detach(); // Wait for the OS to actually get it done. 
diff --git a/README.md b/README.md index d13eeda..15387d7 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,101 @@ -# fastqueue -SPSC Queue +![Logo](fastqueuesmall.png) + + +FastQueue is a single producer single consumer (SPSC) 'process to process queue' similar to *boost::lockfree::spsc_queue* + +FastQueue is slightly faster than the boost implementation (tested on a handful of systems and architectures) and is not as strict as boost is when it comes to the data-types it can carry; it's possible to transfer smart pointers, for example. + +FastQueue is what's called a lock-free queue. However, there must always be some sort of locks to prevent race conditions when two asynchronous workers communicate. Many SPSC solutions use atomics to guard the data. FastQueue uses a memory barrier technique and limits its usage to 64-bit platforms for cross thread variable data consistency. + +FastQueue can be pictured as illustrated below: + +``` + FastQueue + ◀───────────────────────────────────▶ + .─────────. + ┌────┐ '─. + ,'│Data│ ┌────┐ + CPU 1 / Thread 1 ╱ └────┴──────.│Data│╲ CPU 2 / Thread 2 +◀───────────────▶ ╱ ,' └────┘ ╲ ◀───────────────▶ +┌───────────────┐ Push ┌────┐╱ ╲ : Pop ┌───────────────┐ +│ Data producer │ ─────▶│Data│Circular buffer: │─────▶ │ Data consumer │ +└───────────────┘ ├────┘ ┌────┤ └───────────────┘ + : ╲ ╱│Data│ + ╲ ╲ ╱ └────┘ + ╲ ┌────┐ ,' ╱ + ╲│Data│`─────┌────┐ ╱ + └────┘ │Data│' + '─. └────┘ + `───────' +``` + +FastQueue is aiming to be fast. When it comes to measuring performance using templated code, the compiler may optimize the final solution in ways where FastQueue might be slower than alternative solutions and/or change its performance depending on other factors such as queue depth and so on. If you're aiming for speed, it might be wise to benchmark FastQueue against other SPSC queues in your implementation, match L1_CACHE size to the executing CPU and tune the queue depth to match the data flow. 
+ +For example, in my tests [Rigtorp's SPSC](https://github.com/rigtorp/SPSCQueue) queue is really fast on ARM64. + +**Apple M1 Pro** + +``` +boost lock free pointer test started. +boost lock free pointer test ended. +BoostLockFree Transactions -> 8437017/s +FastQueue pointer test started. +FastQueue pointer test ended. +FastQueue Transactions -> 9886604/s +Rigtorp pointer test started. +Rigtorp pointer test ended. +Rigtorp Transactions -> 10974382/s +FastQueueASM pointer test started. +FastQueueASM pointer test ended. +FastQueueASM Transactions -> 9471164/s +``` + +However, on X64 platforms I don't see the same gain in my benchmarks. With that said, Rigtorp's queue is really the one to beat ;-) . + + +The queue is a header-only template class and is implemented in a few lines of C++. + +The code compiles on arm64 or x86_64 CPUs running Windows, macOS or Linux OS. + +There is also a pure assembly version *FastQueueASM.h* that I've been playing around with. FastQueueASM is a bit more difficult to build compared to just dropping in the FastQueue.h into your project. Just look in the CMake file for guidance if you want to test it. + +## Build status + + + +## Usage + +Copy the *FastQueue.h* file to your project and -> + +```cpp + +#include "FastQueue.h" + +//Create the queue +//In this example a unique pointer to a vector of uint8_t +auto fastQueue = FastQueue<std::unique_ptr<std::vector<uint8_t>>, QUEUE_MASK>(); + +//Then the producer of the data pushes the data from one thread +auto dataProduced = std::make_unique<std::vector<uint8_t>>(1000); +fastQueue.push(dataProduced); + +//And the consumer pops the data in another thread +auto dataConsume = fastQueue.pop(); + +//When done signal that from anywhere (can be a third thread) +fastQueue.stopQueue(); + +//The consumer/producer may stop immediately or pop the queue +//until nullptr (in the above example) is received to drain all pushed items. + + +``` + +For more examples see the included implementations and tests. + + +## License + +*MIT* + +Read *LICENCE* for details