Date: Sat, 4 Oct 2025 15:19:35 +0100
On Sat, Oct 4, 2025 at 4:49 AM Walt Karas wrote:
>
> Response: Sorry, I still don't understand. Can you show me how you would rework this code?
I threw this code together quickly, so there are a few oversights in it
(you can get it to crash with different numbers, so there's a bug
somewhere), but anyway what you want might be something along the
following lines:
https://godbolt.org/z/q8oMobhrx
And here it is copy-pasted:
#include <cassert> // assert
#include <array> // array
#include <atomic> // atomic
#include <optional> // optional
#include <set> // set
#include <stdexcept> // runtime_error
#include <thread> // thread
#include <utility> // forward
#include <vector> // vector
#include <mutex> // mutex, lock_guard
/// Fixed-capacity per-thread storage.
///
/// Each thread that calls operator() is lazily given its own T, constructed
/// from the arguments of that thread's first call and destroyed when the
/// thread exits. At most `max_threads` threads may hold a T at the same time;
/// the slot of an exited thread is recycled for later threads.
///
/// All state is static, so every ThreadLocal<T, max_threads> object with the
/// same template arguments shares one set of slots: a thread sees the same T
/// regardless of which such object it goes through.
///
/// @tparam T            Type of the per-thread object.
/// @tparam max_threads  Number of slots, i.e. the maximum number of threads
///                      that can own a T simultaneously.
template<typename T, unsigned max_threads>
class ThreadLocal final {
    // One cell per slot; engaged once the owning thread has constructed its T.
    static inline std::array< std::optional<T>, max_threads > storage{};

    // inUse[i] is true while slot i is owned by a live thread. Static storage
    // is zero-initialised, so every slot starts out free.
    static inline std::array< std::atomic<bool>, max_threads > inUse{};

    /// Claims the lowest-numbered free slot.
    ///
    /// A per-slot flag is used instead of a lock-free free list: there is no
    /// node whose index can go stale and no ABA hazard, and a failed attempt
    /// leaves no state behind.
    ///
    /// @return Index of the slot now owned by the caller.
    /// @throws std::runtime_error if all `max_threads` slots are taken.
    static unsigned acquireSlot(void) noexcept(false)
    {
        for ( unsigned i = 0u; i < max_threads; ++i )
        {
            bool expected = false;
            // Acquire pairs with the release store in releaseSlot, so the
            // previous owner's reset() is visible before we emplace.
            if ( inUse[i].compare_exchange_strong(expected, true,
                     std::memory_order_acquire, std::memory_order_relaxed) )
                return i;
        }
        throw std::runtime_error("Exceeded max_threads limit!");
    }

    /// Destroys the slot's T (if any) and then marks the slot free.
    ///
    /// The order matters: the slot must not become visible to other threads
    /// until its old contents are gone, otherwise a new owner's freshly
    /// constructed T could be destroyed by the exiting thread.
    static void releaseSlot(unsigned const index) noexcept
    {
        storage[index].reset();
        inUse[index].store(false, std::memory_order_release);
    }

    // Owns one slot for the lifetime of a thread.
    struct SlotLease {
        unsigned const idx;
        SlotLease(void) noexcept(false) : idx( acquireSlot() ) {}
        ~SlotLease(void) noexcept { releaseSlot(idx); }
        SlotLease(SlotLease const &) = delete;
        SlotLease &operator=(SlotLease const &) = delete;
    };

    /// Returns the calling thread's slot, acquiring it on first use.
    ///
    /// This is deliberately NOT a template: a thread_local inside a function
    /// template exists once per specialisation, which would hand the same
    /// thread a different slot for every distinct argument list.
    ///
    /// @throws std::runtime_error if no slot is free; the acquisition is
    ///         retried on the thread's next call.
    static unsigned threadSlot(void) noexcept(false)
    {
        static thread_local SlotLease const lease;
        return lease.idx;
    }

    /// Returns the calling thread's T, constructing it from `args` if the
    /// thread does not have one yet (otherwise `args` are ignored).
    template<typename... Params>
    static T &access(Params&&... args) noexcept(false)
    {
        std::optional<T> &cell = storage[ threadSlot() ];
        if ( false == cell.has_value() )
            cell.emplace( std::forward<Params>(args)... );
        return *cell;
    }

public:
    /// Accesses the calling thread's T.
    ///
    /// @param args  Forwarded to T's constructor on the thread's first call
    ///              only; ignored on later calls.
    /// @return Reference to the thread's T; it is destroyed when the calling
    ///         thread exits.
    /// @throws std::runtime_error if `max_threads` threads already hold a
    ///         slot, plus anything T's constructor throws.
    template<typename... Params>
    T &operator()(Params&&... args) noexcept(false)
    {
        return access( std::forward<Params>(args)... );
    }

    /// Const overload: same behaviour, read-only reference.
    template<typename... Params>
    T const &operator()(Params&&... args) const noexcept(false)
    {
        return access( std::forward<Params>(args)... );
    }
};
// =============
// Example usage
// =============
/// Example usage: a counter whose hot path (incr) only touches an atomic that
/// belongs to the calling thread; the per-thread counts are folded into one
/// shared total by collect() and when a thread's PerThread is destroyed.
///
/// Lifetime caveats:
///  - Each PerThread keeps a reference to its Counter and uses it in its
///    destructor, so a Counter must outlive every PerThread created for it.
///  - ThreadLocal keeps its slots in static storage, so all Counter objects
///    share them: a thread's PerThread stays bound to the first Counter that
///    thread incremented.
class Counter {
    std::atomic<unsigned> total{0u};        // counts already folded in
    std::atomic<unsigned> num_threads{0u};  // number of live PerThread objects
    std::mutex mtx;                         // guards pt_active and folding into total

    // Per-thread state; registers itself with the parent on construction
    // and unregisters on destruction.
    struct PerThread {
        std::atomic<unsigned> count{0u};    // increments not yet folded into total
        Counter &parent;

        /// @param c  Owning counter; must not be null.
        explicit PerThread(Counter *const c) noexcept : parent(*c)
        {
            std::lock_guard lock{parent.mtx};
            parent.pt_active.insert(this);
            parent.num_threads.fetch_add(1u, std::memory_order_relaxed);
        }

        ~PerThread(void) noexcept
        {
            // Unregister and hand over the leftover count under one lock, so
            // collect() never observes a moment where this object's
            // increments are in neither pt_active nor total.
            std::lock_guard lock{parent.mtx};
            parent.pt_active.erase(this);
            parent.total.fetch_add(count.load(std::memory_order_relaxed),
                                   std::memory_order_relaxed);
            parent.num_threads.fetch_sub(1u, std::memory_order_relaxed);
        }
    };

    std::set<PerThread*> pt_active;         // live PerThread objects; guarded by mtx
    ThreadLocal<PerThread, 32u> pt;

public:
    /// Adds one to the calling thread's private count, creating the thread's
    /// PerThread on its first call.
    void incr(void)
    {
        pt(this).count.fetch_add(1u, std::memory_order_relaxed);
    }

    /// Drains every live per-thread count into the shared total.
    ///
    /// @return The running total, including counts handed over by PerThread
    ///         objects that were already destroyed. Returning only the amount
    ///         drained by this call would silently drop those counts, e.g. it
    ///         would be 0 once all worker threads have been joined.
    unsigned collect(void)
    {
        std::lock_guard lock{mtx};
        unsigned sum = 0u;
        for ( PerThread *const p : pt_active )
            sum += p->count.exchange(0u, std::memory_order_relaxed);
        // fetch_add returns the previous value; add sum for the new total.
        return total.fetch_add(sum, std::memory_order_relaxed) + sum;
    }
};
#include <iostream> // cout
// Demo driver: several worker threads hammer one Counter, then the value
// returned by Counter::collect() is printed.
auto main(void) -> int
{
    constexpr unsigned numThreads = 6u;   // number of worker threads
    constexpr unsigned iterations = 200u; // incr() calls made by each worker

    Counter counter;

    // Worker body; the thread id argument is accepted but not used.
    auto work = [&](unsigned)
    {
        for ( unsigned remaining = iterations; remaining != 0u; --remaining )
            counter.incr();
    };

    std::vector<std::thread> threads;
    threads.reserve(numThreads);
    for ( unsigned id = 0u; id != numThreads; ++id )
        threads.emplace_back(work, id);

    // Wait for every worker to finish before collecting.
    for ( auto it = threads.begin(); it != threads.end(); ++it )
        it->join();

    unsigned const collected = counter.collect();
    std::cout << "Collected total = " << collected << std::endl;
}
>
> Response: Sorry, I still don't understand. Can you show me how you would rework this code?
I threw this code together quickly, so there are a few oversights in it
(you can get it to crash with different numbers, so there's a bug
somewhere), but anyway what you want might be something along the
following lines:
https://godbolt.org/z/q8oMobhrx
And here it is copy-pasted:
#include <cassert> // assert
#include <array> // array
#include <atomic> // atomic
#include <optional> // optional
#include <set> // set
#include <stdexcept> // runtime_error
#include <thread> // thread
#include <utility> // forward
#include <vector> // vector
#include <mutex> // mutex, lock_guard
/// Fixed-capacity per-thread storage.
///
/// Each thread that calls operator() is lazily given its own T, constructed
/// from the arguments of that thread's first call and destroyed when the
/// thread exits. At most `max_threads` threads may hold a T at the same time;
/// the slot of an exited thread is recycled for later threads.
///
/// All state is static, so every ThreadLocal<T, max_threads> object with the
/// same template arguments shares one set of slots: a thread sees the same T
/// regardless of which such object it goes through.
///
/// @tparam T            Type of the per-thread object.
/// @tparam max_threads  Number of slots, i.e. the maximum number of threads
///                      that can own a T simultaneously.
template<typename T, unsigned max_threads>
class ThreadLocal final {
    // One cell per slot; engaged once the owning thread has constructed its T.
    static inline std::array< std::optional<T>, max_threads > storage{};

    // inUse[i] is true while slot i is owned by a live thread. Static storage
    // is zero-initialised, so every slot starts out free.
    static inline std::array< std::atomic<bool>, max_threads > inUse{};

    /// Claims the lowest-numbered free slot.
    ///
    /// A per-slot flag is used instead of a lock-free free list: there is no
    /// node whose index can go stale and no ABA hazard, and a failed attempt
    /// leaves no state behind.
    ///
    /// @return Index of the slot now owned by the caller.
    /// @throws std::runtime_error if all `max_threads` slots are taken.
    static unsigned acquireSlot(void) noexcept(false)
    {
        for ( unsigned i = 0u; i < max_threads; ++i )
        {
            bool expected = false;
            // Acquire pairs with the release store in releaseSlot, so the
            // previous owner's reset() is visible before we emplace.
            if ( inUse[i].compare_exchange_strong(expected, true,
                     std::memory_order_acquire, std::memory_order_relaxed) )
                return i;
        }
        throw std::runtime_error("Exceeded max_threads limit!");
    }

    /// Destroys the slot's T (if any) and then marks the slot free.
    ///
    /// The order matters: the slot must not become visible to other threads
    /// until its old contents are gone, otherwise a new owner's freshly
    /// constructed T could be destroyed by the exiting thread.
    static void releaseSlot(unsigned const index) noexcept
    {
        storage[index].reset();
        inUse[index].store(false, std::memory_order_release);
    }

    // Owns one slot for the lifetime of a thread.
    struct SlotLease {
        unsigned const idx;
        SlotLease(void) noexcept(false) : idx( acquireSlot() ) {}
        ~SlotLease(void) noexcept { releaseSlot(idx); }
        SlotLease(SlotLease const &) = delete;
        SlotLease &operator=(SlotLease const &) = delete;
    };

    /// Returns the calling thread's slot, acquiring it on first use.
    ///
    /// This is deliberately NOT a template: a thread_local inside a function
    /// template exists once per specialisation, which would hand the same
    /// thread a different slot for every distinct argument list.
    ///
    /// @throws std::runtime_error if no slot is free; the acquisition is
    ///         retried on the thread's next call.
    static unsigned threadSlot(void) noexcept(false)
    {
        static thread_local SlotLease const lease;
        return lease.idx;
    }

    /// Returns the calling thread's T, constructing it from `args` if the
    /// thread does not have one yet (otherwise `args` are ignored).
    template<typename... Params>
    static T &access(Params&&... args) noexcept(false)
    {
        std::optional<T> &cell = storage[ threadSlot() ];
        if ( false == cell.has_value() )
            cell.emplace( std::forward<Params>(args)... );
        return *cell;
    }

public:
    /// Accesses the calling thread's T.
    ///
    /// @param args  Forwarded to T's constructor on the thread's first call
    ///              only; ignored on later calls.
    /// @return Reference to the thread's T; it is destroyed when the calling
    ///         thread exits.
    /// @throws std::runtime_error if `max_threads` threads already hold a
    ///         slot, plus anything T's constructor throws.
    template<typename... Params>
    T &operator()(Params&&... args) noexcept(false)
    {
        return access( std::forward<Params>(args)... );
    }

    /// Const overload: same behaviour, read-only reference.
    template<typename... Params>
    T const &operator()(Params&&... args) const noexcept(false)
    {
        return access( std::forward<Params>(args)... );
    }
};
// =============
// Example usage
// =============
/// Example usage: a counter whose hot path (incr) only touches an atomic that
/// belongs to the calling thread; the per-thread counts are folded into one
/// shared total by collect() and when a thread's PerThread is destroyed.
///
/// Lifetime caveats:
///  - Each PerThread keeps a reference to its Counter and uses it in its
///    destructor, so a Counter must outlive every PerThread created for it.
///  - ThreadLocal keeps its slots in static storage, so all Counter objects
///    share them: a thread's PerThread stays bound to the first Counter that
///    thread incremented.
class Counter {
    std::atomic<unsigned> total{0u};        // counts already folded in
    std::atomic<unsigned> num_threads{0u};  // number of live PerThread objects
    std::mutex mtx;                         // guards pt_active and folding into total

    // Per-thread state; registers itself with the parent on construction
    // and unregisters on destruction.
    struct PerThread {
        std::atomic<unsigned> count{0u};    // increments not yet folded into total
        Counter &parent;

        /// @param c  Owning counter; must not be null.
        explicit PerThread(Counter *const c) noexcept : parent(*c)
        {
            std::lock_guard lock{parent.mtx};
            parent.pt_active.insert(this);
            parent.num_threads.fetch_add(1u, std::memory_order_relaxed);
        }

        ~PerThread(void) noexcept
        {
            // Unregister and hand over the leftover count under one lock, so
            // collect() never observes a moment where this object's
            // increments are in neither pt_active nor total.
            std::lock_guard lock{parent.mtx};
            parent.pt_active.erase(this);
            parent.total.fetch_add(count.load(std::memory_order_relaxed),
                                   std::memory_order_relaxed);
            parent.num_threads.fetch_sub(1u, std::memory_order_relaxed);
        }
    };

    std::set<PerThread*> pt_active;         // live PerThread objects; guarded by mtx
    ThreadLocal<PerThread, 32u> pt;

public:
    /// Adds one to the calling thread's private count, creating the thread's
    /// PerThread on its first call.
    void incr(void)
    {
        pt(this).count.fetch_add(1u, std::memory_order_relaxed);
    }

    /// Drains every live per-thread count into the shared total.
    ///
    /// @return The running total, including counts handed over by PerThread
    ///         objects that were already destroyed. Returning only the amount
    ///         drained by this call would silently drop those counts, e.g. it
    ///         would be 0 once all worker threads have been joined.
    unsigned collect(void)
    {
        std::lock_guard lock{mtx};
        unsigned sum = 0u;
        for ( PerThread *const p : pt_active )
            sum += p->count.exchange(0u, std::memory_order_relaxed);
        // fetch_add returns the previous value; add sum for the new total.
        return total.fetch_add(sum, std::memory_order_relaxed) + sum;
    }
};
#include <iostream> // cout
// Demo driver: several worker threads hammer one Counter, then the value
// returned by Counter::collect() is printed.
auto main(void) -> int
{
    constexpr unsigned numThreads = 6u;   // number of worker threads
    constexpr unsigned iterations = 200u; // incr() calls made by each worker

    Counter counter;

    // Worker body; the thread id argument is accepted but not used.
    auto work = [&](unsigned)
    {
        for ( unsigned remaining = iterations; remaining != 0u; --remaining )
            counter.incr();
    };

    std::vector<std::thread> threads;
    threads.reserve(numThreads);
    for ( unsigned id = 0u; id != numThreads; ++id )
        threads.emplace_back(work, id);

    // Wait for every worker to finish before collecting.
    for ( auto it = threads.begin(); it != threads.end(); ++it )
        it->join();

    unsigned const collected = counter.collect();
    std::cout << "Collected total = " << collected << std::endl;
}
Received on 2025-10-04 14:19:48