// Demo of std::map::insert(const_iterator pos, node_type&& nh)  [C++20]
// JSF AV C++ Coding Standards compliance.
// Google C++ Style Guide compliant.
//
// All heap memory is served from a single StackArena whose backing store is a
// plain char array. StackAllocator wraps the arena and satisfies the C++
// named-allocator requirements so it can be passed to std::map and std::vector.
// After Startup() no further allocations occur.

#include <cstddef>
#include <cstdint>
#include <cstdio>

#include <chrono>
#include <iterator>
#include <map>
#include <numeric>
#include <string_view>
#include <utility>
#include <vector>

// StackArena - Allocations advance the top-of- stack pointer. There is no
// per-object free (the whole arena is released when the object goes out of
// scope).  This satisfies JSF AV Rule 206: all dynamic memory is acquired
// during startup from a pre-sized buffer.
template<std::size_t kCapacity>
class StackArena
{
public:
	// Non-copyable, non-movable: every StackAllocator holds a raw pointer to
	// this object so it must not be relocated.
	StackArena(const StackArena&)            = delete;
	StackArena& operator=(const StackArena&) = delete;
	StackArena(StackArena&&)                 = delete;
	StackArena& operator=(StackArena&&)      = delete;

	StackArena() noexcept : buf_{}, top_(0u) {}

	// Allocate n bytes aligned to `align` (must be a power of two; buf_ is
	// max_align_t-aligned so any fundamental alignment works).  Returns
	// nullptr if the arena is exhausted (caller must handle this; JSF AV
	// Rule 208 forbids exceptions).
	[[nodiscard]] void* Allocate(std::size_t n, std::size_t align) noexcept
	{
		const std::size_t aligned_top = AlignUp(top_, align);
		// Compare against the remaining space instead of computing
		// aligned_top + n: the sum can wrap around std::size_t for a huge n,
		// which would make the old `aligned_top + n > kCapacity` test pass
		// and hand out a pointer for an unsatisfiable request.
		if (aligned_top > kCapacity || n > kCapacity - aligned_top)
		{
			std::printf("[arena] EXHAUSTED: requested %zu, used %zu / %zu\n",
				n, top_, kCapacity);
			return nullptr;
		}
		void* ptr = &buf_[aligned_top];
		top_ = aligned_top + n;
		return ptr;
	}

	// Deallocate is a no-op: the arena reclaims memory only on destruction.
	// This is intentional and correct for a bump-pointer arena.
	void Deallocate(void* /*ptr*/, std::size_t /*n*/) noexcept {}

	std::size_t Used()     const noexcept { return top_; }
	std::size_t Capacity() const noexcept { return kCapacity; }

private:
	// Rounds val up to the next multiple of align; align must be a power of
	// two (the mask trick relies on it).
	[[nodiscard]] static constexpr std::size_t AlignUp(
		std::size_t val, std::size_t align) noexcept
	{
		return (val + align - 1u) & ~(align - 1u);
	}

	alignas(std::max_align_t) char buf_[kCapacity];
	std::size_t top_;  // bytes consumed so far (bump pointer)
};

// StackAllocator - Thin stateless-looking wrapper around StackArena*.
// Satisfies the C++17/20 allocator requirements including rebind, so it works
// with std::map (which rebinds to allocate its internal
// _Rb_tree_node<value_type> objects) and std::vector.
template<typename T, std::size_t kCapacity>
class StackAllocator
{
public:
	using value_type = T;

	// Required by std::allocator_traits when a container rebinds the allocator
	// to a different value_type (e.g. std::map rebinds to its node type).
	template<typename U>
	struct rebind
	{
		using other = StackAllocator<U, kCapacity>;
	};

	explicit StackAllocator(StackArena<kCapacity>& arena) noexcept
		: arena_(&arena) {}

	// Rebind constructor: called by containers when rebinding to a different T.
	template<typename U>
	explicit StackAllocator(const StackAllocator<U, kCapacity>& other) noexcept
		: arena_(other.arena_) {}

	// NOTE: returns nullptr on arena exhaustion instead of throwing
	// std::bad_alloc (JSF AV Rule 208 forbids exceptions).  Callers/containers
	// must be sized so this never happens after startup.
	[[nodiscard]] T* allocate(std::size_t n)
	{
		void* const ptr = arena_->Allocate(n * sizeof(T), alignof(T));
		return static_cast<T*>(ptr);
	}

	void deallocate(T* ptr, std::size_t n) noexcept
	{
		arena_->Deallocate(ptr, n * sizeof(T));
	}

	// Two StackAllocators are equal when they share the same arena.  Equal
	// allocators are what makes node handles transferable between maps.
	template<typename U>
	bool operator==(const StackAllocator<U, kCapacity>& rhs) const noexcept
	{
		return arena_ == rhs.arena_;
	}

	// Pre-C++20 compilers do not synthesize operator!= from operator==, and
	// several standard-library container paths compare allocators with !=,
	// so provide it explicitly for portability (harmless under C++20).
	template<typename U>
	bool operator!=(const StackAllocator<U, kCapacity>& rhs) const noexcept
	{
		return arena_ != rhs.arena_;
	}

	// Public so the rebind constructor can reach it.
	StackArena<kCapacity>* arena_;
};

// Size of the single backing store shared by every container in the demo.
static constexpr std::size_t kArenaCapacity = 65536u;

class MapNodeDemo
{
private:
	using PairAlloc = StackAllocator<std::pair<const int, int>, kArenaCapacity>;
	using Map       = std::map<int, int, std::less<int>, PairAlloc>;
	using NodeAlloc = StackAllocator<Map::node_type, kArenaCapacity>;
	using IntAlloc  = StackAllocator<int, kArenaCapacity>;

public:
	static constexpr int kPoolSize      = 16;
	static constexpr int kRunIterations = 32;

	// Allocates the node pool. Must be called once before RunDemo().
	void Startup()
	{
		std::printf("[startup] arena capacity: %zu bytes\n",
			arena_.Capacity());

		pool_.reserve(static_cast<std::size_t>(kPoolSize));

		// The only way to obtain a live node_type (with its internal tree node
		// already allocated) is to extract() it from a real map. JSF AV C++
		// Coding Standards do not allow dynamic allocation after startup.
		{
			// std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>> tmp{
			//     StackAllocator<std::pair<const int, int>, kArenaCapacity>(arena_)};
			Map tmp{PairAlloc(arena_)};

			for (int i = 0; i < kPoolSize; ++i)
			{
				tmp.emplace(i, i * 10);
			}
			for (int i = 0; i < kPoolSize; ++i)
			{
				pool_.push_back(tmp.extract(i));  // node_type is move-only
			}
		}  // tmp destroyed; tree nodes now owned by pool_ node_types

		free_indices_.reserve(static_cast<std::size_t>(kPoolSize));
		free_indices_.resize(static_cast<std::size_t>(kPoolSize));

		// Ascending so back() == kPoolSize-1 is acquired first.
		std::iota(free_indices_.begin(), free_indices_.end(), 0);

		std::printf("[startup] pool ready - %d nodes available\n", kPoolSize);
		std::printf("[startup] arena used: %zu / %zu bytes\n",
			arena_.Used(), arena_.Capacity());
	}

	// Resets map and free list to post-Startup() state. Zero allocations.
	void Reset()
	{
		// Extract every live node back to its pool slot before clearing.
		// map_.clear() would destroy the tree nodes, leaving pool_ entries empty.
		while (!map_.empty())
		{
			const Map::iterator it = map_.begin();
			const int pool_idx = it->first;  // invariant: key == slot index
			pool_[static_cast<std::size_t>(pool_idx)] = map_.extract(it);
		}
		free_indices_.resize(static_cast<std::size_t>(kPoolSize));
		std::iota(free_indices_.begin(), free_indices_.end(), 0);
	}

	// Exercises insert/remove using pooled nodes. Zero heap allocations.
	// Pass verbose=true to print per-iteration log lines.
	void RunDemo(bool verbose)
	{
		uint32_t seed = 42u;

		for (int iter = 0; iter < kRunIterations; ++iter)
		{
			seed = Lcg(seed);
			// Use bits [17:16] for the insert/remove decision (~75% insert bias).
			const bool want_insert = ((seed >> 16u) & 3u) != 0u || map_.empty();

			if (want_insert && !free_indices_.empty())
			{
				TryInsert(iter, seed = Lcg(seed), verbose);
			}
			else if (!map_.empty())
			{
				DoRemove(iter, seed = Lcg(seed), verbose);
			}
		}

		if (verbose)
		{
			std::printf("[run_demo] arena used after demo: %zu / %zu bytes"
				" (no change expected)\n",
				arena_.Used(), arena_.Capacity());
		}
	}

private:
	// Acquires a free slot, stamps key/value, and hint-inserts into the map.
	void TryInsert(int iter, uint32_t seed, bool verbose)
	{
		const int pool_idx = free_indices_.back();
		free_indices_.pop_back();

		// std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>>::node_type&
		Map::node_type& node = pool_[static_cast<std::size_t>(pool_idx)];
		node.key()    = pool_idx;  // slot index doubles as the map key
		node.mapped() = static_cast<int>(seed % 1000u);

		const int key = node.key();
		const int val = node.mapped();

		// const std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>>::const_iterator
		const Map::const_iterator hint = map_.lower_bound(key);

		// const std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>>::iterator
		const Map::iterator       pos  = map_.insert(hint, std::move(node));

		// AV Rule 165: test emptiness explicitly, not via implicit bool.
		if (node.empty())  // empty == insertion succeeded; node was consumed
		{
			(void)pos;  // AV Rule 113: suppress unused return value
			if (verbose) { Log(iter, "INSERT ", key, val); }
		}
		else
		{
			// Collision: ownership stays with node; return it to the pool.
			pool_[static_cast<std::size_t>(pool_idx)] = std::move(node);
			free_indices_.push_back(pool_idx);
			if (verbose) { Log(iter, "COLLIDE", key, val); }
		}
	}

	// Picks a pseudo-random map entry, extracts it, and returns it to the pool.
	void DoRemove(int iter, uint32_t seed, bool verbose)
	{
		// const std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>>::iterator
		const Map::iterator it = std::next(
				map_.begin(),
				static_cast<std::ptrdiff_t>(seed % map_.size()));

		const int key      = it->first;  // save before extract() invalidates it
		const int pool_idx = key;        // invariant: key == original slot index

		// extract() splices the tree node out without touching the allocator.
		pool_[static_cast<std::size_t>(pool_idx)] = map_.extract(it);
		free_indices_.push_back(pool_idx);

		if (verbose) { Log(iter, "REMOVE ", key, /*val=*/0); }
	}

	void Log(int iter, std::string_view op, int key, int val) const
	{
		if (op == "REMOVE ")
		{
			std::printf("[iter %2d] %s key=%2d map=%2zu free=%2zu\n",
				iter, op.data(), key,
				map_.size(), free_indices_.size());
		}
		else
		{
			std::printf("[iter %2d] %s key=%2d val=%4d map=%2zu free=%2zu\n",
				iter, op.data(), key, val,
				map_.size(), free_indices_.size());
		}
	}

	// Knuth multiplicative LCG - no allocations, no global state.
	[[nodiscard]] static constexpr uint32_t Lcg(uint32_t s) noexcept
	{
		return s * 1664525u + 1013904223u;
	}

	// Arena must be declared before any member that holds a StackAllocator,
	// so it is constructed first and destroyed last.
	StackArena<kArenaCapacity> arena_;

	// std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>>
	//     map_{StackAllocator<std::pair<const int, int>, kArenaCapacity>(arena_)};
	Map map_{PairAlloc(arena_)};

	// std::vector<std::map<int, int, std::less<int>, StackAllocator<std::pair<const int, int>, kArenaCapacity>>::node_type,
	//         StackAllocator<std::map<int, int, std::less<int>,
	//         StackAllocator<std::pair<const int, int>, kArenaCapacity>>::node_type, kArenaCapacity>>
	//     pool_{StackAllocator<std::map<int, int, std::less<int>,
	//         StackAllocator<std::pair<const int, int>, kArenaCapacity>>::node_type,
	//         kArenaCapacity>(arena_)};
	std::vector<Map::node_type, NodeAlloc> pool_{NodeAlloc(arena_)};

	// std::vector<int, StackAllocator<int, kArenaCapacity>>
	//     free_indices_{StackAllocator<int, kArenaCapacity>(arena_)};
	std::vector<int, IntAlloc> free_indices_{IntAlloc(arena_)};
};

// Entry point: one-time startup, silent warm-up, a timed benchmark of
// allocation-free runs, then one verbose run to show the behavior.
int main()
{
	static constexpr int kWarmupRuns = 10;
	static constexpr int kBenchRuns  = 100;

	MapNodeDemo demo;
	demo.Startup();

	// Warm-up: exactly kWarmupRuns silent runs to fault in pages and warm
	// caches before timing.  (The previous loop started at i = 1 and so ran
	// one fewer iteration than kWarmupRuns advertised.)
	for (int i = 0; i < kWarmupRuns; ++i)
	{
		demo.RunDemo(/*verbose=*/false);
		demo.Reset();
	}

	// Benchmark: kBenchRuns silent runs back-to-back.
	const auto t0 = std::chrono::steady_clock::now();
	for (int i = 0; i < kBenchRuns; ++i)
	{
		demo.RunDemo(/*verbose=*/false);
		demo.Reset();
	}
	const auto t1 = std::chrono::steady_clock::now();

	const auto total_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count();
	std::printf("[bench] %d runs: total %lld ns,  avg %.1f ns/run (%.3f us/run)\n",
		kBenchRuns,
		static_cast<long long>(total_ns),
		static_cast<double>(total_ns) / kBenchRuns,
		static_cast<double>(total_ns) / kBenchRuns / 1000.0);

	// Final demonstration run with per-iteration logging.
	demo.RunDemo(/*verbose=*/true);

	return 0;
}
