Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
trace_container.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <array>
5#include <atomic>
6#include <cstddef>
7#include <cstdint>
8#include <functional>
9#include <memory>
10#include <span>
11#include <utility>
12#include <vector>
13
21
22namespace bb::avm2::tracegen {
23
24// Thread-safety contract.
25//
26// This container supports concurrent get/set during the parallel tracegen fill phase, *as long as the
27// caller never lets two threads touch the same cell (column, row) with at least one of them writing*.
28// Under that contract the following are all race-free and never serialize against each other:
29// - writes to different columns (independent storage);
30// - writes to different rows of the same column, even when those rows fall in the same shard (e.g.
31// two chunks that meet at a shard boundary) — distinct rows are distinct objects;
32// - concurrent first writes that race to create the same shard — the shard is installed with a single
33// compare-and-swap, so it is created at most once (the loser frees its spare copy).
34//
35// What is NOT protected, and what the caller must therefore guarantee never happens concurrently:
36// - two writes to the same (column, row), or a read and a write of the same (column, row). There is
37// no per-cell synchronization, so this is a data race (undefined behavior). Tracegen satisfies the
38// contract because it never writes a given cell from two threads, and every read of a column
39// happens in a later, barrier-separated phase.
40// - get/set concurrent with reserve_column, clear_column, invert_column, or the destructor on the
41// same column. Those operations mutate or free the shard table and assume the parallel fill phase
42// for that column has already joined.
43//
44// Design. Each column is partitioned into fixed-size row intervals ("shards") of INTERVAL_SIZE rows.
45// The per-column shard table is a fixed-size array of atomic pointers indexed by shard (row /
46// INTERVAL_SIZE), so the hot get/set path finds its shard with a single lock-free atomic load; that
47// path takes no per-column or per-shard lock, so concurrent writers of a column never serialize. A
48// shard is created at most once with a lock-free compare-and-swap: a writer that finds a null slot
49// constructs a shard and atomically installs it only if the slot is still null, so creators of
50// different shards never serialize. Each shard holds its rows in a dense, fixed-size array, so once a
51// shard exists, a write is a plain store to a fixed address.
53 public:
54 // Number of rows owned by a single shard, and the unit of lazy allocation. Writes to different rows
55 // never serialize regardless of shard, so this does not affect write parallelism; it only sets how
56 // finely columns are allocated and the (rare) granularity at which two chunks can race to create the
57 // same shard. Chunked tracegen sizes its chunks as a multiple of this value (and ideally aligns to
58 // it) so that concurrent chunks touch disjoint shards. Smaller => less memory wasted in
59 // sparsely-filled regions, at the cost of a larger shard table (more atomic slots) and more shard
60 // allocations per column.
61 static constexpr uint32_t INTERVAL_SIZE = 1u << 11;
62 // Number of shards in a column's (fixed-size) shard table. The trace never exceeds the circuit size,
63 // so this bounds the shard index. INTERVAL_SIZE divides MAX_AVM_TRACE_SIZE evenly.
64 static constexpr size_t NUM_SHARDS = MAX_AVM_TRACE_SIZE / INTERVAL_SIZE;
65
67
68 const FF& get(Column col, uint32_t row) const;
69 // Returns a tuple of const references to the values in the specified columns.
70 template <size_t N> auto get_multiple(const std::array<ColumnAndShifts, N>& cols, uint32_t row) const
71 {
72 return [&]<size_t... Is>(std::index_sequence<Is...>) {
75 }
76 // Extended version of get that works with shifted columns. More expensive.
78
79 void set(Column col, uint32_t row, const FF& value);
80 // Bulk setting for a given row.
81 void set(uint32_t row, std::span<const std::pair<Column, FF>> values);
82 // Reserve column size. Useful for precomputed columns.
83 void reserve_column(Column col, size_t size);
84
85 // Visits non-zero values in a column. The visit order is unspecified.
86 void visit_column(Column col, const std::function<void(uint32_t, const FF&)>& visitor) const;
87 // Returns the number of rows in a column. That is, the maximum non-zero row index + 1.
89 // Maximum number of rows in any column.
90 uint32_t get_num_rows() const;
91 // Maximum number of rows in any witness column (no precomputed columns).
93 // Number of columns (without shifts).
94 static constexpr size_t num_columns() { return NUM_COLUMNS_WITHOUT_SHIFTS; }
95
96 // Batch inverts a set of columns. Not thread-safe.
98
99 // Free column memory. Not thread-safe.
100 void clear_column(Column col);
101
102 private:
103 // A contiguous range of rows [k*INTERVAL_SIZE, (k+1)*INTERVAL_SIZE) within a column.
104 // Rows are stored densely: rows[row % INTERVAL_SIZE] holds the value for that absolute row,
105 // with zero meaning "absent" (an unset cell reads as zero anyway). Shards are top-dense — once a
106 // shard exists it tends to fill — so a flat array beats a hash map: no hashing/rehashing, sequential
107 // cache-friendly writes, stable element addresses, and less memory than the map's key + load-factor
108 // overhead.
109 //
110 // Concurrency: there is no per-shard lock. The array is fixed-size and never reallocates, so
111 // concurrent writes to different rows of this shard touch distinct objects and are race-free by the
112 // C++ memory model, including when two chunks meet at a shard boundary. Same-cell concurrency is the
113 // caller's responsibility (see the thread-safety contract on TraceContainer). The row count is derived
114 // lazily by scanning (see get_column_rows), avoiding any per-write bookkeeping.
115 //
116 // NOTE: the {} initializer on rows is required. FF's default constructor is trivial, so it zeroes
117 // only under value-initialization; a plain `std::array<FF, INTERVAL_SIZE> rows;` would leave garbage,
118 // breaking the "unset cell reads as zero" invariant.
120 std::array<FF, INTERVAL_SIZE> rows{};
121 };
122 // A column is a fixed-size table of lazily-created shards (index = row / INTERVAL_SIZE), held inline
123 // as atomic pointers. The hot get/set path reads a slot with a single relaxed/acquire atomic load and
124 // never takes a lock, so concurrent writers of the same column do not serialize. A shard is created
125 // at most once: a writer that finds a null slot constructs one and installs it with a compare-and-swap,
126 // keeping it only if the slot was still null. Slots are never cleared concurrently with get/set
127 // (clear_column and the destructor run after the parallel fill phase).
129 SparseColumn() = default;
131 {
132 for (size_t k = 0; k < NUM_SHARDS; ++k) {
133 delete slots[k].load(std::memory_order_relaxed);
134 }
135 }
136 SparseColumn(const SparseColumn&) = delete;
138
139 // The {} zero-initializes every slot to nullptr (std::atomic value-initializes its value in C++20).
141 };
142 // We store the trace as a sparse matrix. Each SparseColumn holds its shard table inline, so
143 // sizeof(SparseColumn) is ~NUM_SHARDS atomic pointers; with ~3k columns the whole array is tens of
144 // MB, so we heap-allocate it via unique_ptr rather than placing it on the stack.
146
147 // Returns the shard owning shard_idx, creating it (once, via a lock-free compare-and-swap) if absent.
149 // Not thread-safe.
151};
152
153} // namespace bb::avm2::tracegen
static constexpr size_t num_columns()
const FF & get(Column col, uint32_t row) const
void reserve_column(Column col, size_t size)
const FF & get_column_or_shift(ColumnAndShifts col, uint32_t row) const
std::unique_ptr< std::array< SparseColumn, NUM_COLUMNS_WITHOUT_SHIFTS > > trace
void invert_columns(std::span< const Column > cols)
auto get_multiple(const std::array< ColumnAndShifts, N > &cols, uint32_t row) const
static ColumnInterval & get_or_create_shard(SparseColumn &column_data, size_t shard_idx)
void visit_column(Column col, const std::function< void(uint32_t, const FF &)> &visitor) const
uint32_t get_column_rows(Column col) const
void set(Column col, uint32_t row, const FF &value)
static constexpr uint32_t INTERVAL_SIZE
constexpr std::size_t MAX_AVM_TRACE_SIZE
Definition constants.hpp:13
AvmFlavorSettings::FF FF
Definition field.hpp:10
constexpr auto NUM_COLUMNS_WITHOUT_SHIFTS
Definition columns.hpp:40
ColumnAndShifts
Definition columns.hpp:34
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
TUPLET_INLINE constexpr auto forward_as_tuple(T &&... a) noexcept
Definition tuplet.hpp:1067
std::array< std::atomic< ColumnInterval * >, NUM_SHARDS > slots
SparseColumn & operator=(const SparseColumn &)=delete