libstdc++
barrier
Go to the documentation of this file.
1// <barrier> -*- C++ -*-
2
3// Copyright (C) 2020-2024 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25// This implementation is based on libcxx/include/barrier
26//===-- barrier.h --------------------------------------------------===//
27//
28// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
29// See https://llvm.org/LICENSE.txt for license information.
30// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
31//
32//===---------------------------------------------------------------===//
33
34/** @file include/barrier
35 * This is a Standard C++ Library header.
36 */
37
38#ifndef _GLIBCXX_BARRIER
39#define _GLIBCXX_BARRIER 1
40
41#ifdef _GLIBCXX_SYSHDR
42#pragma GCC system_header
43#endif
44
45#include <bits/requires_hosted.h> // threading primitive
46
47#define __glibcxx_want_barrier
48#include <bits/version.h>
49
50#ifdef __cpp_lib_barrier // C++ >= 20 && __cpp_aligned_new && lib_atomic_wait
51#include <bits/atomic_base.h>
52#include <bits/std_thread.h>
53#include <bits/unique_ptr.h>
54
55#include <array>
56
57namespace std _GLIBCXX_VISIBILITY(default)
58{
59_GLIBCXX_BEGIN_NAMESPACE_VERSION
60
61 struct __empty_completion
62 {
63 _GLIBCXX_ALWAYS_INLINE void
64 operator()() noexcept
65 { }
66 };
67
/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from the pseudocode found in the literature for two
main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an extra (N+1)th round on top of the N rounds of the
    tree barrier.
 2. A great deal of attention has been paid to avoiding cache line
    thrashing by flattening the tree structure into cache-line sized
    arrays that are indexed in an efficient way.

*/
81
// Opaque phase counter used by the barrier implementation.
// It has no enumerators: values are produced by casting from
// unsigned char, so arithmetic on the phase wraps modulo 256.
enum class __barrier_phase_t : unsigned char
{ };
83
84 template<typename _CompletionF>
85 class __tree_barrier
86 {
87 using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
88 using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
89 static constexpr auto __phase_alignment =
90 __atomic_phase_ref_t::required_alignment;
91
92 using __tickets_t = std::array<__barrier_phase_t, 64>;
93 struct alignas(64) /* naturally-align the heap state */ __state_t
94 {
95 alignas(__phase_alignment) __tickets_t __tickets;
96 };
97
98 ptrdiff_t _M_expected;
99 unique_ptr<__state_t[]> _M_state;
100 __atomic_base<ptrdiff_t> _M_expected_adjustment;
101 _CompletionF _M_completion;
102
103 alignas(__phase_alignment) __barrier_phase_t _M_phase;
104
105 bool
106 _M_arrive(__barrier_phase_t __old_phase, size_t __current)
107 {
108 const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
109 const auto __half_step =
110 static_cast<__barrier_phase_t>(__old_phase_val + 1);
111 const auto __full_step =
112 static_cast<__barrier_phase_t>(__old_phase_val + 2);
113
114 size_t __current_expected = _M_expected;
115 __current %= ((_M_expected + 1) >> 1);
116
117 for (int __round = 0; ; ++__round)
118 {
119 if (__current_expected <= 1)
120 return true;
121 size_t const __end_node = ((__current_expected + 1) >> 1),
122 __last_node = __end_node - 1;
123 for ( ; ; ++__current)
124 {
125 if (__current == __end_node)
126 __current = 0;
127 auto __expect = __old_phase;
128 __atomic_phase_ref_t __phase(_M_state[__current]
129 .__tickets[__round]);
130 if (__current == __last_node && (__current_expected & 1))
131 {
132 if (__phase.compare_exchange_strong(__expect, __full_step,
133 memory_order_acq_rel))
134 break; // I'm 1 in 1, go to next __round
135 }
136 else if (__phase.compare_exchange_strong(__expect, __half_step,
137 memory_order_acq_rel))
138 {
139 return false; // I'm 1 in 2, done with arrival
140 }
141 else if (__expect == __half_step)
142 {
143 if (__phase.compare_exchange_strong(__expect, __full_step,
144 memory_order_acq_rel))
145 break; // I'm 2 in 2, go to next __round
146 }
147 }
148 __current_expected = __last_node + 1;
149 __current >>= 1;
150 }
151 }
152
153 public:
154 using arrival_token = __barrier_phase_t;
155
156 static constexpr ptrdiff_t
157 max() noexcept
158 { return __PTRDIFF_MAX__; }
159
160 __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
161 : _M_expected(__expected), _M_expected_adjustment(0),
162 _M_completion(move(__completion)),
163 _M_phase(static_cast<__barrier_phase_t>(0))
164 {
165 size_t const __count = (_M_expected + 1) >> 1;
166
167 _M_state = std::make_unique<__state_t[]>(__count);
168 }
169
170 [[nodiscard]] arrival_token
171 arrive(ptrdiff_t __update)
172 {
173 std::hash<std::thread::id> __hasher;
174 size_t __current = __hasher(std::this_thread::get_id());
175 __atomic_phase_ref_t __phase(_M_phase);
176 const auto __old_phase = __phase.load(memory_order_relaxed);
177 const auto __cur = static_cast<unsigned char>(__old_phase);
178 for(; __update; --__update)
179 {
180 if(_M_arrive(__old_phase, __current))
181 {
182 _M_completion();
183 _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
184 _M_expected_adjustment.store(0, memory_order_relaxed);
185 auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
186 __phase.store(__new_phase, memory_order_release);
187 __phase.notify_all();
188 }
189 }
190 return __old_phase;
191 }
192
193 void
194 wait(arrival_token&& __old_phase) const
195 {
196 __atomic_phase_const_ref_t __phase(_M_phase);
197 auto const __test_fn = [=]
198 {
199 return __phase.load(memory_order_acquire) != __old_phase;
200 };
201 std::__atomic_wait_address(&_M_phase, __test_fn);
202 }
203
204 void
205 arrive_and_drop()
206 {
207 _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
208 (void)arrive(1);
209 }
210 };
211
212 template<typename _CompletionF = __empty_completion>
213 class barrier
214 {
215 // Note, we may introduce a "central" barrier algorithm at some point
216 // for more space constrained targets
217 using __algorithm_t = __tree_barrier<_CompletionF>;
218 __algorithm_t _M_b;
219
220 public:
221 class arrival_token final
222 {
223 public:
224 arrival_token(arrival_token&&) = default;
225 arrival_token& operator=(arrival_token&&) = default;
226 ~arrival_token() = default;
227
228 private:
229 friend class barrier;
230 using __token = typename __algorithm_t::arrival_token;
231 explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
232 __token _M_tok;
233 };
234
235 static constexpr ptrdiff_t
236 max() noexcept
237 { return __algorithm_t::max(); }
238
239 explicit
240 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
241 : _M_b(__count, std::move(__completion))
242 { }
243
244 barrier(barrier const&) = delete;
245 barrier& operator=(barrier const&) = delete;
246
247 [[nodiscard]] arrival_token
248 arrive(ptrdiff_t __update = 1)
249 { return arrival_token{_M_b.arrive(__update)}; }
250
251 void
252 wait(arrival_token&& __phase) const
253 { _M_b.wait(std::move(__phase._M_tok)); }
254
255 void
256 arrive_and_wait()
257 { wait(arrive()); }
258
259 void
260 arrive_and_drop()
261 { _M_b.arrive_and_drop(); }
262 };
263
264_GLIBCXX_END_NAMESPACE_VERSION
265} // namespace
266#endif // __cpp_lib_barrier
267#endif // _GLIBCXX_BARRIER