portable_atomic/imp/fallback/mod.rs

// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
Fallback implementation using global locks.

This implementation uses a seqlock for each global lock.

It is based on the global locks in crossbeam-utils's `AtomicCell`, but the seqlock is
implemented in a way that does not depend on UB
(see the comments in the optimistic_read method in the atomic! macro for details).

Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/

#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            any(
                all(
                    target_os = "linux",
                    any(
                        all(
                            target_env = "gnu",
                            any(target_endian = "little", not(target_feature = "crt-static")),
                        ),
                        all(
                            any(target_env = "musl", target_env = "ohos", target_env = "uclibc"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
                target_os = "openbsd",
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "riscv32",
            not(any(miri, portable_atomic_sanitize_thread)),
            not(portable_atomic_no_asm),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "riscv64",
            not(portable_atomic_no_asm),
            any(
                target_feature = "experimental-zacas",
                portable_atomic_target_feature = "experimental-zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default
                    any(target_os = "linux", target_os = "android"),
                    not(any(miri, portable_atomic_sanitize_thread)),
                ),
            ),
        ),
        all(
            target_arch = "arm",
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]

#[macro_use]
pub(crate) mod utils;

// Use the "wide" sequence lock if the pointer width <= 32, to prevent its counter from wrapping
// around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32 bits and may be
// vulnerable to wraparound. But that is mostly okay, since on such primitive hardware the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement the normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use self::{
    seq_lock::{SeqLock, SeqLockWriteGuard},
    utils::CachePadded,
};

// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce the number of chunks in the byte-wise atomic memcpy.
use self::seq_lock::{AtomicChunk, Chunk};

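// Illustration only, not part of the upstream crate: a stamp-level model of the reader-side
// protocol that `load` (in the atomic! macro below) relies on. The real SeqLock in
// seq_lock.rs / seq_lock_wide.rs provides `optimistic_read`, which returns a stamp only when
// no write is in progress, and `validate_read`, which succeeds only if the stamp has not
// changed since. The function names and the "locked" sentinel below are invented for this
// sketch.
#[cfg(test)]
mod stamp_model_sketch {
    // `None` models "a write is in progress"; otherwise the current stamp is returned.
    fn optimistic_read(state: u64) -> Option<u64> {
        const LOCKED: u64 = 1; // sentinel chosen for the sketch
        if state == LOCKED { None } else { Some(state) }
    }
    // A snapshot taken between the two observations is consistent only if the stamp did not
    // move in the meantime.
    fn validate_read(begin_stamp: u64, current_state: u64) -> bool {
        current_state == begin_stamp
    }

    #[test]
    fn reader_protocol() {
        // No writer before or after: the optimistic snapshot is accepted.
        let stamp = optimistic_read(4).unwrap();
        assert!(validate_read(stamp, 4));
        // A writer held the lock when the read started: fall back to taking the write lock.
        assert!(optimistic_read(1).is_none());
        // A write completed during the read: the snapshot is discarded.
        assert!(!validate_read(4, 6));
    }
}
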
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // If the modulus is a constant number, the compiler will use crazy math to transform this into
    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
    &LOCKS[addr % LEN]
}

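// Illustrative sanity check, added here as a sketch (not part of the upstream crate):
// `lock` is a pure function of the address, so the same address always yields the same
// `&'static SeqLock`, and addresses that collide modulo the table length share a slot.
// The `67` below mirrors the `LEN` constant above.
#[cfg(test)]
#[test]
fn lock_selection_is_deterministic() {
    let x = 0u128;
    let addr = &x as *const u128 as usize;
    // Same address -> same lock instance.
    assert!(core::ptr::eq(lock(addr), lock(addr)));
    // Addresses that differ by a multiple of the table length map to the same slot.
    assert!(core::ptr::eq(lock(addr), lock(addr + 67)));
}
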
macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If atomic operations were not used here, a concurrent write performed by
                // `write` would cause a data race.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also the atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them
                // (a round-trip sketch of this property follows the `atomic!` macro below).
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls optimistic_read, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the lock to write.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the lock to write.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See optimistic_read for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::IS_ALWAYS_LOCK_FREE
            }
            pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get() as usize);

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get() as usize).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get() as usize).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}
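
// Illustration only, not part of the upstream crate: `optimistic_read` and `write` above rely
// on the fact that an integer and an array of `Chunk`s of the same total size have the same
// in-memory representation, so transmuting in either direction round-trips. This sketch checks
// that property for u128; the constant and value are chosen arbitrarily.
#[cfg(test)]
#[test]
fn chunk_transmute_round_trip() {
    const LEN: usize = mem::size_of::<u128>() / mem::size_of::<Chunk>();
    let v: u128 = 0x0102_0304_0506_0708_090a_0b0c_0d0e_0f10;
    // SAFETY: integers are plain old data, so both transmutes are sound.
    let chunks = unsafe { mem::transmute::<u128, [Chunk; LEN]>(v) };
    let back = unsafe { mem::transmute::<[Chunk; LEN], u128>(chunks) };
    assert_eq!(back, v);
}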

#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        test,
        not(any(
            target_has_atomic = "64",
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                not(portable_atomic_no_asm),
                any(
                    target_feature = "experimental-zacas",
                    portable_atomic_target_feature = "experimental-zacas",
                ),
            ),
        ))
    ))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);
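
    // A usage sketch added for illustration (not part of the upstream test suite): the
    // lock-based fallback exposes the same API as the native atomics, exercised here
    // single-threaded through a few representative operations.
    #[test]
    fn fallback_u128_usage_sketch() {
        use core::sync::atomic::Ordering;
        let a = AtomicU128::new(1);
        assert_eq!(a.load(Ordering::SeqCst), 1);
        a.store(2, Ordering::SeqCst);
        assert_eq!(a.swap(5, Ordering::SeqCst), 2);
        // compare_exchange succeeds when the current value matches...
        assert_eq!(a.compare_exchange(5, 10, Ordering::SeqCst, Ordering::SeqCst), Ok(5));
        // ...and reports the actual value when it does not.
        assert_eq!(a.compare_exchange(99, 0, Ordering::SeqCst, Ordering::SeqCst), Err(10));
        assert_eq!(a.fetch_add(1, Ordering::SeqCst), 10);
        assert_eq!(a.fetch_max(100, Ordering::SeqCst), 11);
        assert_eq!(a.load(Ordering::SeqCst), 100);
    }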

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
}