#ifndef TSAN_ANNOTATIONS
#define TSAN_ANNOTATIONS 0
#endif

#if TSAN_ANNOTATIONS
// ThreadSanitizer's mutex annotation entry points.
extern "C" {
const unsigned __tsan_mutex_linker_init = 1 << 0;
void __tsan_mutex_pre_lock(void *addr, unsigned flags);
void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
void __tsan_mutex_post_unlock(void *addr, unsigned flags);
void __tsan_mutex_pre_signal(void *addr, unsigned flags);
void __tsan_mutex_post_signal(void *addr, unsigned flags);
}
#endif

namespace Synchronization {

// Wrappers that forward to the TSAN annotations when TSAN_ANNOTATIONS is
// enabled; otherwise they compile to empty inline functions.
ALWAYS_INLINE void if_tsan_pre_lock(void *mutex) {
    __tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_post_lock(void *mutex) {
    __tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
}
ALWAYS_INLINE void if_tsan_pre_unlock(void *mutex) {
    (void)__tsan_mutex_pre_unlock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_post_unlock(void *mutex) {
    __tsan_mutex_post_unlock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_pre_signal(void *cond) {
    __tsan_mutex_pre_signal(cond, 0);
}
ALWAYS_INLINE void if_tsan_post_signal(void *cond) {
    __tsan_mutex_post_signal(cond, 0);
}
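// Aside (illustrative only, not part of the runtime): the same annotation
// entry points can make ThreadSanitizer understand any hand-rolled lock.
// Assumes a Clang/GCC build with -fsanitize=thread; "annotated_spinlock" is a
// hypothetical name, not something defined in this file.
struct annotated_spinlock {
    uintptr_t held = 0;
    void lock() {
        __tsan_mutex_pre_lock(this, 0);                // about to acquire
        while (__sync_lock_test_and_set(&held, 1)) {   // spin until acquired
        }
        __tsan_mutex_post_lock(this, 0, 0);            // acquired, not recursive
    }
    void unlock() {
        (void)__tsan_mutex_pre_unlock(this, 0);        // about to release
        __sync_lock_release(&held);
        __tsan_mutex_post_unlock(this, 0);             // released
    }
};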
// Fallback atomics built on the older __sync builtins (used where the
// __atomic builtins below are not available).
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __sync_and_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, T *expected, T *desired) {
    T oldval = *expected;
    T gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
    *expected = gotval;
    return oldval == gotval;
}

// The __sync builtins only offer a sequentially consistent compare-and-swap,
// so every ordering variant maps onto the same helper.
ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __sync_fetch_and_and(addr, val);
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    *val = *addr;
    __sync_synchronize();
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __sync_or_and_fetch(addr, val);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    *addr = *val;
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    *addr = *val;
    __sync_synchronize();
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __sync_synchronize();
}
// Preferred implementations built on the __atomic builtins, which take
// explicit memory-order arguments.
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
}

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}
ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_ACQUIRE);
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    __atomic_store(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    __atomic_store(addr, val, __ATOMIC_RELEASE);
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}
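// Aside (reference only, not part of the runtime, which avoids <atomic> to
// stay freestanding): the same operations expressed with C++11 std::atomic,
// which these builtins correspond to.
#include <atomic>
#include <cstdint>

inline bool cas_weak_acquire_relaxed(std::atomic<uintptr_t> &a, uintptr_t &expected, uintptr_t desired) {
    // A weak CAS may fail spuriously, so callers retry in a loop.
    return a.compare_exchange_weak(expected, desired,
                                   std::memory_order_acquire,
                                   std::memory_order_relaxed);
}

inline uintptr_t fetch_and_release(std::atomic<uintptr_t> &a, uintptr_t mask) {
    return a.fetch_and(mask, std::memory_order_release);
}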
// spin_control: spin for a bounded number of iterations before parking.
ALWAYS_INLINE bool should_spin() {
    if (spin_count > 0) {
        spin_count--;
    }
    return spin_count > 0;
}

// Flag bits packed into the low end of the lock state words. lock_bit marks
// the lock as held; queue_lock_bit (word_lock) marks the waiter queue as being
// edited; parked_bit (fast_mutex) marks that at least one waiter is parked.
static constexpr uint8_t lock_bit = 0x01;
static constexpr uint8_t queue_lock_bit = 0x02;
static constexpr uint8_t parked_bit = 0x02;
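// Aside (layout inferred from the masks used below, not quoted from the
// source): the word-sized lock state packs a waiter-queue pointer alongside
// the two flag bits, which works because queue nodes are aligned to at least
// four bytes.
static inline void *queue_head_of(uintptr_t state) {
    // Strip the flag bits to recover the pointer to the first queued waiter.
    return (void *)(state & ~(uintptr_t)(queue_lock_bit | lock_bit));
}
static inline bool lock_held(uintptr_t state) {
    return (state & lock_bit) != 0;
}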
// word_lock inline fast paths. Contended cases fall through to the
// out-of-line lock_full()/unlock_full() below.
ALWAYS_INLINE void lock() {
    if_tsan_pre_lock(this);

    uintptr_t expected = 0;
    uintptr_t desired = lock_bit;
    // Try to grab the lock bit directly; otherwise run the full slow path.
    if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
        lock_full();
    }

    if_tsan_post_lock(this);
}

ALWAYS_INLINE void unlock() {
    if_tsan_pre_unlock(this);

    uintptr_t val = atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit);
    // If another thread is mid-queueing, it will take care of the wakeup.
    bool no_thread_queuing = (val & queue_lock_bit) == 0;
    // A wakeup is only needed if threads are actually waiting.
    bool some_queued = (val & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0;
    if (no_thread_queuing && some_queued) {
        unlock_full();
    }

    if_tsan_post_unlock(this);
}
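// Aside (hypothetical helper, not in the source): callers keep lock()/unlock()
// paired even on early returns by wrapping them in a scoped guard.
struct scoped_word_lock {
    word_lock &l;
    explicit scoped_word_lock(word_lock &l_) : l(l_) {
        l.lock();
    }
    ~scoped_word_lock() {
        l.unlock();
    }
};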
WEAK void word_lock::lock_full() {
    spin_control spinner;
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    while (true) {
        // If the lock bit is clear, try to take the lock directly.
        if (!(expected & lock_bit)) {
            uintptr_t desired = expected | lock_bit;
            if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
                return;
            }
            continue;
        }

        // If other threads are already queued, spin for a while before parking.
        if (((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0) && spinner.should_spin()) {
            halide_thread_yield();
            atomic_load_relaxed(&state, &expected);
            continue;
        }

        // Push this thread onto the front of the waiter queue and park.
        word_lock_queue_data node;
        node.parker.prepare_park();

        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        if (head == nullptr) {
            node.tail = &node;
        } else {
            node.next = head;
        }
        uintptr_t desired = ((uintptr_t)&node) | (expected & (queue_lock_bit | lock_bit));
        if (atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
            node.parker.park();
            spinner.reset();
            atomic_load_relaxed(&state, &expected);
        }
    }
}
WEAK void word_lock::unlock_full() {
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    // First take ownership of the waiter queue by setting queue_lock_bit.
    while (true) {
        // If another thread is already editing the queue it will do the wakeup,
        // and if nothing is queued there is nothing to wake.
        bool thread_queuing = (expected & queue_lock_bit);
        bool none_queued = (expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0;
        if (thread_queuing || none_queued) {
            return;
        }

        uintptr_t desired = expected | queue_lock_bit;
        if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            break;
        }
    }

    while (true) {
        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        word_lock_queue_data *current = head;
        word_lock_queue_data *tail = current->tail;

        // Walk towards the tail, filling in prev pointers as we go, and cache
        // the result on the head node so later unlocks need not repeat the walk.
        int times_through = 0;
        while (tail == nullptr) {
            word_lock_queue_data *next = current->next;
            next->prev = current;
            current = next;
            tail = current->tail;
            times_through++;
        }
        head->tail = tail;

        // If the lock has been re-taken, release the queue lock and let the
        // current holder do the wakeup when it unlocks.
        if (expected & lock_bit) {
            uintptr_t desired = expected & ~(uintptr_t)queue_lock_bit;
            if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                return;
            }
            atomic_thread_fence_acquire();
            continue;
        }

        word_lock_queue_data *new_tail = tail->prev;
        if (new_tail == nullptr) {
            // The tail was the only waiter: try to clear the queue entirely.
            bool continue_outer = false;
            while (!continue_outer) {
                uintptr_t desired = expected & lock_bit;
                if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                    break;
                }
                if ((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0) {
                    continue;
                }
                // A new waiter appeared; rescan the queue.
                atomic_thread_fence_acquire();
                continue_outer = true;
            }
            if (continue_outer) {
                continue;
            }
        } else {
            // Detach the tail waiter and release the queue lock.
            head->tail = new_tail;
            atomic_and_fetch_release(&state, ~(uintptr_t)queue_lock_bit);
        }

        // Wake the detached tail waiter.
        tail->parker.unpark_start();
        tail->parker.unpark();
        tail->parker.unpark_finish();
        break;
    }
}
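// Aside (self-contained illustration of the queue walk above): waiters are
// pushed at the head, so the tail pointer is only discovered and cached
// lazily; the walk also fills in the prev links needed for later removals.
#include <cassert>

struct toy_node {
    toy_node *next = nullptr;
    toy_node *prev = nullptr;
    toy_node *tail = nullptr;
};

static toy_node *find_tail(toy_node *head) {
    toy_node *current = head;
    toy_node *tail = current->tail;
    while (tail == nullptr) {
        toy_node *next = current->next;
        assert(next != nullptr);  // some node further along always knows the tail
        next->prev = current;
        current = next;
        tail = current->tail;
    }
    head->tail = tail;  // cache so the next walk starts from here
    return tail;
}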
// Debugging aid: print the contents of the parking-lot hash table.
WEAK void dump_hash() {
    int i = 0;
    for (auto &bucket : table.buckets) {
        queue_data *head = bucket.head;
        while (head != nullptr) {
            print(nullptr) << "Bucket index " << i << " addr " << (void *)head->sleep_address << "\n";
            head = head->next;
        }
        i++;
    }
}

static ALWAYS_INLINE uintptr_t addr_hash(uintptr_t addr) {
    // Fibonacci hashing: multiply by the golden-ratio constant and keep the
    // top HASH_TABLE_BITS bits.
    if (sizeof(uintptr_t) >= 8) {
        return (addr * (uintptr_t)0x9E3779B97F4A7C15) >> (64 - HASH_TABLE_BITS);
    } else {
        return (addr * (uintptr_t)0x9E3779B9) >> (32 - HASH_TABLE_BITS);
    }
}
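// Aside (illustrative, not from the source): Fibonacci hashing spreads
// pointer-like keys across 2^bits buckets by multiplying by 2^64 divided by
// the golden ratio and keeping the top bits, so nearby addresses land in
// well-separated buckets.
#include <cstdint>
#include <cstdio>

static inline uint64_t fib_hash(uint64_t addr, int bits) {
    return (addr * UINT64_C(0x9E3779B97F4A7C15)) >> (64 - bits);
}

int main() {
    for (uint64_t addr = 0x1000; addr < 0x1000 + 4 * 64; addr += 64) {
        // Addresses 64 bytes apart map to distant buckets in a 1024-entry table.
        std::printf("%#llx -> %llu\n", (unsigned long long)addr,
                    (unsigned long long)fib_hash(addr, 10));
    }
    return 0;
}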
WEAK hash_bucket &lock_bucket(uintptr_t addr) {
    uintptr_t hash = addr_hash(addr);
    check_hash(hash);
    hash_bucket &bucket = table.buckets[hash];
    bucket.mutex.lock();
    return bucket;
}

WEAK bucket_pair lock_bucket_pair(uintptr_t addr_from, uintptr_t addr_to) {
    uintptr_t hash_from = addr_hash(addr_from);
    uintptr_t hash_to = addr_hash(addr_to);
    check_hash(hash_from);
    check_hash(hash_to);
    hash_bucket &from = table.buckets[hash_from];
    hash_bucket &to = table.buckets[hash_to];
    // Always lock the lower-hash bucket first so two threads locking
    // overlapping pairs cannot deadlock.
    if (hash_from == hash_to) {
        from.mutex.lock();
        return bucket_pair(from, from);
    } else if (hash_from < hash_to) {
        from.mutex.lock();
        to.mutex.lock();
        return bucket_pair(from, to);
    } else {
        to.mutex.lock();
        from.mutex.lock();
        return bucket_pair(from, to);
    }
}

WEAK void unlock_bucket_pair(bucket_pair &buckets) {
    // Unlock in the reverse of the order used in lock_bucket_pair.
    if (&buckets.from == &buckets.to) {
        buckets.from.mutex.unlock();
    } else if (&buckets.from > &buckets.to) {
        buckets.from.mutex.unlock();
        buckets.to.mutex.unlock();
    } else {
        buckets.to.mutex.unlock();
        buckets.from.mutex.unlock();
    }
}
// The interface between the generic parking lot and its users: park() and the
// unpark entry points are shared, while the virtual hooks let each caller
// validate its own condition and publish state when waiters are woken.
struct parking_control {
    virtual bool validate(validate_action &action) {
        return true;
    }
    virtual void before_sleep() {
    }
    virtual uintptr_t unpark(int unparked, bool more_waiters) {
        return 0;
    }
    virtual void requeue_callback(const validate_action &action, bool one_to_wake, bool some_requeued) {
    }

    uintptr_t park(uintptr_t addr);
    uintptr_t unpark_one(uintptr_t addr);
    int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, uintptr_t unpark_info);
};
// In parking_control::park(): after validate() succeeds, link the new waiter
// into the bucket's singly linked queue before sleeping.
    if (bucket.head != nullptr) {
        bucket.tail->next = &queue_data;
    } else {
        bucket.head = &queue_data;
    }
    bucket.tail = &queue_data;

// parking_control::unpark_one(addr): walk the bucket's queue, wake the first
// waiter parked on addr, and report whether more waiters remain.
    while (data != nullptr) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);
        if (cur_addr == addr) {
            // Unlink this waiter from the queue.
            queue_data *next = data->next;
            *data_location = next;

            bool more_waiters = false;

            if (bucket.tail == data) {
                bucket.tail = prev;
            } else {
                // Scan the rest of the queue for another waiter on the same address.
                queue_data *data2 = next;
                while (data2 != nullptr && !more_waiters) {
                    uintptr_t cur_addr2;
                    atomic_load_relaxed(&data2->sleep_address, &cur_addr2);
                    more_waiters = (cur_addr2 == addr);
                    data2 = data2->next;
                }
            }

            // The control's unpark() hook runs here, under the bucket lock, so
            // it can update its own state word before the waiter is released.

            data->parker.unpark_start();
            bucket.mutex.unlock();
            data->parker.unpark();
            data->parker.unpark_finish();

            return more_waiters ? 1 : 0;
        }
        prev = data;
        data_location = &data->next;
        data = data->next;
    }
// parking_control::unpark_requeue(addr_from, addr_to, unpark_info): move the
// waiters parked on addr_from over to addr_to, optionally waking one of them.
    while (data != nullptr) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);

        queue_data *next = data->next;
        if (cur_addr == addr_from) {
            // Unlink this waiter from the "from" bucket.
            *data_location = next;

            if (action.unpark_one && wakeup == nullptr) {
                // The first matching waiter can be woken directly.
                wakeup = data;
            } else {
                // Everyone else is appended to the list being requeued to addr_to.
                if (requeue == nullptr) {
                    requeue = data;
                } else {
                    requeue_tail->next = data;
                }
                requeue_tail = data;
                atomic_store_relaxed(&data->sleep_address, &addr_to);
            }
            data = next;
        } else {
            data_location = &data->next;
            data = next;
        }
    }

    // Splice the requeued waiters onto the tail of the "to" bucket.
    if (requeue != nullptr) {
        requeue_tail->next = nullptr;
        if (buckets.to.head == nullptr) {
            buckets.to.head = requeue;
        } else {
            buckets.to.tail->next = requeue;
        }
        buckets.to.tail = requeue_tail;
    }

    requeue_callback(action, wakeup != nullptr, requeue != nullptr);

    if (wakeup != nullptr) {
        wakeup->parker.unpark_start();
        unlock_bucket_pair(buckets);
        wakeup->parker.unpark();
        wakeup->parker.unpark_finish();
    } else {
        unlock_bucket_pair(buckets);
    }

    return wakeup != nullptr && action.unpark_one;
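// Aside (conceptual sketch, not the runtime's code): the parking lot gives the
// runtime futex-like behaviour portably. A toy equivalent built on std::mutex
// and std::condition_variable shows the shape of the interface; all names here
// are hypothetical.
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <unordered_map>

class toy_parking_lot {
    struct bucket {
        std::mutex m;
        std::condition_variable cv;
        int waiters = 0;
    };
    std::mutex table_lock;
    std::unordered_map<uintptr_t, bucket> table;

    bucket &bucket_for(uintptr_t addr) {
        std::lock_guard<std::mutex> g(table_lock);
        return table[addr];  // references into unordered_map survive rehashing
    }

public:
    // validate is re-checked under the bucket lock, mirroring
    // parking_control::validate() above; if it fails we never sleep.
    template<typename Validate>
    void park(uintptr_t addr, Validate validate) {
        bucket &b = bucket_for(addr);
        std::unique_lock<std::mutex> g(b.m);
        if (!validate()) {
            return;
        }
        b.waiters++;
        b.cv.wait(g);  // (a real implementation re-checks its condition after waking)
        b.waiters--;
    }

    bool unpark_one(uintptr_t addr) {
        bucket &b = bucket_for(addr);
        std::lock_guard<std::mutex> g(b.m);
        if (b.waiters == 0) {
            return false;
        }
        b.cv.notify_one();
        return true;
    }
};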
// mutex_parking_control: how fast_mutex talks to the parking lot. validate()
// re-checks, under the bucket lock, that the mutex is still locked and marked
// as contended; unpark() publishes the successor state.
bool validate(validate_action &action) final {
    uintptr_t result;
    atomic_load_relaxed(lock_state, &result);
    return result == (lock_bit | parked_bit);
}

uintptr_t unpark(int unparked, bool more_waiters) final {
    // Keep parked_bit set if other waiters remain, otherwise clear the state.
    uintptr_t return_state = more_waiters ? parked_bit : 0;
    atomic_store_release(lock_state, &return_state);
    return 0;
}
// fast_mutex::lock_full(): spin briefly, then mark the mutex as contended and
// park on the mutex's own address.
ALWAYS_INLINE void lock_full() {
    spin_control spinner;
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    while (true) {
        // If the lock bit is clear, try to take the lock directly.
        if (!(expected & lock_bit)) {
            uintptr_t desired = expected | lock_bit;
            if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
                return;
            }
            continue;
        }

        // While no one is parked yet, spinning is still worthwhile.
        if ((expected & parked_bit) == 0 && spinner.should_spin()) {
            halide_thread_yield();
            atomic_load_relaxed(&state, &expected);
            continue;
        }

        // Advertise that a thread is about to park.
        if ((expected & parked_bit) == 0) {
            uintptr_t desired = expected | parked_bit;
            if (!atomic_cas_weak_relaxed_relaxed(&state, &expected, &desired)) {
                continue;
            }
        }

        mutex_parking_control control(&state);
        uintptr_t result = control.park((uintptr_t)this);
        if (result == (uintptr_t)this) {
            return;
        }

        spinner.reset();
        atomic_load_relaxed(&state, &expected);
    }
}
// fast_mutex::unlock_full(): retry the uncontended release once, then wake one
// parked thread through the parking lot.
ALWAYS_INLINE void unlock_full() {
    uintptr_t expected = lock_bit;
    uintptr_t desired = 0;
    if (atomic_cas_strong_release_relaxed(&state, &expected, &desired)) {
        return;
    }

    mutex_parking_control control(&state);
    control.unpark_one((uintptr_t)this);
}

// fast_mutex::lock() and unlock(): single-CAS fast paths that fall through to
// the slow paths above on contention.
ALWAYS_INLINE void lock() {
    uintptr_t expected = 0;
    uintptr_t desired = lock_bit;
    if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
        lock_full();
    }
}

ALWAYS_INLINE void unlock() {
    uintptr_t expected = lock_bit;
    uintptr_t desired = 0;
    if (!atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
        unlock_full();
    }
}
// fast_mutex::make_parked_if_locked(): used by the condition variable to set
// parked_bit, but only while the mutex is still held.
ALWAYS_INLINE bool make_parked_if_locked() {
    uintptr_t val;
    atomic_load_relaxed(&state, &val);
    while (true) {
        if (!(val & lock_bit)) {
            return false;
        }
        uintptr_t desired = val | parked_bit;
        if (atomic_cas_weak_relaxed_relaxed(&state, &val, &desired)) {
            return true;
        }
    }
}

// fast_mutex::make_parked(): unconditionally mark the mutex as contended.
ALWAYS_INLINE void make_parked() {
    atomic_or_fetch_relaxed(&state, parked_bit);
}
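// Aside (illustrative restatement, not the runtime's code): the fast_mutex
// fast paths expressed with std::atomic, showing why unlock() must take the
// slow path whenever parked_bit is set.
#include <atomic>
#include <cstdint>

struct fast_mutex_sketch {
    static constexpr uintptr_t lock_bit = 0x01;
    static constexpr uintptr_t parked_bit = 0x02;
    std::atomic<uintptr_t> state{0};

    bool try_lock() {
        uintptr_t expected = 0;
        // Succeeds only when the mutex is idle: no owner and no parked waiters.
        return state.compare_exchange_weak(expected, lock_bit,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed);
    }

    bool try_unlock_fast() {
        uintptr_t expected = lock_bit;
        // Fails if parked_bit is set, i.e. a waiter must be woken through the
        // parking lot instead of silently dropping the lock.
        return state.compare_exchange_strong(expected, 0,
                                             std::memory_order_release,
                                             std::memory_order_relaxed);
    }
};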
// signal_parking_control::unpark(): once the last waiter has been woken, clear
// the condition variable's record of the associated mutex.
uintptr_t unpark(int unparked, bool more_waiters) final {
    if (!more_waiters) {
        uintptr_t val = 0;
        atomic_store_relaxed(cond_state, &val);
    }
    return (uintptr_t)mutex;
}
// broadcast_parking_control::validate(): if the condition variable has since
// been associated with a different mutex, the parked threads are effectively
// waiting on another condition, so do nothing.
    if (val != (uintptr_t)mutex) {
        return false;
    }

// broadcast_parking_control::requeue_callback(): if one waiter was woken
// directly and others were requeued onto the mutex, mark the mutex as
// contended so its unlock path will wake them later.
    if (action.unpark_one && some_requeued) {
        mutex->make_parked();
    }
// wait_parking_control::validate(): record which mutex this condition variable
// is being waited on; refuse if a different mutex is already recorded.
    if (val == 0) {
        val = (uintptr_t)mutex;
        atomic_store_relaxed(cond_state, &val);
    } else if (val != (uintptr_t)mutex) {
        action.invalid_unpark_info = (uintptr_t)mutex;
        return false;
    }
    return true;

// wait_parking_control::unpark(): clear the recorded mutex once the last
// waiter has been woken.
uintptr_t unpark(int unparked, bool more_waiters) final {
    if (!more_waiters) {
        uintptr_t val = 0;
        atomic_store_relaxed(cond_state, &val);
    }
    return 0;
}
// fast_cond::signal(): wake one waiter, if any thread is waiting at all.
ALWAYS_INLINE void signal() {
    if_tsan_pre_signal(this);

    uintptr_t val;
    atomic_load_relaxed(&state, &val);
    if (val == 0) {
        // No mutex is recorded, so no one is waiting; nothing to do.
        if_tsan_post_signal(this);
        return;
    }

    signal_parking_control control(&state, (fast_mutex *)val);
    control.unpark_one((uintptr_t)this);
    if_tsan_post_signal(this);
}

// fast_cond::broadcast(): wake one waiter and requeue the rest directly onto
// the mutex, so they are released one at a time as it is unlocked.
ALWAYS_INLINE void broadcast() {
    if_tsan_pre_signal(this);

    uintptr_t val;
    atomic_load_relaxed(&state, &val);
    if (val == 0) {
        if_tsan_post_signal(this);
        return;
    }

    broadcast_parking_control control(&state, (fast_mutex *)val);
    control.unpark_requeue((uintptr_t)this, val, 0);
    if_tsan_post_signal(this);
}
// fast_cond::wait(): park on the condition variable; on waking, reacquire the
// mutex unless the wakeup already left it held by this thread.
    uintptr_t result = control.park((uintptr_t)this);
    if (result != (uintptr_t)mutex) {
        mutex->lock();
    } else {
        // The mutex was handed over as part of the wakeup: annotate the
        // acquisition for TSAN and sanity-check that the lock bit is set.
        if_tsan_pre_lock(mutex);

        uintptr_t val;
        atomic_load_relaxed((uintptr_t *)mutex, &val);
        halide_assert(nullptr, val & 0x1);

        if_tsan_post_lock(mutex);
    }
WEAK void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex) {
    Synchronization::fast_cond *fast_cond = (Synchronization::fast_cond *)cond;
    Synchronization::fast_mutex *fast_mutex = (Synchronization::fast_mutex *)mutex;
    fast_cond->wait(fast_mutex);
}

WEAK halide_mutex_array *halide_mutex_array_create(int sz) {
    halide_mutex_array *array = (halide_mutex_array *)halide_malloc(
        nullptr, sizeof(halide_mutex_array));
    if (array == nullptr) {
        return nullptr;
    }
    array->array = (halide_mutex *)halide_malloc(nullptr, sz * sizeof(halide_mutex));
    if (array->array == nullptr) {
        halide_free(nullptr, array);
        return nullptr;
    }
    memset(array->array, 0, sz * sizeof(halide_mutex));
    return array;
}
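// Aside (usage sketch against the public API declared in HalideRuntime.h; the
// queue itself is hypothetical): the classic condition-variable wait loop. A
// zero-initialized halide_mutex / halide_cond is the valid unlocked state.
#include "HalideRuntime.h"

struct work_queue_sketch {
    halide_mutex mutex = {};
    halide_cond not_empty = {};
    int jobs = 0;

    void push() {
        halide_mutex_lock(&mutex);
        jobs++;
        halide_cond_signal(&not_empty);  // wake one waiting consumer
        halide_mutex_unlock(&mutex);
    }

    void pop() {
        halide_mutex_lock(&mutex);
        while (jobs == 0) {
            // halide_cond_wait releases the mutex while sleeping and reacquires
            // it before returning, so the predicate is rechecked in a loop.
            halide_cond_wait(&not_empty, &mutex);
        }
        jobs--;
        halide_mutex_unlock(&mutex);
    }
};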