#ifndef TSAN_ANNOTATIONS
#define TSAN_ANNOTATIONS 0
const unsigned __tsan_mutex_linker_init = 1 << 0;
void __tsan_mutex_pre_lock(void *addr, unsigned flags);
void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
void __tsan_mutex_post_unlock(void *addr, unsigned flags);
void __tsan_mutex_pre_signal(void *addr, unsigned flags);
void __tsan_mutex_post_signal(void *addr, unsigned flags);
namespace Synchronization {

__tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
__tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
(void)__tsan_mutex_pre_unlock(mutex, __tsan_mutex_linker_init);
__tsan_mutex_post_unlock(mutex, __tsan_mutex_linker_init);
__tsan_mutex_pre_signal(cond, 0);
__tsan_mutex_post_signal(cond, 0);
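The annotation calls above are the bodies of small if_tsan_* helpers that the lock and condition-variable code below uses (for example if_tsan_pre_lock(this)). A minimal sketch of that wrapper pattern, assuming the usual arrangement of compiling the helpers away when TSAN_ANNOTATIONS is 0; the exact upstream definitions may differ:

#if TSAN_ANNOTATIONS
ALWAYS_INLINE void if_tsan_pre_lock(void *mutex) {
    __tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
}
ALWAYS_INLINE void if_tsan_post_lock(void *mutex) {
    __tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
}
#else
// With TSAN disabled the helpers are empty and optimize away entirely.
ALWAYS_INLINE void if_tsan_pre_lock(void *) {
}
ALWAYS_INLINE void if_tsan_post_lock(void *) {
}
#endif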
// Fallback implementations built on the older __sync builtins.
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __sync_and_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, T *expected, T *desired) {
    T oldval = *expected;
    T gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
    *expected = gotval;
    return oldval == gotval;
}

// Every CAS variant maps onto the sequentially consistent helper above.
ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __sync_fetch_and_and(addr, val);
}

__sync_synchronize();  // full barrier used by the acquire-load fallback

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __sync_or_and_fetch(addr, val);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    *addr = *val;
}

__sync_synchronize();  // full barrier used by the release-store fallback
__sync_synchronize();  // full barrier used by atomic_thread_fence_acquire
// Implementations built on the C++11-style __atomic builtins with explicit memory orders.
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
}

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
}

__atomic_load(addr, val, __ATOMIC_RELAXED);   // body of atomic_load_relaxed
__atomic_load(addr, val, __ATOMIC_ACQUIRE);   // body of atomic_load_acquire

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    __atomic_store(addr, val, __ATOMIC_RELAXED);
}

__atomic_store(addr, val, __ATOMIC_RELEASE);  // body of atomic_store_release
__atomic_thread_fence(__ATOMIC_ACQUIRE);      // body of atomic_thread_fence_acquire
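Whichever implementation is selected, the CAS helpers store the freshly observed value back into expected when they fail, so callers retry in a simple loop. An illustrative helper (not part of the runtime) showing the typical pattern used throughout this file:

// Illustrative only: set a flag bit in a state word using the helpers above.
ALWAYS_INLINE void set_flag_bit(uintptr_t *state, uintptr_t bit) {
    uintptr_t expected;
    atomic_load_relaxed(state, &expected);
    while (true) {
        uintptr_t desired = expected | bit;
        // On failure the CAS writes the current value of *state back into expected.
        if (atomic_cas_weak_acquire_relaxed(state, &expected, &desired)) {
            return;
        }
    }
}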
// spin_control::should_spin(): spend the spin budget before parking.
if (spin_count > 0) {
    spin_count--;
}
return spin_count > 0;

// Low bits packed into the lock state word.
static constexpr uint8_t lock_bit = 0x01;
static constexpr uint8_t queue_lock_bit = 0x02;
static constexpr uint8_t parked_bit = 0x02;
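The rest of the state word holds a pointer to the head of the waiter queue; pointer alignment guarantees the low bits are free to carry the flags above. An illustrative sketch (the helper names are not from the listing) of how the code packs and unpacks that word:

// Illustrative only: a lock state word is a queue-head pointer OR'd with flag bits.
ALWAYS_INLINE uintptr_t pack_state(word_lock_queue_data *head, uintptr_t flags) {
    return ((uintptr_t)head) | flags;
}

ALWAYS_INLINE word_lock_queue_data *unpack_queue_head(uintptr_t state) {
    // Masking off both low flag bits recovers the aligned pointer, exactly as the code below does.
    return (word_lock_queue_data *)(state & ~(uintptr_t)(queue_lock_bit | lock_bit));
}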
// word_lock::lock() fast path.
if_tsan_pre_lock(this);

uintptr_t expected = 0;
uintptr_t desired = lock_bit;
// Acquire with a single CAS when uncontended; otherwise take the slow path.
if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
    lock_full();
}

if_tsan_post_lock(this);

// word_lock::unlock() fast path.
if_tsan_pre_unlock(this);

uintptr_t val = atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit);
// A wakeup is only needed if no other thread holds the queue lock and the queue is non-empty.
bool no_thread_queuing = (val & queue_lock_bit) == 0;
bool some_queued = (val & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0;
if (no_thread_queuing && some_queued) {
    unlock_full();
}

if_tsan_post_unlock(this);
WEAK void word_lock::lock_full() {
    spin_control spinner;
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    // Take the lock directly if the lock bit is clear.
    if (!(expected & lock_bit)) {
        uintptr_t desired = expected | lock_bit;
        if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            return;
        }
    }

    // Spin while other threads are queued and spin budget remains.
    if (((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0) && spinner.should_spin()) {
        atomic_load_relaxed(&state, &expected);
    }

    // Otherwise queue this thread for unlock_full() to wake.
    word_lock_queue_data node;
    node.parker.prepare_park();
    word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
    if (head == nullptr) {
        node.tail = &node;
    }
    uintptr_t desired = ((uintptr_t)&node) | (expected & (queue_lock_bit | lock_bit));
    if (atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
        // Node published: park until a future unlock_full() wakes this thread.
    }
    atomic_load_relaxed(&state, &expected);
}
WEAK void word_lock::unlock_full() {
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    // If another thread already holds the queue lock, or nobody is queued, there is nothing to do.
    bool thread_queuing = (expected & queue_lock_bit);
    bool none_queued = (expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0;
    if (thread_queuing || none_queued) {
        return;
    }

    // Take the queue lock so this thread can walk and edit the waiter list.
    uintptr_t desired = expected | queue_lock_bit;
    if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
        // Queue lock acquired; proceed to dequeue a waiter.
    }

    // Walk the list to find the tail, filling in prev pointers as we go.
    word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
    word_lock_queue_data *current = head;
    word_lock_queue_data *tail = current->tail;
    int times_through = 0;
    while (tail == nullptr) {
        word_lock_queue_data *next = current->next;
        next->prev = current;
        current = next;
        tail = current->tail;
    }

    // If the lock was re-taken in the meantime, hand responsibility for the wakeup
    // to the current holder and just drop the queue lock.
    if (expected & lock_bit) {
        uintptr_t desired = expected & ~(uintptr_t)queue_lock_bit;
        if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
            return;
        }
        atomic_thread_fence_acquire();
    }

    // Dequeue the tail of the list.
    word_lock_queue_data *new_tail = tail->prev;
    if (new_tail == nullptr) {
        // The queue is becoming empty: clear the queue bits while keeping the lock bit.
        bool continue_outer = false;
        while (!continue_outer) {
            uintptr_t desired = expected & lock_bit;
            if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                break;
            }
            if ((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0) {
                continue;
            }
            atomic_thread_fence_acquire();
            continue_outer = true;
        }
        if (continue_outer) {
            // A new waiter appeared; the walk restarts from the top.
        }
    } else {
        head->tail = new_tail;
        atomic_and_fetch_release(&state, ~(uintptr_t)queue_lock_bit);
    }

    // Wake the dequeued thread.
    tail->parker.unpark_start();
    tail->parker.unpark();
    tail->parker.unpark_finish();
}
#define LOAD_FACTOR 4
#define HASH_TABLE_BITS 10
#define table (*(hash_table *)table_storage)
WEAK void dump_hash() {
    int i = 0;
    for (auto &bucket : table.buckets) {
        queue_data *head = bucket.head;
        while (head != nullptr) {
            print(nullptr) << "Bucket index " << i << " addr " << (void *)head->sleep_address << "\n";
            head = head->next;
        }
        i++;
    }
}
// Fibonacci hashing: multiply by 2^N divided by the golden ratio and keep the top bits.
if (sizeof(uintptr_t) >= 8) {
    return (addr * (uintptr_t)0x9E3779B97F4A7C15) >> (64 - bits);
} else {
    return (addr * (uintptr_t)0x9E3779B9) >> (32 - bits);
}
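For illustration, parking uses this hash to pick one of the buckets in the global table. The helper name and signature below are assumptions (only the multiply-and-shift body appears in the listing); check_hash and table come from the declarations further down:

// Illustrative only: map a parked address to its hash bucket.
ALWAYS_INLINE hash_bucket &bucket_for_addr(uintptr_t addr) {
    uintptr_t hash = addr_hash(addr, HASH_TABLE_BITS);  // hypothetical name for the function above
    check_hash(hash);                                    // sanity-check the bucket index
    return table.buckets[hash];
}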
// lock_bucket_pair(): lock the two buckets in a consistent order to avoid deadlock.
if (hash_from == hash_to) {
} else if (hash_from < hash_to) {

// unlock_bucket_pair(): the pair may refer to the same bucket; only unlock each bucket once.
if (&buckets.from == &buckets.to) {
} else if (&buckets.from > &buckets.to) {
uintptr_t (*unpark)(void *control, int unparked, bool more_waiters);
// park(): the control's validate callback decides whether this thread really goes to sleep.
if (!control.validate(&control, action)) {

// park(): append the node for this thread to the bucket's queue.
if (bucket.head != nullptr) {

// unpark_one(): scan the bucket for a waiter parked on this address.
while (data != nullptr) {
    if (cur_addr == addr) {
        *data_location = next;

        // Determine whether any other waiter in the bucket is parked on the same address.
        bool more_waiters = false;
        if (bucket.tail == data) {
        while (data2 != nullptr && !more_waiters) {
            more_waiters = (cur_addr2 == addr);

        data->parker.unpark_start();
        data->parker.unpark_finish();

        return more_waiters ? 1 : 0;
    }
    data_location = &data->next;

// unpark_one(): no waiter found; still report the result through the unpark callback.
control.unpark(&control, 0, false);
// unpark_all(): collect every waiter parked on addr, then finish waking them after the
// bucket lock is released.
queue_data **temp_list = &temp_list_storage[0];
size_t max_waiters = sizeof(temp_list_storage) / sizeof(temp_list_storage[0]);

while (data != nullptr) {
    if (cur_addr == addr) {
        *data_location = next;
        if (bucket.tail == data) {

        // Spill to a larger list when the stack-allocated storage fills up, copying the
        // entries collected so far and freeing any earlier heap block.
        if (waiters == max_waiters) {
            for (size_t i = 0; i < max_waiters; i++) {
                temp_list[i] = temp[i];
            }
            if (temp != &temp_list_storage[0]) {

        temp_list[waiters++] = data;
        data->parker.unpark_start();

    *data_location = data->next;

// Wake everything that was collected.
for (size_t i = 0; i < waiters; i++) {
    temp_list[i]->parker.unpark();
}
for (size_t i = 0; i < waiters; i++) {
    temp_list[i]->parker.unpark_finish();
}

if (temp_list != &temp_list_storage[0]) {
    // The oversized list was heap-allocated; free it here.
// unpark_requeue(): move waiters from addr_from to addr_to, optionally waking one of them.
if (!control.validate(&control, action)) {

while (data != nullptr) {
    if (cur_addr == addr_from) {
        *data_location = next;

        // Start the requeue list with this waiter, or append it to the end.
        if (requeue == nullptr) {
            requeue = data;
        } else {
            requeue_tail->next = data;
        }

    data_location = &data->next;

// Splice the requeued waiters onto the destination bucket.
if (requeue != nullptr) {
    requeue_tail->next = nullptr;
    if (buckets.to.head == nullptr) {
        buckets.to.head = requeue;
    }
    buckets.to.tail = requeue_tail;
}

control.requeue_callback(&control, action, wakeup != nullptr, requeue != nullptr);

// Wake the waiter singled out for wakeup, if any.
if (wakeup != nullptr) {
    wakeup->parker.unpark_start();
    wakeup->parker.unpark_finish();
}

return wakeup != nullptr && action.unpark_one;
// mutex_parking_control_validate(): only park if the mutex is still locked with waiters recorded.
atomic_load_relaxed(mutex_control->lock_state, &result);
return result == (lock_bit | parked_bit);

// mutex_parking_control_unpark(): release the lock, keeping the parked bit if waiters remain.
uintptr_t return_state = more_waiters ? parked_bit : 0;
atomic_store_release(mutex_control->lock_state, &return_state);
// fast_mutex::lock_full(): spin, record a parked waiter, then park on the mutex address.
uintptr_t expected;
atomic_load_relaxed(&state, &expected);

if (!(expected & lock_bit)) {
    uintptr_t desired = expected | lock_bit;
    if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {

atomic_load_relaxed(&state, &expected);

// Record that a thread is about to park so the unlock path knows to issue a wakeup.
if ((expected & parked_bit) == 0) {
    uintptr_t desired = expected | parked_bit;
    if (!atomic_cas_weak_relaxed_relaxed(&state, &expected, &desired)) {

uintptr_t result = park((uintptr_t)this, control);
if (result == (uintptr_t)this) {

atomic_load_relaxed(&state, &expected);

// fast_mutex::unlock_full(): try a plain release; if that fails, a waiter must be unparked.
uintptr_t expected = lock_bit;
uintptr_t desired = 0;
if (atomic_cas_strong_release_relaxed(&state, &expected, &desired)) {

// fast_mutex::lock() fast path.
uintptr_t expected = 0;
uintptr_t desired = lock_bit;
if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
    lock_full();
}

// fast_mutex::unlock() fast path.
uintptr_t expected = lock_bit;
uintptr_t desired = 0;
if (!atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
    unlock_full();
}

// fast_mutex::make_parked_if_locked(): set the parked bit only while the lock is held.
atomic_load_relaxed(&state, &val);
if (!(val & lock_bit)) {
    return false;
}
uintptr_t desired = val | parked_bit;
if (atomic_cas_weak_relaxed_relaxed(&state, &val, &desired)) {
    return true;
}

// fast_mutex::make_parked(): unconditionally set the parked bit.
atomic_or_fetch_relaxed(&state, parked_bit);
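These fast_mutex pieces are what the public halide_mutex entry points further down build on. A hedged sketch of the typical forwarding wrappers; the cast relies on halide_mutex providing enough opaque storage for a Synchronization::fast_mutex, which is an assumption of this sketch:

// Sketch only: how halide_mutex_lock/halide_mutex_unlock typically forward to fast_mutex.
WEAK void halide_mutex_lock(halide_mutex *mutex) {
    Synchronization::fast_mutex *fast = (Synchronization::fast_mutex *)mutex;
    fast->lock();
}

WEAK void halide_mutex_unlock(halide_mutex *mutex) {
    Synchronization::fast_mutex *fast = (Synchronization::fast_mutex *)mutex;
    fast->unlock();
}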
// signal_parking_control: the requeue target for a signal is the mutex address, and the
// condition state is reset once the last waiter has been moved.
atomic_store_relaxed(signal_control->cond_state, &val);
return (uintptr_t)signal_control->mutex;

// Callback invoked by unpark_requeue() after waiters have been moved (full signature as declared below):
void (*requeue_callback)(void *control, const validate_action &action, bool one_to_wake, bool some_requeued);

// broadcast_parking_control_validate(): check that the condition variable is still associated
// with this mutex, then reset its state because every waiter will be woken or requeued.
atomic_load_relaxed(broadcast_control->cond_state, &val);
if (val != (uintptr_t)broadcast_control->mutex) {
atomic_store_relaxed(broadcast_control->cond_state, &val);

// wait_parking_control_validate(): bind the condition variable to the mutex on the first wait;
// waiting with a different mutex than previous waiters is an error.
atomic_load_relaxed(wait_control->cond_state, &val);
val = (uintptr_t)wait_control->mutex;
atomic_store_relaxed(wait_control->cond_state, &val);
} else if (val != (uintptr_t)wait_control->mutex) {

// wait_parking_control_unpark(): clear the condition state once no waiters remain.
if (!more_waiters) {
    atomic_store_relaxed(wait_control->cond_state, &val);
}
// fast_cond keeps a single word of state: zero when nothing has waited yet, otherwise the
// address of the mutex its waiters are using.
uintptr_t state = 0;

// fast_cond::signal(): return immediately if nothing has ever waited; otherwise requeue one waiter.
if_tsan_pre_signal(this);
atomic_load_relaxed(&state, &val);
if_tsan_post_signal(this);
if_tsan_post_signal(this);

// fast_cond::broadcast(): same early-out, then requeue all waiters onto the mutex.
if_tsan_pre_signal(this);
atomic_load_relaxed(&state, &val);
if_tsan_post_signal(this);
if_tsan_post_signal(this);

// fast_cond::wait(): park on the condition variable's address; on wakeup, either reacquire the
// mutex or, if it was handed over directly by the waker, just annotate the acquisition for TSAN.
uintptr_t result = park((uintptr_t)this, control);
if (result != (uintptr_t)mutex) {
if_tsan_pre_lock(mutex);
atomic_load_relaxed((uintptr_t *)mutex, &val);
if_tsan_post_lock(mutex);
// halide_cond_wait(): forward to the fast_cond/fast_mutex implementations.
fast_cond->wait(fast_mutex);

// halide_mutex_array_create(): bail out if either allocation fails.
if (array == nullptr) {
if (array->array == nullptr) {
This file declares the routines used by Halide internally in its runtime.
void halide_free(void *user_context, void *ptr)
void *halide_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
ALWAYS_INLINE void signal()
ALWAYS_INLINE fast_cond()=default
ALWAYS_INLINE void broadcast()
ALWAYS_INLINE void wait(fast_mutex *mutex)
ALWAYS_INLINE void make_parked()
ALWAYS_INLINE void lock()
ALWAYS_INLINE void unlock()
ALWAYS_INLINE bool make_parked_if_locked()
ALWAYS_INLINE bool should_spin()
ALWAYS_INLINE spin_control()=default
ALWAYS_INLINE void reset()
ALWAYS_INLINE void unlock()
ALWAYS_INLINE word_lock()=default
ALWAYS_INLINE void lock()
WEAK uintptr_t signal_parking_control_unpark(void *control, int unparked, bool more_waiters)
WEAK void parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued)
WEAK void unlock_bucket_pair(bucket_pair &buckets)
WEAK char table_storage[sizeof(hash_table)]
WEAK bool parking_control_validate(void *control, validate_action &action)
ALWAYS_INLINE void check_hash(uintptr_t hash)
WEAK int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, parking_control &control, uintptr_t unpark_info)
WEAK void parking_control_before_sleep(void *control)
WEAK uintptr_t park(uintptr_t addr, parking_control &control)
WEAK uintptr_t unpark_one(uintptr_t addr, parking_control &control)
WEAK hash_bucket & lock_bucket(uintptr_t addr)
WEAK uintptr_t wait_parking_control_unpark(void *control, int unparked, bool more_waiters)
WEAK bool wait_parking_control_validate(void *control, validate_action &action)
WEAK bucket_pair lock_bucket_pair(uintptr_t addr_from, uintptr_t addr_to)
WEAK bool mutex_parking_control_validate(void *control, validate_action &action)
WEAK bool broadcast_parking_control_validate(void *control, validate_action &action)
WEAK void broadcast_parking_control_requeue_callback(void *control, const validate_action &action, bool one_to_wake, bool some_requeued)
WEAK uintptr_t mutex_parking_control_unpark(void *control, int unparked, bool more_waiters)
WEAK uintptr_t parking_control_unpark(void *control, int unparked, bool more_waiters)
WEAK uintptr_t unpark_all(uintptr_t addr, uintptr_t unpark_info)
WEAK void wait_parking_control_before_sleep(void *control)
unsigned __INT8_TYPE__ uint8_t
void * memset(void *s, int val, size_t n)
void halide_thread_yield()
unsigned __INT32_TYPE__ uint32_t
#define halide_assert(user_context, cond)
ALWAYS_INLINE broadcast_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
ALWAYS_INLINE bucket_pair(hash_bucket &from, hash_bucket &to)
hash_bucket buckets[MAX_THREADS * LOAD_FACTOR]
ALWAYS_INLINE mutex_parking_control(uintptr_t *lock_state)
void(* before_sleep)(void *control)
ALWAYS_INLINE parking_control()
bool(* validate)(void *control, validate_action &action)
uintptr_t(* unpark)(void *control, int unparked, bool more_waiters)
void(* requeue_callback)(void *control, const validate_action &action, bool one_to_wake, bool some_requeued)
ALWAYS_INLINE ~queue_data()=default
ALWAYS_INLINE queue_data()=default
ALWAYS_INLINE signal_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
ALWAYS_INLINE validate_action()=default
uintptr_t invalid_unpark_info
ALWAYS_INLINE wait_parking_control(uintptr_t *cond_state, fast_mutex *mutex)
word_lock_queue_data * prev
ALWAYS_INLINE word_lock_queue_data()=default
word_lock_queue_data * tail
word_lock_queue_data * next
ALWAYS_INLINE ~word_lock_queue_data()=default
Cross platform condition variable.
struct halide_mutex * array
WEAK int halide_mutex_array_lock(struct halide_mutex_array *array, int entry)
WEAK void halide_mutex_array_destroy(void *user_context, void *array)
WEAK void halide_mutex_unlock(halide_mutex *mutex)
WEAK halide_mutex_array * halide_mutex_array_create(int sz)
WEAK void halide_cond_signal(struct halide_cond *cond)
WEAK int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry)
WEAK void halide_mutex_lock(halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform-specific code for mutual exclusion.
WEAK void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex)
WEAK void halide_cond_broadcast(struct halide_cond *cond)
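Taken together, these entry points are used in the usual monitor pattern: hold the mutex, wait on the condition in a loop, and signal or broadcast after changing the guarded state. An illustrative sketch (the queue type and its counter are hypothetical; only the halide_mutex/halide_cond calls come from the API above):

// Illustrative only: a producer/consumer pair built on the public runtime API.
struct work_queue_t {
    halide_mutex mutex;    // zero-initialization is sufficient; there is no explicit init call
    halide_cond not_empty;
    int jobs;              // hypothetical shared state guarded by the mutex
};
// e.g. WEAK work_queue_t q = {};  // zero-initialize before first use

void produce_one(work_queue_t *q) {
    halide_mutex_lock(&q->mutex);
    q->jobs++;
    halide_cond_signal(&q->not_empty);
    halide_mutex_unlock(&q->mutex);
}

void consume_one(work_queue_t *q) {
    halide_mutex_lock(&q->mutex);
    while (q->jobs == 0) {
        // Releases the mutex while sleeping and reacquires it before returning.
        halide_cond_wait(&q->not_empty, &q->mutex);
    }
    q->jobs--;
    halide_mutex_unlock(&q->mutex);
}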