LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define __KMP_IMP
15 #include "omp.h" /* extern "C" declarations of user-visible routines */
16 #include "kmp.h"
17 #include "kmp_error.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-specific.h"
25 #endif
26 
27 #define MAX_MESSAGE 512
28 
29 // flags will be used in the future, e.g., to implement openmp_strict library
30 // restrictions
31 
40 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41  // By default __kmpc_begin() is a no-op.
42  char *env;
43  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44  __kmp_str_match_true(env)) {
45  __kmp_middle_initialize();
46  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47  } else if (__kmp_ignore_mppbeg() == FALSE) {
48  // By default __kmp_ignore_mppbeg() returns TRUE.
49  __kmp_internal_begin();
50  KC_TRACE(10, ("__kmpc_begin: called\n"));
51  }
52 }
53 
62 void __kmpc_end(ident_t *loc) {
63  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
64  // __kmpc_end() call a no-op. However, this can be overridden with the
65  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
66  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
67  // this root (which can cause library shutdown).
68  if (__kmp_ignore_mppend() == FALSE) {
69  KC_TRACE(10, ("__kmpc_end: called\n"));
70  KA_TRACE(30, ("__kmpc_end\n"));
71 
72  __kmp_internal_end_thread(-1);
73  }
74 #if KMP_OS_WINDOWS && OMPT_SUPPORT
75  // Normal exit process on Windows does not allow worker threads of the final
76  // parallel region to finish reporting their events, so shutting down the
77  // library here fixes the issue at least for the cases where __kmpc_end() is
78  // placed properly.
79  if (ompt_enabled.enabled)
80  __kmp_internal_end_library(__kmp_gtid_get_specific());
81 #endif
82 }
83 
102 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
103  kmp_int32 gtid = __kmp_entry_gtid();
104 
105  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
106 
107  return gtid;
108 }
109 
124 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
125  KC_TRACE(10,
126  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127 
128  return TCR_4(__kmp_all_nth);
129 }
130 
137 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
138  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
139  return __kmp_tid_from_gtid(__kmp_entry_gtid());
140 }
141 
147 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
148  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
149 
150  return __kmp_entry_thread()->th.th_team->t.t_nproc;
151 }
152 
159 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
160 #ifndef KMP_DEBUG
161 
162  return TRUE;
163 
164 #else
165 
166  const char *semi2;
167  const char *semi3;
168  int line_no;
169 
170  if (__kmp_par_range == 0) {
171  return TRUE;
172  }
173  semi2 = loc->psource;
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  semi2 = strchr(semi2 + 1, ';');
182  if (semi2 == NULL) {
183  return TRUE;
184  }
185  if (__kmp_par_range_filename[0]) {
186  const char *name = semi2 - 1;
187  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
188  name--;
189  }
190  if ((*name == '/') || (*name == ';')) {
191  name++;
192  }
193  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
194  return __kmp_par_range < 0;
195  }
196  }
197  semi3 = strchr(semi2 + 1, ';');
198  if (__kmp_par_range_routine[0]) {
199  if ((semi3 != NULL) && (semi3 > semi2) &&
200  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
201  return __kmp_par_range < 0;
202  }
203  }
204  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
205  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
206  return __kmp_par_range > 0;
207  }
208  return __kmp_par_range < 0;
209  }
210  return TRUE;
211 
212 #endif /* KMP_DEBUG */
213 }
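/* Illustrative note (not part of this file): the parse above assumes that
   ident_t::psource is a semicolon-separated string of roughly the form
     ";path/to/file;routine;first_line;..."
   For example, a hypothetical location ";src/foo.c;bar;42;0;;" would be
   checked against __kmp_par_range_filename ("foo.c"), __kmp_par_range_routine
   ("bar"), and the [__kmp_par_range_lb, __kmp_par_range_ub] line range. */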
214 
221 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
222  return __kmp_entry_thread()->th.th_root->r.r_active;
223 }
224 
234 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
235  kmp_int32 num_threads) {
236  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
237  global_tid, num_threads));
238 
239  __kmp_push_num_threads(loc, global_tid, num_threads);
240 }
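/* Illustrative lowering sketch (not part of this file): for a construct like
     #pragma omp parallel num_threads(4)
   a compiler typically emits something along the lines of
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_threads(&loc, gtid, 4);
     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
   where `loc` and `outlined_body` are hypothetical names for the
   compiler-generated location descriptor and outlined parallel region. */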
241 
242 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
243  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
244 
245  /* the num_threads are automatically popped */
246 }
247 
248 #if OMP_40_ENABLED
249 
250 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
251  kmp_int32 proc_bind) {
252  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
253  proc_bind));
254 
255  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
256 }
257 
258 #endif /* OMP_40_ENABLED */
259 
270 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
271  int gtid = __kmp_entry_gtid();
272 
273 #if (KMP_STATS_ENABLED)
274  // If we were in a serial region, then stop the serial timer, record
275  // the event, and start parallel region timer
276  stats_state_e previous_state = KMP_GET_THREAD_STATE();
277  if (previous_state == stats_state_e::SERIAL_REGION) {
278  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279  } else {
280  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
281  }
282  int inParallel = __kmpc_in_parallel(loc);
283  if (inParallel) {
284  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
285  } else {
286  KMP_COUNT_BLOCK(OMP_PARALLEL);
287  }
288 #endif
289 
290  // Perhaps saving thr_state alone would be enough here
291  {
292  va_list ap;
293  va_start(ap, microtask);
294 
295 #if OMPT_SUPPORT
296  ompt_frame_t *ompt_frame;
297  if (ompt_enabled.enabled) {
298  kmp_info_t *master_th = __kmp_threads[gtid];
299  kmp_team_t *parent_team = master_th->th.th_team;
300  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301  if (lwt)
302  ompt_frame = &(lwt->ompt_task_info.frame);
303  else {
304  int tid = __kmp_tid_from_gtid(gtid);
305  ompt_frame = &(
306  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307  }
308  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
309  OMPT_STORE_RETURN_ADDRESS(gtid);
310  }
311 #endif
312 
313 #if INCLUDE_SSC_MARKS
314  SSC_MARK_FORKING();
315 #endif
316  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
317  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
318  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
319 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
320 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321  &ap
322 #else
323  ap
324 #endif
325  );
326 #if INCLUDE_SSC_MARKS
327  SSC_MARK_JOINING();
328 #endif
329  __kmp_join_call(loc, gtid
330 #if OMPT_SUPPORT
331  ,
332  fork_context_intel
333 #endif
334  );
335 
336  va_end(ap);
337  }
338 
339 #if KMP_STATS_ENABLED
340  if (previous_state == stats_state_e::SERIAL_REGION) {
341  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342  } else {
343  KMP_POP_PARTITIONED_TIMER();
344  }
345 #endif // KMP_STATS_ENABLED
346 }
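/* Illustrative sketch (not part of this file): the varargs after `microtask`
   are the addresses of variables shared with the outlined region. For
     #pragma omp parallel
       sum += f(i);
   a compiler might emit, with hypothetical names,
     __kmpc_fork_call(&loc, 2, (kmpc_micro)outlined, &sum, &i);
   and the outlined microtask would have the shape
     void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *sum, int *i);
   where the first two parameters are the global and bound thread ids
   supplied by the runtime. */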
347 
348 #if OMP_40_ENABLED
349 
360 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
361  kmp_int32 num_teams, kmp_int32 num_threads) {
362  KA_TRACE(20,
363  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
364  global_tid, num_teams, num_threads));
365 
366  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
367 }
368 
379 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
380  ...) {
381  int gtid = __kmp_entry_gtid();
382  kmp_info_t *this_thr = __kmp_threads[gtid];
383  va_list ap;
384  va_start(ap, microtask);
385 
386  KMP_COUNT_BLOCK(OMP_TEAMS);
387 
388  // remember teams entry point and nesting level
389  this_thr->th.th_teams_microtask = microtask;
390  this_thr->th.th_teams_level =
391  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
392 
393 #if OMPT_SUPPORT
394  kmp_team_t *parent_team = this_thr->th.th_team;
395  int tid = __kmp_tid_from_gtid(gtid);
396  if (ompt_enabled.enabled) {
397  parent_team->t.t_implicit_task_taskdata[tid]
398  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
399  }
400  OMPT_STORE_RETURN_ADDRESS(gtid);
401 #endif
402 
403  // If __kmpc_push_num_teams was not called, set the default number of
404  // teams.
405  if (this_thr->th.th_teams_size.nteams == 0) {
406  __kmp_push_num_teams(loc, gtid, 0, 0);
407  }
408  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
409  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
410  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
411 
412  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
413  VOLATILE_CAST(microtask_t)
414  __kmp_teams_master, // "wrapped" task
415  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
416 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
417  &ap
418 #else
419  ap
420 #endif
421  );
422  __kmp_join_call(loc, gtid
423 #if OMPT_SUPPORT
424  ,
425  fork_context_intel
426 #endif
427  );
428 
429  this_thr->th.th_teams_microtask = NULL;
430  this_thr->th.th_teams_level = 0;
431  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
432  va_end(ap);
433 }
434 #endif /* OMP_40_ENABLED */
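/* Illustrative sketch (not part of this file): for
     #pragma omp teams num_teams(8) thread_limit(4)
   a compiler would typically emit, with hypothetical names,
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_teams(&loc, gtid, 8, 4);
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
   If __kmpc_push_num_teams is not called first, __kmpc_fork_teams falls back
   to default team sizes (see the th_teams_size check above). */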
435 
436 // I don't think this function should ever have been exported.
437 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
438 // openmp code ever called it, but it's been exported from the RTL for so
439 // long that I'm afraid to remove the definition.
440 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
441 
454 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
455 // The implementation is now in kmp_runtime.cpp so that it can share static
456 // functions with kmp_fork_call since the tasks to be done are similar in
457 // each case.
458 #if OMPT_SUPPORT
459  OMPT_STORE_RETURN_ADDRESS(global_tid);
460 #endif
461  __kmp_serialized_parallel(loc, global_tid);
462 }
463 
471 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
472  kmp_internal_control_t *top;
473  kmp_info_t *this_thr;
474  kmp_team_t *serial_team;
475 
476  KC_TRACE(10,
477  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
478 
479  /* skip all this code for autopar serialized loops since it results in
480  unacceptable overhead */
481  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
482  return;
483 
484  // Not autopar code
485  if (!TCR_4(__kmp_init_parallel))
486  __kmp_parallel_initialize();
487 
488  this_thr = __kmp_threads[global_tid];
489  serial_team = this_thr->th.th_serial_team;
490 
491 #if OMP_45_ENABLED
492  kmp_task_team_t *task_team = this_thr->th.th_task_team;
493 
494  // we need to wait for the proxy tasks before finishing the thread
495  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
496  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
497 #endif
498 
499  KMP_MB();
500  KMP_DEBUG_ASSERT(serial_team);
501  KMP_ASSERT(serial_team->t.t_serialized);
502  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
503  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
504  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
505  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
506 
507 #if OMPT_SUPPORT
508  if (ompt_enabled.enabled &&
509  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
510  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
511  if (ompt_enabled.ompt_callback_implicit_task) {
512  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
513  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
514  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
515  }
516 
517  // clear the task id only after unlinking the task
518  ompt_data_t *parent_task_data;
519  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
520 
521  if (ompt_enabled.ompt_callback_parallel_end) {
522  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
523  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
524  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
525  }
526  __ompt_lw_taskteam_unlink(this_thr);
527  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
528  }
529 #endif
530 
531  /* If necessary, pop the internal control stack values and replace the team
532  * values */
533  top = serial_team->t.t_control_stack_top;
534  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
535  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
536  serial_team->t.t_control_stack_top = top->next;
537  __kmp_free(top);
538  }
539 
540  // if( serial_team -> t.t_serialized > 1 )
541  serial_team->t.t_level--;
542 
543  /* pop dispatch buffers stack */
544  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
545  {
546  dispatch_private_info_t *disp_buffer =
547  serial_team->t.t_dispatch->th_disp_buffer;
548  serial_team->t.t_dispatch->th_disp_buffer =
549  serial_team->t.t_dispatch->th_disp_buffer->next;
550  __kmp_free(disp_buffer);
551  }
552 #if OMP_50_ENABLED
553  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
554 #endif
555 
556  --serial_team->t.t_serialized;
557  if (serial_team->t.t_serialized == 0) {
558 
559 /* return to the parallel section */
560 
561 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
562  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
563  __kmp_clear_x87_fpu_status_word();
564  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
565  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
566  }
567 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
568 
569  this_thr->th.th_team = serial_team->t.t_parent;
570  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
571 
572  /* restore values cached in the thread */
573  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
574  this_thr->th.th_team_master =
575  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
576  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
577 
578  /* TODO the below shouldn't need to be adjusted for serialized teams */
579  this_thr->th.th_dispatch =
580  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
581 
582  __kmp_pop_current_task_from_thread(this_thr);
583 
584  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
585  this_thr->th.th_current_task->td_flags.executing = 1;
586 
587  if (__kmp_tasking_mode != tskm_immediate_exec) {
588  // Copy the task team from the new child / old parent team to the thread.
589  this_thr->th.th_task_team =
590  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
591  KA_TRACE(20,
592  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
593  "team %p\n",
594  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
595  }
596  } else {
597  if (__kmp_tasking_mode != tskm_immediate_exec) {
598  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
599  "depth of serial team %p to %d\n",
600  global_tid, serial_team, serial_team->t.t_serialized));
601  }
602  }
603 
604  if (__kmp_env_consistency_check)
605  __kmp_pop_parallel(global_tid, NULL);
606 #if OMPT_SUPPORT
607  if (ompt_enabled.enabled)
608  this_thr->th.ompt_thread_info.state =
609  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
610  : ompt_state_work_parallel);
611 #endif
612 }
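/* Illustrative sketch (not part of this file): when a parallel region is
   known to be serialized, e.g.
     #pragma omp parallel if(0)
   a compiler may bracket the body with
     __kmpc_serialized_parallel(&loc, gtid);
     ... body executed by the encountering thread ...
     __kmpc_end_serialized_parallel(&loc, gtid);
   instead of going through __kmpc_fork_call. */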
613 
622 void __kmpc_flush(ident_t *loc) {
623  KC_TRACE(10, ("__kmpc_flush: called\n"));
624 
625  /* need an explicit __mf() here since the library uses volatile instead */
626  KMP_MB(); /* Flush all pending memory write invalidates. */
627 
628 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
629 #if KMP_MIC
630 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
631 // We shouldn't need it, though, since the ABI rules require that
632 // * If the compiler generates NGO stores it also generates the fence
633 // * If users hand-code NGO stores they should insert the fence
634 // therefore no incomplete unordered stores should be visible.
635 #else
636  // C74404
637  // This is to address non-temporal store instructions (sfence needed).
638  // The clflush instruction is also addressed (mfence needed).
639  // Probably the non-temporal load movntdqa instruction should also be
640  // addressed.
641  // mfence is an SSE2 instruction. Do not execute it if the CPU lacks SSE2.
642  if (!__kmp_cpuinfo.initialized) {
643  __kmp_query_cpuid(&__kmp_cpuinfo);
644  }
645  if (!__kmp_cpuinfo.sse2) {
646  // CPU cannot execute SSE2 instructions.
647  } else {
648 #if KMP_COMPILER_ICC
649  _mm_mfence();
650 #elif KMP_COMPILER_MSVC
651  MemoryBarrier();
652 #else
653  __sync_synchronize();
654 #endif // KMP_COMPILER_ICC
655  }
656 #endif // KMP_MIC
657 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
658 // Nothing to see here; move along.
659 #elif KMP_ARCH_PPC64
660 // Nothing needed here (we have a real MB above).
661 #if KMP_OS_CNK
662  // The flushing thread needs to yield here; this prevents a
663  // busy-waiting thread from saturating the pipeline. flush is
664  // often used in loops like this:
665  // while (!flag) {
666  // #pragma omp flush(flag)
667  // }
668  // and adding the yield here is good for at least a 10x speedup
669  // when running >2 threads per core (on the NAS LU benchmark).
670  __kmp_yield(TRUE);
671 #endif
672 #else
673 #error Unknown or unsupported architecture
674 #endif
675 
676 #if OMPT_SUPPORT && OMPT_OPTIONAL
677  if (ompt_enabled.ompt_callback_flush) {
678  ompt_callbacks.ompt_callback(ompt_callback_flush)(
679  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
680  }
681 #endif
682 }
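/* Illustrative sketch (not part of this file): a flush directive such as
     #pragma omp flush
   lowers to a single call
     __kmpc_flush(&loc);
   Note that the optional flush list is not passed to this entry point; the
   runtime issues a full memory fence either way. */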
683 
684 /* -------------------------------------------------------------------------- */
692 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
693  KMP_COUNT_BLOCK(OMP_BARRIER);
694  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
695 
696  if (!TCR_4(__kmp_init_parallel))
697  __kmp_parallel_initialize();
698 
699  if (__kmp_env_consistency_check) {
700  if (loc == 0) {
701  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702  }
703 
704  __kmp_check_barrier(global_tid, ct_barrier, loc);
705  }
706 
707 #if OMPT_SUPPORT
708  ompt_frame_t *ompt_frame;
709  if (ompt_enabled.enabled) {
710  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
711  if (ompt_frame->enter_frame.ptr == NULL)
712  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
713  OMPT_STORE_RETURN_ADDRESS(global_tid);
714  }
715 #endif
716  __kmp_threads[global_tid]->th.th_ident = loc;
717  // TODO: explicit barrier_wait_id:
718  // this function is called when a 'barrier' directive is present or at the
719  // implicit barrier at the end of a worksharing construct.
720  // 1) better to add a per-thread barrier counter to a thread data structure
721  // 2) set to 0 when a new team is created
722  // 3) no sync is required
723 
724  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
725 #if OMPT_SUPPORT && OMPT_OPTIONAL
726  if (ompt_enabled.enabled) {
727  ompt_frame->enter_frame = ompt_data_none;
728  }
729 #endif
730 }
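/* Illustrative sketch (not part of this file): an explicit barrier
     #pragma omp barrier
   lowers to
     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
   The same entry point is also used for the implicit barrier at the end of
   worksharing constructs when no nowait clause is present. */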
731 
732 /* The BARRIER for a MASTER section is always explicit */
739 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
740  int status = 0;
741 
742  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
743 
744  if (!TCR_4(__kmp_init_parallel))
745  __kmp_parallel_initialize();
746 
747  if (KMP_MASTER_GTID(global_tid)) {
748  KMP_COUNT_BLOCK(OMP_MASTER);
749  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
750  status = 1;
751  }
752 
753 #if OMPT_SUPPORT && OMPT_OPTIONAL
754  if (status) {
755  if (ompt_enabled.ompt_callback_master) {
756  kmp_info_t *this_thr = __kmp_threads[global_tid];
757  kmp_team_t *team = this_thr->th.th_team;
758 
759  int tid = __kmp_tid_from_gtid(global_tid);
760  ompt_callbacks.ompt_callback(ompt_callback_master)(
761  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
762  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
763  OMPT_GET_RETURN_ADDRESS(0));
764  }
765  }
766 #endif
767 
768  if (__kmp_env_consistency_check) {
769 #if KMP_USE_DYNAMIC_LOCK
770  if (status)
771  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
772  else
773  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
774 #else
775  if (status)
776  __kmp_push_sync(global_tid, ct_master, loc, NULL);
777  else
778  __kmp_check_sync(global_tid, ct_master, loc, NULL);
779 #endif
780  }
781 
782  return status;
783 }
784 
793 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
794  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
795 
796  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
797  KMP_POP_PARTITIONED_TIMER();
798 
799 #if OMPT_SUPPORT && OMPT_OPTIONAL
800  kmp_info_t *this_thr = __kmp_threads[global_tid];
801  kmp_team_t *team = this_thr->th.th_team;
802  if (ompt_enabled.ompt_callback_master) {
803  int tid = __kmp_tid_from_gtid(global_tid);
804  ompt_callbacks.ompt_callback(ompt_callback_master)(
805  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
806  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
807  OMPT_GET_RETURN_ADDRESS(0));
808  }
809 #endif
810 
811  if (__kmp_env_consistency_check) {
812  if (global_tid < 0)
813  KMP_WARNING(ThreadIdentInvalid);
814 
815  if (KMP_MASTER_GTID(global_tid))
816  __kmp_pop_sync(global_tid, ct_master, loc);
817  }
818 }
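/* Illustrative sketch (not part of this file): a master construct
     #pragma omp master
     { body(); }
   is typically lowered to
     if (__kmpc_master(&loc, gtid)) {
       body();
       __kmpc_end_master(&loc, gtid);
     }
   Only the thread for which __kmpc_master returns 1 executes the body; no
   implicit barrier is emitted for this construct. */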
819 
827 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
828  int cid = 0;
829  kmp_info_t *th;
830  KMP_DEBUG_ASSERT(__kmp_init_serial);
831 
832  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
833 
834  if (!TCR_4(__kmp_init_parallel))
835  __kmp_parallel_initialize();
836 
837 #if USE_ITT_BUILD
838  __kmp_itt_ordered_prep(gtid);
839 // TODO: ordered_wait_id
840 #endif /* USE_ITT_BUILD */
841 
842  th = __kmp_threads[gtid];
843 
844 #if OMPT_SUPPORT && OMPT_OPTIONAL
845  kmp_team_t *team;
846  ompt_wait_id_t lck;
847  void *codeptr_ra;
848  if (ompt_enabled.enabled) {
849  OMPT_STORE_RETURN_ADDRESS(gtid);
850  team = __kmp_team_from_gtid(gtid);
851  lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
852  /* OMPT state update */
853  th->th.ompt_thread_info.wait_id = lck;
854  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
855 
856  /* OMPT event callback */
857  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
858  if (ompt_enabled.ompt_callback_mutex_acquire) {
859  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
860  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
861  (ompt_wait_id_t)lck, codeptr_ra);
862  }
863  }
864 #endif
865 
866  if (th->th.th_dispatch->th_deo_fcn != 0)
867  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
868  else
869  __kmp_parallel_deo(&gtid, &cid, loc);
870 
871 #if OMPT_SUPPORT && OMPT_OPTIONAL
872  if (ompt_enabled.enabled) {
873  /* OMPT state update */
874  th->th.ompt_thread_info.state = ompt_state_work_parallel;
875  th->th.ompt_thread_info.wait_id = 0;
876 
877  /* OMPT event callback */
878  if (ompt_enabled.ompt_callback_mutex_acquired) {
879  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
880  ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
881  }
882  }
883 #endif
884 
885 #if USE_ITT_BUILD
886  __kmp_itt_ordered_start(gtid);
887 #endif /* USE_ITT_BUILD */
888 }
889 
897 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
898  int cid = 0;
899  kmp_info_t *th;
900 
901  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
902 
903 #if USE_ITT_BUILD
904  __kmp_itt_ordered_end(gtid);
905 // TODO: ordered_wait_id
906 #endif /* USE_ITT_BUILD */
907 
908  th = __kmp_threads[gtid];
909 
910  if (th->th.th_dispatch->th_dxo_fcn != 0)
911  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
912  else
913  __kmp_parallel_dxo(&gtid, &cid, loc);
914 
915 #if OMPT_SUPPORT && OMPT_OPTIONAL
916  OMPT_STORE_RETURN_ADDRESS(gtid);
917  if (ompt_enabled.ompt_callback_mutex_released) {
918  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
919  ompt_mutex_ordered,
920  (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
921  OMPT_LOAD_RETURN_ADDRESS(gtid));
922  }
923 #endif
924 }
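/* Illustrative sketch (not part of this file): inside a loop with an
   ordered clause, an
     #pragma omp ordered
   block is bracketed by
     __kmpc_ordered(&loc, gtid);
     ... ordered body ...
     __kmpc_end_ordered(&loc, gtid);
   which serializes the body in iteration order via the dispatch deo/dxo
   hooks used above. */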
925 
926 #if KMP_USE_DYNAMIC_LOCK
927 
928 static __forceinline void
929 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
930  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
931  // Pointer to the allocated indirect lock is written to crit, while indexing
932  // is ignored.
933  void *idx;
934  kmp_indirect_lock_t **lck;
935  lck = (kmp_indirect_lock_t **)crit;
936  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
937  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
938  KMP_SET_I_LOCK_LOCATION(ilk, loc);
939  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
940  KA_TRACE(20,
941  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
942 #if USE_ITT_BUILD
943  __kmp_itt_critical_creating(ilk->lock, loc);
944 #endif
945  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
946  if (status == 0) {
947 #if USE_ITT_BUILD
948  __kmp_itt_critical_destroyed(ilk->lock);
949 #endif
950  // We don't really need to destroy the unclaimed lock here since it will be
951  // cleaned up at program exit.
952  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
953  }
954  KMP_DEBUG_ASSERT(*lck != NULL);
955 }
956 
957 // Fast-path acquire tas lock
958 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
959  { \
960  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
961  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
962  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
963  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
964  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
965  kmp_uint32 spins; \
966  KMP_FSYNC_PREPARE(l); \
967  KMP_INIT_YIELD(spins); \
968  if (TCR_4(__kmp_nth) > \
969  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
970  KMP_YIELD(TRUE); \
971  } else { \
972  KMP_YIELD_SPIN(spins); \
973  } \
974  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
975  while ( \
976  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
977  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
978  __kmp_spin_backoff(&backoff); \
979  if (TCR_4(__kmp_nth) > \
980  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
981  KMP_YIELD(TRUE); \
982  } else { \
983  KMP_YIELD_SPIN(spins); \
984  } \
985  } \
986  } \
987  KMP_FSYNC_ACQUIRED(l); \
988  }
989 
990 // Fast-path test tas lock
991 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
992  { \
993  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
994  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
995  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
996  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
997  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
998  }
999 
1000 // Fast-path release tas lock
1001 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1002  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1003 
1004 #if KMP_USE_FUTEX
1005 
1006 #include <sys/syscall.h>
1007 #include <unistd.h>
1008 #ifndef FUTEX_WAIT
1009 #define FUTEX_WAIT 0
1010 #endif
1011 #ifndef FUTEX_WAKE
1012 #define FUTEX_WAKE 1
1013 #endif
1014 
1015 // Fast-path acquire futex lock
1016 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1017  { \
1018  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1019  kmp_int32 gtid_code = (gtid + 1) << 1; \
1020  KMP_MB(); \
1021  KMP_FSYNC_PREPARE(ftx); \
1022  kmp_int32 poll_val; \
1023  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1024  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1025  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1026  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1027  if (!cond) { \
1028  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1029  poll_val | \
1030  KMP_LOCK_BUSY(1, futex))) { \
1031  continue; \
1032  } \
1033  poll_val |= KMP_LOCK_BUSY(1, futex); \
1034  } \
1035  kmp_int32 rc; \
1036  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1037  NULL, NULL, 0)) != 0) { \
1038  continue; \
1039  } \
1040  gtid_code |= 1; \
1041  } \
1042  KMP_FSYNC_ACQUIRED(ftx); \
1043  }
1044 
1045 // Fast-path test futex lock
1046 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1047  { \
1048  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1049  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1050  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1051  KMP_FSYNC_ACQUIRED(ftx); \
1052  rc = TRUE; \
1053  } else { \
1054  rc = FALSE; \
1055  } \
1056  }
1057 
1058 // Fast-path release futex lock
1059 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1060  { \
1061  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1062  KMP_MB(); \
1063  KMP_FSYNC_RELEASING(ftx); \
1064  kmp_int32 poll_val = \
1065  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1066  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1067  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1068  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1069  } \
1070  KMP_MB(); \
1071  KMP_YIELD(TCR_4(__kmp_nth) > \
1072  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1073  }
1074 
1075 #endif // KMP_USE_FUTEX
1076 
1077 #else // KMP_USE_DYNAMIC_LOCK
1078 
1079 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1080  ident_t const *loc,
1081  kmp_int32 gtid) {
1082  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1083 
1084  // Because of the double-check, the following load doesn't need to be volatile
1085  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1086 
1087  if (lck == NULL) {
1088  void *idx;
1089 
1090  // Allocate & initialize the lock.
1091  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1092  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1093  __kmp_init_user_lock_with_checks(lck);
1094  __kmp_set_user_lock_location(lck, loc);
1095 #if USE_ITT_BUILD
1096  __kmp_itt_critical_creating(lck);
1097 // __kmp_itt_critical_creating() should be called *before* the first usage
1098 // of the underlying lock. It is the only place where we can guarantee it.
1099 // There is a chance the lock will be destroyed without ever being used, but
1100 // that is not a problem: this is not a real event seen by the user, just
1101 // setting a name for the object (lock). See kmp_itt.h for details.
1102 #endif /* USE_ITT_BUILD */
1103 
1104  // Use a cmpxchg instruction to slam the start of the critical section with
1105  // the lock pointer. If another thread beat us to it, deallocate the lock,
1106  // and use the lock that the other thread allocated.
1107  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1108 
1109  if (status == 0) {
1110 // Deallocate the lock and reload the value.
1111 #if USE_ITT_BUILD
1112  __kmp_itt_critical_destroyed(lck);
1113 // Let ITT know the lock is destroyed and the same memory location may be reused
1114 // for another purpose.
1115 #endif /* USE_ITT_BUILD */
1116  __kmp_destroy_user_lock_with_checks(lck);
1117  __kmp_user_lock_free(&idx, gtid, lck);
1118  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1119  KMP_DEBUG_ASSERT(lck != NULL);
1120  }
1121  }
1122  return lck;
1123 }
1124 
1125 #endif // KMP_USE_DYNAMIC_LOCK
1126 
1137 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1138  kmp_critical_name *crit) {
1139 #if KMP_USE_DYNAMIC_LOCK
1140 #if OMPT_SUPPORT && OMPT_OPTIONAL
1141  OMPT_STORE_RETURN_ADDRESS(global_tid);
1142 #endif // OMPT_SUPPORT
1143  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1144 #else
1145  KMP_COUNT_BLOCK(OMP_CRITICAL);
1146 #if OMPT_SUPPORT && OMPT_OPTIONAL
1147  ompt_state_t prev_state = ompt_state_undefined;
1148  ompt_thread_info_t ti;
1149 #endif
1150  kmp_user_lock_p lck;
1151 
1152  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1153 
1154  // TODO: add THR_OVHD_STATE
1155 
1156  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1157  KMP_CHECK_USER_LOCK_INIT();
1158 
1159  if ((__kmp_user_lock_kind == lk_tas) &&
1160  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1161  lck = (kmp_user_lock_p)crit;
1162  }
1163 #if KMP_USE_FUTEX
1164  else if ((__kmp_user_lock_kind == lk_futex) &&
1165  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1166  lck = (kmp_user_lock_p)crit;
1167  }
1168 #endif
1169  else { // ticket, queuing or drdpa
1170  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1171  }
1172 
1173  if (__kmp_env_consistency_check)
1174  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1175 
1176 // since the critical directive binds to all threads, not just the current
1177 // team, we have to check this even if we are in a serialized team.
1178 // also, even if we are the uber thread, we still have to acquire the lock,
1179 // as we have to contend with sibling threads.
1180 
1181 #if USE_ITT_BUILD
1182  __kmp_itt_critical_acquiring(lck);
1183 #endif /* USE_ITT_BUILD */
1184 #if OMPT_SUPPORT && OMPT_OPTIONAL
1185  OMPT_STORE_RETURN_ADDRESS(global_tid);
1186  void *codeptr_ra = NULL;
1187  if (ompt_enabled.enabled) {
1188  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1189  /* OMPT state update */
1190  prev_state = ti.state;
1191  ti.wait_id = (ompt_wait_id_t)lck;
1192  ti.state = ompt_state_wait_critical;
1193 
1194  /* OMPT event callback */
1195  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1196  if (ompt_enabled.ompt_callback_mutex_acquire) {
1197  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1198  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1199  (ompt_wait_id_t)crit, codeptr_ra);
1200  }
1201  }
1202 #endif
1203  // The value of 'crit' can be used directly as the critical_id of the
1204  // critical section directive.
1205  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1206 
1207 #if USE_ITT_BUILD
1208  __kmp_itt_critical_acquired(lck);
1209 #endif /* USE_ITT_BUILD */
1210 #if OMPT_SUPPORT && OMPT_OPTIONAL
1211  if (ompt_enabled.enabled) {
1212  /* OMPT state update */
1213  ti.state = prev_state;
1214  ti.wait_id = 0;
1215 
1216  /* OMPT event callback */
1217  if (ompt_enabled.ompt_callback_mutex_acquired) {
1218  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1219  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
1220  }
1221  }
1222 #endif
1223  KMP_POP_PARTITIONED_TIMER();
1224 
1225  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1226  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1227 #endif // KMP_USE_DYNAMIC_LOCK
1228 }
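/* Illustrative sketch (not part of this file): a named critical section
     #pragma omp critical (name)
     { body(); }
   is typically lowered to, with a hypothetical compiler-generated static,
     static kmp_critical_name crit_name = {0};
     __kmpc_critical(&loc, gtid, &crit_name);
     body();
     __kmpc_end_critical(&loc, gtid, &crit_name);
   All critical sections with the same name share the same kmp_critical_name
   object and therefore the same underlying lock. */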
1229 
1230 #if KMP_USE_DYNAMIC_LOCK
1231 
1232 // Converts the given hint to an internal lock implementation
1233 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1234 #if KMP_USE_TSX
1235 #define KMP_TSX_LOCK(seq) lockseq_##seq
1236 #else
1237 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1238 #endif
1239 
1240 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1241 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1242 #else
1243 #define KMP_CPUINFO_RTM 0
1244 #endif
1245 
1246  // Hints that do not require further logic
1247  if (hint & kmp_lock_hint_hle)
1248  return KMP_TSX_LOCK(hle);
1249  if (hint & kmp_lock_hint_rtm)
1250  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1251  if (hint & kmp_lock_hint_adaptive)
1252  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1253 
1254  // Rule out conflicting hints first by returning the default lock
1255  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1256  return __kmp_user_lock_seq;
1257  if ((hint & omp_lock_hint_speculative) &&
1258  (hint & omp_lock_hint_nonspeculative))
1259  return __kmp_user_lock_seq;
1260 
1261  // Do not even consider speculation when it appears to be contended
1262  if (hint & omp_lock_hint_contended)
1263  return lockseq_queuing;
1264 
1265  // Uncontended lock without speculation
1266  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1267  return lockseq_tas;
1268 
1269  // HLE lock for speculation
1270  if (hint & omp_lock_hint_speculative)
1271  return KMP_TSX_LOCK(hle);
1272 
1273  return __kmp_user_lock_seq;
1274 }
1275 
1276 #if OMPT_SUPPORT && OMPT_OPTIONAL
1277 #if KMP_USE_DYNAMIC_LOCK
1278 static kmp_mutex_impl_t
1279 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1280  if (user_lock) {
1281  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1282  case 0:
1283  break;
1284 #if KMP_USE_FUTEX
1285  case locktag_futex:
1286  return kmp_mutex_impl_queuing;
1287 #endif
1288  case locktag_tas:
1289  return kmp_mutex_impl_spin;
1290 #if KMP_USE_TSX
1291  case locktag_hle:
1292  return kmp_mutex_impl_speculative;
1293 #endif
1294  default:
1295  return kmp_mutex_impl_none;
1296  }
1297  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1298  }
1299  KMP_ASSERT(ilock);
1300  switch (ilock->type) {
1301 #if KMP_USE_TSX
1302  case locktag_adaptive:
1303  case locktag_rtm:
1304  return kmp_mutex_impl_speculative;
1305 #endif
1306  case locktag_nested_tas:
1307  return kmp_mutex_impl_spin;
1308 #if KMP_USE_FUTEX
1309  case locktag_nested_futex:
1310 #endif
1311  case locktag_ticket:
1312  case locktag_queuing:
1313  case locktag_drdpa:
1314  case locktag_nested_ticket:
1315  case locktag_nested_queuing:
1316  case locktag_nested_drdpa:
1317  return kmp_mutex_impl_queuing;
1318  default:
1319  return kmp_mutex_impl_none;
1320  }
1321 }
1322 #else
1323 // For locks without dynamic binding
1324 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1325  switch (__kmp_user_lock_kind) {
1326  case lk_tas:
1327  return kmp_mutex_impl_spin;
1328 #if KMP_USE_FUTEX
1329  case lk_futex:
1330 #endif
1331  case lk_ticket:
1332  case lk_queuing:
1333  case lk_drdpa:
1334  return kmp_mutex_impl_queuing;
1335 #if KMP_USE_TSX
1336  case lk_hle:
1337  case lk_rtm:
1338  case lk_adaptive:
1339  return kmp_mutex_impl_speculative;
1340 #endif
1341  default:
1342  return kmp_mutex_impl_none;
1343  }
1344 }
1345 #endif // KMP_USE_DYNAMIC_LOCK
1346 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1347 
1361 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1362  kmp_critical_name *crit, uint32_t hint) {
1363  KMP_COUNT_BLOCK(OMP_CRITICAL);
1364  kmp_user_lock_p lck;
1365 #if OMPT_SUPPORT && OMPT_OPTIONAL
1366  ompt_state_t prev_state = ompt_state_undefined;
1367  ompt_thread_info_t ti;
1368  // This is the case if called from __kmpc_critical:
1369  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1370  if (!codeptr)
1371  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1372 #endif
1373 
1374  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1375 
1376  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1377  // Check if it is initialized.
1378  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1379  if (*lk == 0) {
1380  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1381  if (KMP_IS_D_LOCK(lckseq)) {
1382  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1383  KMP_GET_D_TAG(lckseq));
1384  } else {
1385  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1386  }
1387  }
1388  // Branch for accessing the actual lock object and set operation. This
1389  // branching is inevitable since this lock initialization does not follow the
1390  // normal dispatch path (lock table is not used).
1391  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1392  lck = (kmp_user_lock_p)lk;
1393  if (__kmp_env_consistency_check) {
1394  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1395  __kmp_map_hint_to_lock(hint));
1396  }
1397 #if USE_ITT_BUILD
1398  __kmp_itt_critical_acquiring(lck);
1399 #endif
1400 #if OMPT_SUPPORT && OMPT_OPTIONAL
1401  if (ompt_enabled.enabled) {
1402  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1403  /* OMPT state update */
1404  prev_state = ti.state;
1405  ti.wait_id = (ompt_wait_id_t)lck;
1406  ti.state = ompt_state_wait_critical;
1407 
1408  /* OMPT event callback */
1409  if (ompt_enabled.ompt_callback_mutex_acquire) {
1410  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1411  ompt_mutex_critical, (unsigned int)hint,
1412  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
1413  }
1414  }
1415 #endif
1416 #if KMP_USE_INLINED_TAS
1417  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1418  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1419  } else
1420 #elif KMP_USE_INLINED_FUTEX
1421  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1422  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1423  } else
1424 #endif
1425  {
1426  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1427  }
1428  } else {
1429  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1430  lck = ilk->lock;
1431  if (__kmp_env_consistency_check) {
1432  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1433  __kmp_map_hint_to_lock(hint));
1434  }
1435 #if USE_ITT_BUILD
1436  __kmp_itt_critical_acquiring(lck);
1437 #endif
1438 #if OMPT_SUPPORT && OMPT_OPTIONAL
1439  if (ompt_enabled.enabled) {
1440  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1441  /* OMPT state update */
1442  prev_state = ti.state;
1443  ti.wait_id = (ompt_wait_id_t)lck;
1444  ti.state = ompt_state_wait_critical;
1445 
1446  /* OMPT event callback */
1447  if (ompt_enabled.ompt_callback_mutex_acquire) {
1448  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1449  ompt_mutex_critical, (unsigned int)hint,
1450  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
1451  }
1452  }
1453 #endif
1454  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1455  }
1456  KMP_POP_PARTITIONED_TIMER();
1457 
1458 #if USE_ITT_BUILD
1459  __kmp_itt_critical_acquired(lck);
1460 #endif /* USE_ITT_BUILD */
1461 #if OMPT_SUPPORT && OMPT_OPTIONAL
1462  if (ompt_enabled.enabled) {
1463  /* OMPT state update */
1464  ti.state = prev_state;
1465  ti.wait_id = 0;
1466 
1467  /* OMPT event callback */
1468  if (ompt_enabled.ompt_callback_mutex_acquired) {
1469  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1470  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
1471  }
1472  }
1473 #endif
1474 
1475  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1476  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1477 } // __kmpc_critical_with_hint
1478 
1479 #endif // KMP_USE_DYNAMIC_LOCK
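/* Illustrative sketch (not part of this file): when lock hints are in use,
   a construct such as
     #pragma omp critical (name) hint(omp_lock_hint_contended)
   can be lowered (with the dynamic lock layer enabled) to
     static kmp_critical_name crit_name = {0};   // hypothetical
     __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_contended);
     ... critical section ...
     __kmpc_end_critical(&loc, gtid, &crit_name);
   __kmp_map_hint_to_lock above picks the underlying lock kind, e.g. a
   queuing lock for the contended hint. */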
1480 
1490 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1491  kmp_critical_name *crit) {
1492  kmp_user_lock_p lck;
1493 
1494  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1495 
1496 #if KMP_USE_DYNAMIC_LOCK
1497  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1498  lck = (kmp_user_lock_p)crit;
1499  KMP_ASSERT(lck != NULL);
1500  if (__kmp_env_consistency_check) {
1501  __kmp_pop_sync(global_tid, ct_critical, loc);
1502  }
1503 #if USE_ITT_BUILD
1504  __kmp_itt_critical_releasing(lck);
1505 #endif
1506 #if KMP_USE_INLINED_TAS
1507  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1508  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1509  } else
1510 #elif KMP_USE_INLINED_FUTEX
1511  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1512  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1513  } else
1514 #endif
1515  {
1516  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1517  }
1518  } else {
1519  kmp_indirect_lock_t *ilk =
1520  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1521  KMP_ASSERT(ilk != NULL);
1522  lck = ilk->lock;
1523  if (__kmp_env_consistency_check) {
1524  __kmp_pop_sync(global_tid, ct_critical, loc);
1525  }
1526 #if USE_ITT_BUILD
1527  __kmp_itt_critical_releasing(lck);
1528 #endif
1529  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1530  }
1531 
1532 #else // KMP_USE_DYNAMIC_LOCK
1533 
1534  if ((__kmp_user_lock_kind == lk_tas) &&
1535  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1536  lck = (kmp_user_lock_p)crit;
1537  }
1538 #if KMP_USE_FUTEX
1539  else if ((__kmp_user_lock_kind == lk_futex) &&
1540  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1541  lck = (kmp_user_lock_p)crit;
1542  }
1543 #endif
1544  else { // ticket, queuing or drdpa
1545  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1546  }
1547 
1548  KMP_ASSERT(lck != NULL);
1549 
1550  if (__kmp_env_consistency_check)
1551  __kmp_pop_sync(global_tid, ct_critical, loc);
1552 
1553 #if USE_ITT_BUILD
1554  __kmp_itt_critical_releasing(lck);
1555 #endif /* USE_ITT_BUILD */
1556  // The value of 'crit' can be used directly as the critical_id of the
1557  // critical section directive.
1558  __kmp_release_user_lock_with_checks(lck, global_tid);
1559 
1560 #endif // KMP_USE_DYNAMIC_LOCK
1561 
1562 #if OMPT_SUPPORT && OMPT_OPTIONAL
1563  /* OMPT release event triggers after lock is released; place here to trigger
1564  * for all #if branches */
1565  OMPT_STORE_RETURN_ADDRESS(global_tid);
1566  if (ompt_enabled.ompt_callback_mutex_released) {
1567  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1568  ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1569  }
1570 #endif
1571 
1572  KMP_POP_PARTITIONED_TIMER();
1573  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1574 }
1575 
1585 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1586  int status;
1587 
1588  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1589 
1590  if (!TCR_4(__kmp_init_parallel))
1591  __kmp_parallel_initialize();
1592 
1593  if (__kmp_env_consistency_check)
1594  __kmp_check_barrier(global_tid, ct_barrier, loc);
1595 
1596 #if OMPT_SUPPORT
1597  ompt_frame_t *ompt_frame;
1598  if (ompt_enabled.enabled) {
1599  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1600  if (ompt_frame->enter_frame.ptr == NULL)
1601  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1602  OMPT_STORE_RETURN_ADDRESS(global_tid);
1603  }
1604 #endif
1605 #if USE_ITT_NOTIFY
1606  __kmp_threads[global_tid]->th.th_ident = loc;
1607 #endif
1608  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1609 #if OMPT_SUPPORT && OMPT_OPTIONAL
1610  if (ompt_enabled.enabled) {
1611  ompt_frame->enter_frame = ompt_data_none;
1612  }
1613 #endif
1614 
1615  return (status != 0) ? 0 : 1;
1616 }
1617 
1627 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1628  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1629 
1630  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1631 }
1632 
1643 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1644  kmp_int32 ret;
1645 
1646  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1647 
1648  if (!TCR_4(__kmp_init_parallel))
1649  __kmp_parallel_initialize();
1650 
1651  if (__kmp_env_consistency_check) {
1652  if (loc == 0) {
1653  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1654  }
1655  __kmp_check_barrier(global_tid, ct_barrier, loc);
1656  }
1657 
1658 #if OMPT_SUPPORT
1659  ompt_frame_t *ompt_frame;
1660  if (ompt_enabled.enabled) {
1661  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1662  if (ompt_frame->enter_frame.ptr == NULL)
1663  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1664  OMPT_STORE_RETURN_ADDRESS(global_tid);
1665  }
1666 #endif
1667 #if USE_ITT_NOTIFY
1668  __kmp_threads[global_tid]->th.th_ident = loc;
1669 #endif
1670  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1671 #if OMPT_SUPPORT && OMPT_OPTIONAL
1672  if (ompt_enabled.enabled) {
1673  ompt_frame->enter_frame = ompt_data_none;
1674  }
1675 #endif
1676 
1677  ret = __kmpc_master(loc, global_tid);
1678 
1679  if (__kmp_env_consistency_check) {
1680  /* there's no __kmpc_end_master called; so the (stats) */
1681  /* actions of __kmpc_end_master are done here */
1682 
1683  if (global_tid < 0) {
1684  KMP_WARNING(ThreadIdentInvalid);
1685  }
1686  if (ret) {
1687  /* only one thread should do the pop since only */
1688  /* one did the push (see __kmpc_master()) */
1689 
1690  __kmp_pop_sync(global_tid, ct_master, loc);
1691  }
1692  }
1693 
1694  return (ret);
1695 }
1696 
1697 /* The BARRIER for a SINGLE process section is always explicit */
1709 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1710  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1711 
1712  if (rc) {
1713  // We are going to execute the single statement, so we should count it.
1714  KMP_COUNT_BLOCK(OMP_SINGLE);
1715  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1716  }
1717 
1718 #if OMPT_SUPPORT && OMPT_OPTIONAL
1719  kmp_info_t *this_thr = __kmp_threads[global_tid];
1720  kmp_team_t *team = this_thr->th.th_team;
1721  int tid = __kmp_tid_from_gtid(global_tid);
1722 
1723  if (ompt_enabled.enabled) {
1724  if (rc) {
1725  if (ompt_enabled.ompt_callback_work) {
1726  ompt_callbacks.ompt_callback(ompt_callback_work)(
1727  ompt_work_single_executor, ompt_scope_begin,
1728  &(team->t.ompt_team_info.parallel_data),
1729  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1730  1, OMPT_GET_RETURN_ADDRESS(0));
1731  }
1732  } else {
1733  if (ompt_enabled.ompt_callback_work) {
1734  ompt_callbacks.ompt_callback(ompt_callback_work)(
1735  ompt_work_single_other, ompt_scope_begin,
1736  &(team->t.ompt_team_info.parallel_data),
1737  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1738  1, OMPT_GET_RETURN_ADDRESS(0));
1739  ompt_callbacks.ompt_callback(ompt_callback_work)(
1740  ompt_work_single_other, ompt_scope_end,
1741  &(team->t.ompt_team_info.parallel_data),
1742  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1743  1, OMPT_GET_RETURN_ADDRESS(0));
1744  }
1745  }
1746  }
1747 #endif
1748 
1749  return rc;
1750 }
1751 
1761 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1762  __kmp_exit_single(global_tid);
1763  KMP_POP_PARTITIONED_TIMER();
1764 
1765 #if OMPT_SUPPORT && OMPT_OPTIONAL
1766  kmp_info_t *this_thr = __kmp_threads[global_tid];
1767  kmp_team_t *team = this_thr->th.th_team;
1768  int tid = __kmp_tid_from_gtid(global_tid);
1769 
1770  if (ompt_enabled.ompt_callback_work) {
1771  ompt_callbacks.ompt_callback(ompt_callback_work)(
1772  ompt_work_single_executor, ompt_scope_end,
1773  &(team->t.ompt_team_info.parallel_data),
1774  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1775  OMPT_GET_RETURN_ADDRESS(0));
1776  }
1777 #endif
1778 }
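/* Illustrative sketch (not part of this file): a single construct
     #pragma omp single
     { body(); }
   is typically lowered to
     if (__kmpc_single(&loc, gtid)) {
       body();
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid);   // omitted when a nowait clause is present
*/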
1779 
1787 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1788  KMP_POP_PARTITIONED_TIMER();
1789  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1790 
1791 #if OMPT_SUPPORT && OMPT_OPTIONAL
1792  if (ompt_enabled.ompt_callback_work) {
1793  ompt_work_t ompt_work_type = ompt_work_loop;
1794  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1795  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1796  // Determine workshare type
1797  if (loc != NULL) {
1798  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1799  ompt_work_type = ompt_work_loop;
1800  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1801  ompt_work_type = ompt_work_sections;
1802  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1803  ompt_work_type = ompt_work_distribute;
1804  } else {
1805  // use default set above.
1806  // a warning about this case is provided in __kmpc_for_static_init
1807  }
1808  KMP_DEBUG_ASSERT(ompt_work_type);
1809  }
1810  ompt_callbacks.ompt_callback(ompt_callback_work)(
1811  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1812  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1813  }
1814 #endif
1815  if (__kmp_env_consistency_check)
1816  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1817 }
1818 
1819 // User routines which take C-style arguments (call by value), unlike
1820 // the Fortran equivalent routines
1821 
1822 void ompc_set_num_threads(int arg) {
1823  // !!!!! TODO: check the per-task binding
1824  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1825 }
1826 
1827 void ompc_set_dynamic(int flag) {
1828  kmp_info_t *thread;
1829 
1830  /* For the thread-private implementation of the internal controls */
1831  thread = __kmp_entry_thread();
1832 
1833  __kmp_save_internal_controls(thread);
1834 
1835  set__dynamic(thread, flag ? TRUE : FALSE);
1836 }
1837 
1838 void ompc_set_nested(int flag) {
1839  kmp_info_t *thread;
1840 
1841  /* For the thread-private internal controls implementation */
1842  thread = __kmp_entry_thread();
1843 
1844  __kmp_save_internal_controls(thread);
1845 
1846  set__nested(thread, flag ? TRUE : FALSE);
1847 }
1848 
1849 void ompc_set_max_active_levels(int max_active_levels) {
1850  /* TO DO */
1851  /* we want per-task implementation of this internal control */
1852 
1853  /* For the per-thread internal controls implementation */
1854  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1855 }
1856 
1857 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1858  // !!!!! TODO: check the per-task binding
1859  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1860 }
1861 
1862 int ompc_get_ancestor_thread_num(int level) {
1863  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1864 }
1865 
1866 int ompc_get_team_size(int level) {
1867  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1868 }
1869 
1870 #if OMP_50_ENABLED
1871 /* OpenMP 5.0 Affinity Format API */
1872 
1873 void ompc_set_affinity_format(char const *format) {
1874  if (!__kmp_init_serial) {
1875  __kmp_serial_initialize();
1876  }
1877  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1878  format, KMP_STRLEN(format) + 1);
1879 }
1880 
1881 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1882  size_t format_size;
1883  if (!__kmp_init_serial) {
1884  __kmp_serial_initialize();
1885  }
1886  format_size = KMP_STRLEN(__kmp_affinity_format);
1887  if (buffer && size) {
1888  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1889  format_size + 1);
1890  }
1891  return format_size;
1892 }
1893 
1894 void ompc_display_affinity(char const *format) {
1895  int gtid;
1896  if (!TCR_4(__kmp_init_middle)) {
1897  __kmp_middle_initialize();
1898  }
1899  gtid = __kmp_get_gtid();
1900  __kmp_aux_display_affinity(gtid, format);
1901 }
1902 
1903 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1904  char const *format) {
1905  int gtid;
1906  size_t num_required;
1907  kmp_str_buf_t capture_buf;
1908  if (!TCR_4(__kmp_init_middle)) {
1909  __kmp_middle_initialize();
1910  }
1911  gtid = __kmp_get_gtid();
1912  __kmp_str_buf_init(&capture_buf);
1913  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1914  if (buffer && buf_size) {
1915  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1916  capture_buf.used + 1);
1917  }
1918  __kmp_str_buf_free(&capture_buf);
1919  return num_required;
1920 }
1921 #endif /* OMP_50_ENABLED */
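/* Illustrative sketch (not part of this file): these entry points back the
   OpenMP 5.0 affinity display API. User code such as
     omp_set_affinity_format("thread %n bound to %A");
     char buf[256];
     size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
   reaches ompc_set_affinity_format / ompc_capture_affinity above. The format
   fields shown ("%n", "%A") are examples from the OpenMP 5.0 affinity-format
   syntax; a NULL or empty format falls back to the affinity-format ICV. */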
1922 
1923 void kmpc_set_stacksize(int arg) {
1924  // __kmp_aux_set_stacksize initializes the library if needed
1925  __kmp_aux_set_stacksize(arg);
1926 }
1927 
1928 void kmpc_set_stacksize_s(size_t arg) {
1929  // __kmp_aux_set_stacksize initializes the library if needed
1930  __kmp_aux_set_stacksize(arg);
1931 }
1932 
1933 void kmpc_set_blocktime(int arg) {
1934  int gtid, tid;
1935  kmp_info_t *thread;
1936 
1937  gtid = __kmp_entry_gtid();
1938  tid = __kmp_tid_from_gtid(gtid);
1939  thread = __kmp_thread_from_gtid(gtid);
1940 
1941  __kmp_aux_set_blocktime(arg, thread, tid);
1942 }
1943 
1944 void kmpc_set_library(int arg) {
1945  // __kmp_user_set_library initializes the library if needed
1946  __kmp_user_set_library((enum library_type)arg);
1947 }
1948 
1949 void kmpc_set_defaults(char const *str) {
1950  // __kmp_aux_set_defaults initializes the library if needed
1951  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1952 }
1953 
1954 void kmpc_set_disp_num_buffers(int arg) {
1955  // ignore after initialization because some teams have already
1956  // allocated dispatch buffers
1957  if (__kmp_init_serial == 0 && arg > 0)
1958  __kmp_dispatch_num_buffers = arg;
1959 }
1960 
1961 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1962 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1963  return -1;
1964 #else
1965  if (!TCR_4(__kmp_init_middle)) {
1966  __kmp_middle_initialize();
1967  }
1968  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1969 #endif
1970 }
1971 
1972 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1973 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1974  return -1;
1975 #else
1976  if (!TCR_4(__kmp_init_middle)) {
1977  __kmp_middle_initialize();
1978  }
1979  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1980 #endif
1981 }
1982 
1983 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1984 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1985  return -1;
1986 #else
1987  if (!TCR_4(__kmp_init_middle)) {
1988  __kmp_middle_initialize();
1989  }
1990  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1991 #endif
1992 }
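/* Illustrative usage sketch (an assumption, not part of this file): the
   kmpc_*_affinity_mask_proc entry points above back the KMP affinity
   extension API declared in omp.h (kmp_create_affinity_mask,
   kmp_set_affinity_mask_proc, kmp_set_affinity, ...). Guarded by #if 0. */
#if 0
#include <omp.h>

/* Pin the calling thread to a single logical processor; returns 0 on
   success, nonzero if affinity is unsupported (stub library or
   !KMP_AFFINITY_SUPPORTED, where the entry points above return -1). */
static int example_pin_to_proc(int proc) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(proc, &mask) != 0) {
    kmp_destroy_affinity_mask(&mask);
    return -1;
  }
  int rc = kmp_set_affinity(&mask); /* bind the calling thread to the mask */
  kmp_destroy_affinity_mask(&mask);
  return rc;
}
#endif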
1993 
1994 /* -------------------------------------------------------------------------- */
2039 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2040  void *cpy_data, void (*cpy_func)(void *, void *),
2041  kmp_int32 didit) {
2042  void **data_ptr;
2043 
2044  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2045 
2046  KMP_MB();
2047 
2048  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2049 
2050  if (__kmp_env_consistency_check) {
2051  if (loc == 0) {
2052  KMP_WARNING(ConstructIdentInvalid);
2053  }
2054  }
2055 
2056  // ToDo: Optimize the following two barriers into some kind of split barrier
2057 
2058  if (didit)
2059  *data_ptr = cpy_data;
2060 
2061 #if OMPT_SUPPORT
2062  ompt_frame_t *ompt_frame;
2063  if (ompt_enabled.enabled) {
2064  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2065  if (ompt_frame->enter_frame.ptr == NULL)
2066  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2067  OMPT_STORE_RETURN_ADDRESS(gtid);
2068  }
2069 #endif
2070 /* This barrier is not a barrier region boundary */
2071 #if USE_ITT_NOTIFY
2072  __kmp_threads[gtid]->th.th_ident = loc;
2073 #endif
2074  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2075 
2076  if (!didit)
2077  (*cpy_func)(cpy_data, *data_ptr);
2078 
2079 // Consider next barrier a user-visible barrier for barrier region boundaries
2080 // Nesting checks are already handled by the single construct checks
2081 
2082 #if OMPT_SUPPORT
2083  if (ompt_enabled.enabled) {
2084  OMPT_STORE_RETURN_ADDRESS(gtid);
2085  }
2086 #endif
2087 #if USE_ITT_NOTIFY
2088  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed
2089  // (e.g. tasks can overwrite the location)
2090 #endif
2091  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2092 #if OMPT_SUPPORT && OMPT_OPTIONAL
2093  if (ompt_enabled.enabled) {
2094  ompt_frame->enter_frame = ompt_data_none;
2095  }
2096 #endif
2097 }
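/* Illustrative lowering sketch (an assumption about code generation, not the
   exact code any particular compiler emits): how "#pragma omp single
   copyprivate(x)" can be mapped onto __kmpc_single()/__kmpc_end_single() plus
   __kmpc_copyprivate() above. Real codegen typically broadcasts a struct of
   pointers rather than a single variable; this is simplified. The ident_t and
   kmp_int32 types come from kmp.h, already included by this file. */
#if 0
static void example_copy_int(void *dst, void *src) {
  /* cpy_func: copy the broadcast value into this thread's private copy */
  *(int *)dst = *(int *)src;
}

static void example_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = __kmpc_single(loc, gtid); /* 1 only on the executing thread */
  if (didit) {
    *x = 42; /* body of the single region */
    __kmpc_end_single(loc, gtid);
  }
  /* every thread calls copyprivate: the thread with didit==1 publishes its
     data between the two barriers, the others copy it via example_copy_int */
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, example_copy_int, didit);
}
#endif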
2098 
2099 /* -------------------------------------------------------------------------- */
2100 
2101 #define INIT_LOCK __kmp_init_user_lock_with_checks
2102 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2103 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2104 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2105 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2106 #define ACQUIRE_NESTED_LOCK_TIMED \
2107  __kmp_acquire_nested_user_lock_with_checks_timed
2108 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2109 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2110 #define TEST_LOCK __kmp_test_user_lock_with_checks
2111 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2112 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2113 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2114 
2115 // TODO: Make check abort messages use location info & pass it into
2116 // with_checks routines
2117 
2118 #if KMP_USE_DYNAMIC_LOCK
2119 
2120 // internal lock initializer
2121 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2122  kmp_dyna_lockseq_t seq) {
2123  if (KMP_IS_D_LOCK(seq)) {
2124  KMP_INIT_D_LOCK(lock, seq);
2125 #if USE_ITT_BUILD
2126  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2127 #endif
2128  } else {
2129  KMP_INIT_I_LOCK(lock, seq);
2130 #if USE_ITT_BUILD
2131  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2132  __kmp_itt_lock_creating(ilk->lock, loc);
2133 #endif
2134  }
2135 }
2136 
2137 // internal nest lock initializer
2138 static __forceinline void
2139 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2140  kmp_dyna_lockseq_t seq) {
2141 #if KMP_USE_TSX
2142  // Don't have nested lock implementation for speculative locks
2143  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2144  seq = __kmp_user_lock_seq;
2145 #endif
2146  switch (seq) {
2147  case lockseq_tas:
2148  seq = lockseq_nested_tas;
2149  break;
2150 #if KMP_USE_FUTEX
2151  case lockseq_futex:
2152  seq = lockseq_nested_futex;
2153  break;
2154 #endif
2155  case lockseq_ticket:
2156  seq = lockseq_nested_ticket;
2157  break;
2158  case lockseq_queuing:
2159  seq = lockseq_nested_queuing;
2160  break;
2161  case lockseq_drdpa:
2162  seq = lockseq_nested_drdpa;
2163  break;
2164  default:
2165  seq = lockseq_nested_queuing;
2166  }
2167  KMP_INIT_I_LOCK(lock, seq);
2168 #if USE_ITT_BUILD
2169  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2170  __kmp_itt_lock_creating(ilk->lock, loc);
2171 #endif
2172 }
2173 
2174 /* initialize the lock with a hint */
2175 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2176  uintptr_t hint) {
2177  KMP_DEBUG_ASSERT(__kmp_init_serial);
2178  if (__kmp_env_consistency_check && user_lock == NULL) {
2179  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2180  }
2181 
2182  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2183 
2184 #if OMPT_SUPPORT && OMPT_OPTIONAL
2185  // This is the case, if called from omp_init_lock_with_hint:
2186  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2187  if (!codeptr)
2188  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2189  if (ompt_enabled.ompt_callback_lock_init) {
2190  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2191  ompt_mutex_lock, (omp_lock_hint_t)hint,
2192  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2193  codeptr);
2194  }
2195 #endif
2196 }
2197 
2198 /* initialize the lock with a hint */
2199 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2200  void **user_lock, uintptr_t hint) {
2201  KMP_DEBUG_ASSERT(__kmp_init_serial);
2202  if (__kmp_env_consistency_check && user_lock == NULL) {
2203  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2204  }
2205 
2206  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2207 
2208 #if OMPT_SUPPORT && OMPT_OPTIONAL
2209  // This is the case, if called from omp_init_lock_with_hint:
2210  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2211  if (!codeptr)
2212  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2213  if (ompt_enabled.ompt_callback_lock_init) {
2214  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2215  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2216  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2217  codeptr);
2218  }
2219 #endif
2220 }
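/* Illustrative usage sketch (not part of this file): the user-level
   omp_init_lock_with_hint()/omp_init_nest_lock_with_hint() routines from
   OpenMP 4.5+, which reach the __kmpc_init_*lock_with_hint entry points
   above. Guarded by #if 0. */
#if 0
#include <omp.h>

static omp_lock_t counter_lock;
static int counter;

static void example_hinted_lock(void) {
  /* omp_lock_hint_speculative asks for a speculative (e.g. TSX-based) lock
     where available; otherwise __kmp_map_hint_to_lock falls back to a
     regular lock kind. */
  omp_init_lock_with_hint(&counter_lock, omp_lock_hint_speculative);

#pragma omp parallel
  {
    omp_set_lock(&counter_lock);
    ++counter;
    omp_unset_lock(&counter_lock);
  }
  omp_destroy_lock(&counter_lock);
}
#endif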
2221 
2222 #endif // KMP_USE_DYNAMIC_LOCK
2223 
2224 /* initialize the lock */
2225 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2226 #if KMP_USE_DYNAMIC_LOCK
2227 
2228  KMP_DEBUG_ASSERT(__kmp_init_serial);
2229  if (__kmp_env_consistency_check && user_lock == NULL) {
2230  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2231  }
2232  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2233 
2234 #if OMPT_SUPPORT && OMPT_OPTIONAL
2235  // This is the case, if called from omp_init_lock_with_hint:
2236  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2237  if (!codeptr)
2238  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2239  if (ompt_enabled.ompt_callback_lock_init) {
2240  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2241  ompt_mutex_lock, omp_lock_hint_none,
2242  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2243  codeptr);
2244  }
2245 #endif
2246 
2247 #else // KMP_USE_DYNAMIC_LOCK
2248 
2249  static char const *const func = "omp_init_lock";
2250  kmp_user_lock_p lck;
2251  KMP_DEBUG_ASSERT(__kmp_init_serial);
2252 
2253  if (__kmp_env_consistency_check) {
2254  if (user_lock == NULL) {
2255  KMP_FATAL(LockIsUninitialized, func);
2256  }
2257  }
2258 
2259  KMP_CHECK_USER_LOCK_INIT();
2260 
2261  if ((__kmp_user_lock_kind == lk_tas) &&
2262  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2263  lck = (kmp_user_lock_p)user_lock;
2264  }
2265 #if KMP_USE_FUTEX
2266  else if ((__kmp_user_lock_kind == lk_futex) &&
2267  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2268  lck = (kmp_user_lock_p)user_lock;
2269  }
2270 #endif
2271  else {
2272  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2273  }
2274  INIT_LOCK(lck);
2275  __kmp_set_user_lock_location(lck, loc);
2276 
2277 #if OMPT_SUPPORT && OMPT_OPTIONAL
2278  // This is the case, if called from omp_init_lock_with_hint:
2279  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2280  if (!codeptr)
2281  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2282  if (ompt_enabled.ompt_callback_lock_init) {
2283  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2284  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2285  (ompt_wait_id_t)user_lock, codeptr);
2286  }
2287 #endif
2288 
2289 #if USE_ITT_BUILD
2290  __kmp_itt_lock_creating(lck);
2291 #endif /* USE_ITT_BUILD */
2292 
2293 #endif // KMP_USE_DYNAMIC_LOCK
2294 } // __kmpc_init_lock
2295 
2296 /* initialize the lock */
2297 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2298 #if KMP_USE_DYNAMIC_LOCK
2299 
2300  KMP_DEBUG_ASSERT(__kmp_init_serial);
2301  if (__kmp_env_consistency_check && user_lock == NULL) {
2302  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2303  }
2304  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2305 
2306 #if OMPT_SUPPORT && OMPT_OPTIONAL
2307  // This is the case, if called from omp_init_lock_with_hint:
2308  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2309  if (!codeptr)
2310  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2311  if (ompt_enabled.ompt_callback_lock_init) {
2312  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2313  ompt_mutex_nest_lock, omp_lock_hint_none,
2314  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2315  codeptr);
2316  }
2317 #endif
2318 
2319 #else // KMP_USE_DYNAMIC_LOCK
2320 
2321  static char const *const func = "omp_init_nest_lock";
2322  kmp_user_lock_p lck;
2323  KMP_DEBUG_ASSERT(__kmp_init_serial);
2324 
2325  if (__kmp_env_consistency_check) {
2326  if (user_lock == NULL) {
2327  KMP_FATAL(LockIsUninitialized, func);
2328  }
2329  }
2330 
2331  KMP_CHECK_USER_LOCK_INIT();
2332 
2333  if ((__kmp_user_lock_kind == lk_tas) &&
2334  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2335  OMP_NEST_LOCK_T_SIZE)) {
2336  lck = (kmp_user_lock_p)user_lock;
2337  }
2338 #if KMP_USE_FUTEX
2339  else if ((__kmp_user_lock_kind == lk_futex) &&
2340  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2341  OMP_NEST_LOCK_T_SIZE)) {
2342  lck = (kmp_user_lock_p)user_lock;
2343  }
2344 #endif
2345  else {
2346  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2347  }
2348 
2349  INIT_NESTED_LOCK(lck);
2350  __kmp_set_user_lock_location(lck, loc);
2351 
2352 #if OMPT_SUPPORT && OMPT_OPTIONAL
2353  // This is the case, if called from omp_init_lock_with_hint:
2354  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2355  if (!codeptr)
2356  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2357  if (ompt_enabled.ompt_callback_lock_init) {
2358  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2359  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2360  (ompt_wait_id_t)user_lock, codeptr);
2361  }
2362 #endif
2363 
2364 #if USE_ITT_BUILD
2365  __kmp_itt_lock_creating(lck);
2366 #endif /* USE_ITT_BUILD */
2367 
2368 #endif // KMP_USE_DYNAMIC_LOCK
2369 } // __kmpc_init_nest_lock
2370 
2371 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2372 #if KMP_USE_DYNAMIC_LOCK
2373 
2374 #if USE_ITT_BUILD
2375  kmp_user_lock_p lck;
2376  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2377  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2378  } else {
2379  lck = (kmp_user_lock_p)user_lock;
2380  }
2381  __kmp_itt_lock_destroyed(lck);
2382 #endif
2383 #if OMPT_SUPPORT && OMPT_OPTIONAL
2384  // This is the case, if called from omp_init_lock_with_hint:
2385  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2386  if (!codeptr)
2387  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2388  if (ompt_enabled.ompt_callback_lock_destroy) {
2389  kmp_user_lock_p lck;
2390  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2391  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2392  } else {
2393  lck = (kmp_user_lock_p)user_lock;
2394  }
2395  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2396  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2397  }
2398 #endif
2399  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2400 #else
2401  kmp_user_lock_p lck;
2402 
2403  if ((__kmp_user_lock_kind == lk_tas) &&
2404  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2405  lck = (kmp_user_lock_p)user_lock;
2406  }
2407 #if KMP_USE_FUTEX
2408  else if ((__kmp_user_lock_kind == lk_futex) &&
2409  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2410  lck = (kmp_user_lock_p)user_lock;
2411  }
2412 #endif
2413  else {
2414  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2415  }
2416 
2417 #if OMPT_SUPPORT && OMPT_OPTIONAL
2418  // This is the case, if called from omp_init_lock_with_hint:
2419  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2420  if (!codeptr)
2421  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2422  if (ompt_enabled.ompt_callback_lock_destroy) {
2423  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2424  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2425  }
2426 #endif
2427 
2428 #if USE_ITT_BUILD
2429  __kmp_itt_lock_destroyed(lck);
2430 #endif /* USE_ITT_BUILD */
2431  DESTROY_LOCK(lck);
2432 
2433  if ((__kmp_user_lock_kind == lk_tas) &&
2434  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2435  ;
2436  }
2437 #if KMP_USE_FUTEX
2438  else if ((__kmp_user_lock_kind == lk_futex) &&
2439  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2440  ;
2441  }
2442 #endif
2443  else {
2444  __kmp_user_lock_free(user_lock, gtid, lck);
2445  }
2446 #endif // KMP_USE_DYNAMIC_LOCK
2447 } // __kmpc_destroy_lock
2448 
2449 /* destroy the lock */
2450 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2451 #if KMP_USE_DYNAMIC_LOCK
2452 
2453 #if USE_ITT_BUILD
2454  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2455  __kmp_itt_lock_destroyed(ilk->lock);
2456 #endif
2457 #if OMPT_SUPPORT && OMPT_OPTIONAL
2458  // This is the case, if called from omp_init_lock_with_hint:
2459  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2460  if (!codeptr)
2461  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2462  if (ompt_enabled.ompt_callback_lock_destroy) {
2463  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2464  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2465  }
2466 #endif
2467  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2468 
2469 #else // KMP_USE_DYNAMIC_LOCK
2470 
2471  kmp_user_lock_p lck;
2472 
2473  if ((__kmp_user_lock_kind == lk_tas) &&
2474  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2475  OMP_NEST_LOCK_T_SIZE)) {
2476  lck = (kmp_user_lock_p)user_lock;
2477  }
2478 #if KMP_USE_FUTEX
2479  else if ((__kmp_user_lock_kind == lk_futex) &&
2480  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2481  OMP_NEST_LOCK_T_SIZE)) {
2482  lck = (kmp_user_lock_p)user_lock;
2483  }
2484 #endif
2485  else {
2486  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2487  }
2488 
2489 #if OMPT_SUPPORT && OMPT_OPTIONAL
2490  // This is the case, if called from omp_init_lock_with_hint:
2491  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2492  if (!codeptr)
2493  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2494  if (ompt_enabled.ompt_callback_lock_destroy) {
2495  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2496  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2497  }
2498 #endif
2499 
2500 #if USE_ITT_BUILD
2501  __kmp_itt_lock_destroyed(lck);
2502 #endif /* USE_ITT_BUILD */
2503 
2504  DESTROY_NESTED_LOCK(lck);
2505 
2506  if ((__kmp_user_lock_kind == lk_tas) &&
2507  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2508  OMP_NEST_LOCK_T_SIZE)) {
2509  ;
2510  }
2511 #if KMP_USE_FUTEX
2512  else if ((__kmp_user_lock_kind == lk_futex) &&
2513  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2514  OMP_NEST_LOCK_T_SIZE)) {
2515  ;
2516  }
2517 #endif
2518  else {
2519  __kmp_user_lock_free(user_lock, gtid, lck);
2520  }
2521 #endif // KMP_USE_DYNAMIC_LOCK
2522 } // __kmpc_destroy_nest_lock
2523 
2524 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2525  KMP_COUNT_BLOCK(OMP_set_lock);
2526 #if KMP_USE_DYNAMIC_LOCK
2527  int tag = KMP_EXTRACT_D_TAG(user_lock);
2528 #if USE_ITT_BUILD
2529  __kmp_itt_lock_acquiring(
2530  (kmp_user_lock_p)
2531  user_lock); // itt function will get to the right lock object.
2532 #endif
2533 #if OMPT_SUPPORT && OMPT_OPTIONAL
2534  // This is the case, if called from omp_init_lock_with_hint:
2535  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2536  if (!codeptr)
2537  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2538  if (ompt_enabled.ompt_callback_mutex_acquire) {
2539  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2540  ompt_mutex_lock, omp_lock_hint_none,
2541  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2542  codeptr);
2543  }
2544 #endif
2545 #if KMP_USE_INLINED_TAS
2546  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2547  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2548  } else
2549 #elif KMP_USE_INLINED_FUTEX
2550  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2551  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2552  } else
2553 #endif
2554  {
2555  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2556  }
2557 #if USE_ITT_BUILD
2558  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2559 #endif
2560 #if OMPT_SUPPORT && OMPT_OPTIONAL
2561  if (ompt_enabled.ompt_callback_mutex_acquired) {
2562  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2563  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2564  }
2565 #endif
2566 
2567 #else // KMP_USE_DYNAMIC_LOCK
2568 
2569  kmp_user_lock_p lck;
2570 
2571  if ((__kmp_user_lock_kind == lk_tas) &&
2572  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2573  lck = (kmp_user_lock_p)user_lock;
2574  }
2575 #if KMP_USE_FUTEX
2576  else if ((__kmp_user_lock_kind == lk_futex) &&
2577  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2578  lck = (kmp_user_lock_p)user_lock;
2579  }
2580 #endif
2581  else {
2582  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2583  }
2584 
2585 #if USE_ITT_BUILD
2586  __kmp_itt_lock_acquiring(lck);
2587 #endif /* USE_ITT_BUILD */
2588 #if OMPT_SUPPORT && OMPT_OPTIONAL
2589  // This is the case, if called from omp_init_lock_with_hint:
2590  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2591  if (!codeptr)
2592  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2593  if (ompt_enabled.ompt_callback_mutex_acquire) {
2594  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2595  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2596  (ompt_wait_id_t)lck, codeptr);
2597  }
2598 #endif
2599 
2600  ACQUIRE_LOCK(lck, gtid);
2601 
2602 #if USE_ITT_BUILD
2603  __kmp_itt_lock_acquired(lck);
2604 #endif /* USE_ITT_BUILD */
2605 
2606 #if OMPT_SUPPORT && OMPT_OPTIONAL
2607  if (ompt_enabled.ompt_callback_mutex_acquired) {
2608  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2609  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2610  }
2611 #endif
2612 
2613 #endif // KMP_USE_DYNAMIC_LOCK
2614 }
2615 
2616 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2617 #if KMP_USE_DYNAMIC_LOCK
2618 
2619 #if USE_ITT_BUILD
2620  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2621 #endif
2622 #if OMPT_SUPPORT && OMPT_OPTIONAL
2623  // This is the case, if called from omp_init_lock_with_hint:
2624  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2625  if (!codeptr)
2626  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2627  if (ompt_enabled.enabled) {
2628  if (ompt_enabled.ompt_callback_mutex_acquire) {
2629  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2630  ompt_mutex_nest_lock, omp_lock_hint_none,
2631  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2632  codeptr);
2633  }
2634  }
2635 #endif
2636  int acquire_status =
2637  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2638  (void) acquire_status;
2639 #if USE_ITT_BUILD
2640  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2641 #endif
2642 
2643 #if OMPT_SUPPORT && OMPT_OPTIONAL
2644  if (ompt_enabled.enabled) {
2645  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2646  if (ompt_enabled.ompt_callback_mutex_acquired) {
2647  // lock_first
2648  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2649  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2650  }
2651  } else {
2652  if (ompt_enabled.ompt_callback_nest_lock) {
2653  // lock_next
2654  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2655  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
2656  }
2657  }
2658  }
2659 #endif
2660 
2661 #else // KMP_USE_DYNAMIC_LOCK
2662  int acquire_status;
2663  kmp_user_lock_p lck;
2664 
2665  if ((__kmp_user_lock_kind == lk_tas) &&
2666  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2667  OMP_NEST_LOCK_T_SIZE)) {
2668  lck = (kmp_user_lock_p)user_lock;
2669  }
2670 #if KMP_USE_FUTEX
2671  else if ((__kmp_user_lock_kind == lk_futex) &&
2672  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2673  OMP_NEST_LOCK_T_SIZE)) {
2674  lck = (kmp_user_lock_p)user_lock;
2675  }
2676 #endif
2677  else {
2678  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2679  }
2680 
2681 #if USE_ITT_BUILD
2682  __kmp_itt_lock_acquiring(lck);
2683 #endif /* USE_ITT_BUILD */
2684 #if OMPT_SUPPORT && OMPT_OPTIONAL
2685  // This is the case, if called from omp_init_lock_with_hint:
2686  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2687  if (!codeptr)
2688  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2689  if (ompt_enabled.enabled) {
2690  if (ompt_enabled.ompt_callback_mutex_acquire) {
2691  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2692  ompt_mutex_nest_lock, omp_lock_hint_none,
2693  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
2694  }
2695  }
2696 #endif
2697 
2698  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2699 
2700 #if USE_ITT_BUILD
2701  __kmp_itt_lock_acquired(lck);
2702 #endif /* USE_ITT_BUILD */
2703 
2704 #if OMPT_SUPPORT && OMPT_OPTIONAL
2705  if (ompt_enabled.enabled) {
2706  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2707  if (ompt_enabled.ompt_callback_mutex_acquired) {
2708  // lock_first
2709  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2710  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2711  }
2712  } else {
2713  if (ompt_enabled.ompt_callback_nest_lock) {
2714  // lock_next
2715  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2716  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
2717  }
2718  }
2719  }
2720 #endif
2721 
2722 #endif // KMP_USE_DYNAMIC_LOCK
2723 }
2724 
2725 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2726 #if KMP_USE_DYNAMIC_LOCK
2727 
2728  int tag = KMP_EXTRACT_D_TAG(user_lock);
2729 #if USE_ITT_BUILD
2730  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2731 #endif
2732 #if KMP_USE_INLINED_TAS
2733  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2734  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2735  } else
2736 #elif KMP_USE_INLINED_FUTEX
2737  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2738  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2739  } else
2740 #endif
2741  {
2742  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2743  }
2744 
2745 #if OMPT_SUPPORT && OMPT_OPTIONAL
2746  // This is the case, if called from omp_init_lock_with_hint:
2747  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748  if (!codeptr)
2749  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2750  if (ompt_enabled.ompt_callback_mutex_released) {
2751  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2752  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2753  }
2754 #endif
2755 
2756 #else // KMP_USE_DYNAMIC_LOCK
2757 
2758  kmp_user_lock_p lck;
2759 
2760  /* Can't use serial interval since not block structured */
2761  /* release the lock */
2762 
2763  if ((__kmp_user_lock_kind == lk_tas) &&
2764  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2765 #if KMP_OS_LINUX && \
2766  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2767 // "fast" path implemented to fix customer performance issue
2768 #if USE_ITT_BUILD
2769  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2770 #endif /* USE_ITT_BUILD */
2771  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2772  KMP_MB();
2773 
2774 #if OMPT_SUPPORT && OMPT_OPTIONAL
2775  // This is the case, if called from omp_init_lock_with_hint:
2776  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2777  if (!codeptr)
2778  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2779  if (ompt_enabled.ompt_callback_mutex_released) {
2780  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2781  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2782  }
2783 #endif
2784 
2785  return;
2786 #else
2787  lck = (kmp_user_lock_p)user_lock;
2788 #endif
2789  }
2790 #if KMP_USE_FUTEX
2791  else if ((__kmp_user_lock_kind == lk_futex) &&
2792  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2793  lck = (kmp_user_lock_p)user_lock;
2794  }
2795 #endif
2796  else {
2797  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2798  }
2799 
2800 #if USE_ITT_BUILD
2801  __kmp_itt_lock_releasing(lck);
2802 #endif /* USE_ITT_BUILD */
2803 
2804  RELEASE_LOCK(lck, gtid);
2805 
2806 #if OMPT_SUPPORT && OMPT_OPTIONAL
2807  // This is the case, if called from omp_init_lock_with_hint:
2808  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2809  if (!codeptr)
2810  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2811  if (ompt_enabled.ompt_callback_mutex_released) {
2812  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2813  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2814  }
2815 #endif
2816 
2817 #endif // KMP_USE_DYNAMIC_LOCK
2818 }
2819 
2820 /* release the lock */
2821 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2822 #if KMP_USE_DYNAMIC_LOCK
2823 
2824 #if USE_ITT_BUILD
2825  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2826 #endif
2827  int release_status =
2828  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2829  (void) release_status;
2830 
2831 #if OMPT_SUPPORT && OMPT_OPTIONAL
2832  // This is the case, if called from omp_init_lock_with_hint:
2833  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2834  if (!codeptr)
2835  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2836  if (ompt_enabled.enabled) {
2837  if (release_status == KMP_LOCK_RELEASED) {
2838  if (ompt_enabled.ompt_callback_mutex_released) {
2839  // release_lock_last
2840  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2841  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2842  }
2843  } else if (ompt_enabled.ompt_callback_nest_lock) {
2844  // release_lock_prev
2845  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2846  ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2847  }
2848  }
2849 #endif
2850 
2851 #else // KMP_USE_DYNAMIC_LOCK
2852 
2853  kmp_user_lock_p lck;
2854 
2855  /* Can't use serial interval since not block structured */
2856 
2857  if ((__kmp_user_lock_kind == lk_tas) &&
2858  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2859  OMP_NEST_LOCK_T_SIZE)) {
2860 #if KMP_OS_LINUX && \
2861  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2862  // "fast" path implemented to fix customer performance issue
2863  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2864 #if USE_ITT_BUILD
2865  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2866 #endif /* USE_ITT_BUILD */
2867 
2868 #if OMPT_SUPPORT && OMPT_OPTIONAL
2869  int release_status = KMP_LOCK_STILL_HELD;
2870 #endif
2871 
2872  if (--(tl->lk.depth_locked) == 0) {
2873  TCW_4(tl->lk.poll, 0);
2874 #if OMPT_SUPPORT && OMPT_OPTIONAL
2875  release_status = KMP_LOCK_RELEASED;
2876 #endif
2877  }
2878  KMP_MB();
2879 
2880 #if OMPT_SUPPORT && OMPT_OPTIONAL
2881  // This is the case, if called from omp_init_lock_with_hint:
2882  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2883  if (!codeptr)
2884  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2885  if (ompt_enabled.enabled) {
2886  if (release_status == KMP_LOCK_RELEASED) {
2887  if (ompt_enabled.ompt_callback_mutex_released) {
2888  // release_lock_last
2889  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2890  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2891  }
2892  } else if (ompt_enabled.ompt_callback_nest_lock) {
2893  // release_lock_previous
2894  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2895  ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
2896  }
2897  }
2898 #endif
2899 
2900  return;
2901 #else
2902  lck = (kmp_user_lock_p)user_lock;
2903 #endif
2904  }
2905 #if KMP_USE_FUTEX
2906  else if ((__kmp_user_lock_kind == lk_futex) &&
2907  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2908  OMP_NEST_LOCK_T_SIZE)) {
2909  lck = (kmp_user_lock_p)user_lock;
2910  }
2911 #endif
2912  else {
2913  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2914  }
2915 
2916 #if USE_ITT_BUILD
2917  __kmp_itt_lock_releasing(lck);
2918 #endif /* USE_ITT_BUILD */
2919 
2920  int release_status;
2921  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2922 #if OMPT_SUPPORT && OMPT_OPTIONAL
2923  // This is the case, if called from omp_init_lock_with_hint:
2924  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2925  if (!codeptr)
2926  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2927  if (ompt_enabled.enabled) {
2928  if (release_status == KMP_LOCK_RELEASED) {
2929  if (ompt_enabled.ompt_callback_mutex_released) {
2930  // release_lock_last
2931  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2932  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2933  }
2934  } else if (ompt_enabled.ompt_callback_nest_lock) {
2935  // release_lock_previous
2936  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2937  ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
2938  }
2939  }
2940 #endif
2941 
2942 #endif // KMP_USE_DYNAMIC_LOCK
2943 }
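/* Illustrative usage sketch (not part of this file): a recursive routine
   protected by an OpenMP nestable lock, the user-level pattern behind the
   __kmpc_set_nest_lock()/__kmpc_unset_nest_lock() pair above. Assumes
   omp_init_nest_lock(&tree_lock) was called during setup. Guarded by #if 0. */
#if 0
#include <omp.h>

static omp_nest_lock_t tree_lock;

static void example_visit(int depth) {
  omp_set_nest_lock(&tree_lock); /* re-acquiring on recursion is legal */
  if (depth > 0)
    example_visit(depth - 1);
  /* ownership is fully released only when the outermost acquisition is
     unset (KMP_LOCK_RELEASED vs. KMP_LOCK_STILL_HELD in the code above) */
  omp_unset_nest_lock(&tree_lock);
}
#endif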
2944 
2945 /* try to acquire the lock */
2946 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2947  KMP_COUNT_BLOCK(OMP_test_lock);
2948 
2949 #if KMP_USE_DYNAMIC_LOCK
2950  int rc;
2951  int tag = KMP_EXTRACT_D_TAG(user_lock);
2952 #if USE_ITT_BUILD
2953  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2954 #endif
2955 #if OMPT_SUPPORT && OMPT_OPTIONAL
2956  // This is the case, if called from omp_init_lock_with_hint:
2957  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2958  if (!codeptr)
2959  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2960  if (ompt_enabled.ompt_callback_mutex_acquire) {
2961  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2962  ompt_mutex_lock, omp_lock_hint_none,
2963  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2964  codeptr);
2965  }
2966 #endif
2967 #if KMP_USE_INLINED_TAS
2968  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2969  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2970  } else
2971 #elif KMP_USE_INLINED_FUTEX
2972  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2973  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2974  } else
2975 #endif
2976  {
2977  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2978  }
2979  if (rc) {
2980 #if USE_ITT_BUILD
2981  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2982 #endif
2983 #if OMPT_SUPPORT && OMPT_OPTIONAL
2984  if (ompt_enabled.ompt_callback_mutex_acquired) {
2985  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2986  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2987  }
2988 #endif
2989  return FTN_TRUE;
2990  } else {
2991 #if USE_ITT_BUILD
2992  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2993 #endif
2994  return FTN_FALSE;
2995  }
2996 
2997 #else // KMP_USE_DYNAMIC_LOCK
2998 
2999  kmp_user_lock_p lck;
3000  int rc;
3001 
3002  if ((__kmp_user_lock_kind == lk_tas) &&
3003  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3004  lck = (kmp_user_lock_p)user_lock;
3005  }
3006 #if KMP_USE_FUTEX
3007  else if ((__kmp_user_lock_kind == lk_futex) &&
3008  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3009  lck = (kmp_user_lock_p)user_lock;
3010  }
3011 #endif
3012  else {
3013  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3014  }
3015 
3016 #if USE_ITT_BUILD
3017  __kmp_itt_lock_acquiring(lck);
3018 #endif /* USE_ITT_BUILD */
3019 #if OMPT_SUPPORT && OMPT_OPTIONAL
3020  // This is the case, if called from omp_init_lock_with_hint:
3021  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3022  if (!codeptr)
3023  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3024  if (ompt_enabled.ompt_callback_mutex_acquire) {
3025  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3026  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3027  (ompt_wait_id_t)lck, codeptr);
3028  }
3029 #endif
3030 
3031  rc = TEST_LOCK(lck, gtid);
3032 #if USE_ITT_BUILD
3033  if (rc) {
3034  __kmp_itt_lock_acquired(lck);
3035  } else {
3036  __kmp_itt_lock_cancelled(lck);
3037  }
3038 #endif /* USE_ITT_BUILD */
3039 #if OMPT_SUPPORT && OMPT_OPTIONAL
3040  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3041  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3042  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
3043  }
3044 #endif
3045 
3046  return (rc ? FTN_TRUE : FTN_FALSE);
3047 
3048 /* Can't use serial interval since not block structured */
3049 
3050 #endif // KMP_USE_DYNAMIC_LOCK
3051 }
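/* Illustrative usage sketch (not part of this file): the omp_test_lock()
   pattern that reaches __kmpc_test_lock() above -- do other work instead of
   blocking when the lock is busy. Guarded by #if 0. */
#if 0
#include <omp.h>

static void example_test_lock(omp_lock_t *lck, int *shared_counter) {
  while (!omp_test_lock(lck)) {
    /* lock busy (FTN_FALSE returned): do something useful here rather than
       blocking inside the runtime */
  }
  /* FTN_TRUE: the calling thread now owns the lock */
  ++*shared_counter;
  omp_unset_lock(lck);
}
#endif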
3052 
3053 /* try to acquire the lock */
3054 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3055 #if KMP_USE_DYNAMIC_LOCK
3056  int rc;
3057 #if USE_ITT_BUILD
3058  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3059 #endif
3060 #if OMPT_SUPPORT && OMPT_OPTIONAL
3061  // This is the case, if called from omp_init_lock_with_hint:
3062  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3063  if (!codeptr)
3064  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3065  if (ompt_enabled.ompt_callback_mutex_acquire) {
3066  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3067  ompt_mutex_nest_lock, omp_lock_hint_none,
3068  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3069  codeptr);
3070  }
3071 #endif
3072  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3073 #if USE_ITT_BUILD
3074  if (rc) {
3075  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3076  } else {
3077  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3078  }
3079 #endif
3080 #if OMPT_SUPPORT && OMPT_OPTIONAL
3081  if (ompt_enabled.enabled && rc) {
3082  if (rc == 1) {
3083  if (ompt_enabled.ompt_callback_mutex_acquired) {
3084  // lock_first
3085  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3086  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
3087  }
3088  } else {
3089  if (ompt_enabled.ompt_callback_nest_lock) {
3090  // lock_next
3091  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3092  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
3093  }
3094  }
3095  }
3096 #endif
3097  return rc;
3098 
3099 #else // KMP_USE_DYNAMIC_LOCK
3100 
3101  kmp_user_lock_p lck;
3102  int rc;
3103 
3104  if ((__kmp_user_lock_kind == lk_tas) &&
3105  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3106  OMP_NEST_LOCK_T_SIZE)) {
3107  lck = (kmp_user_lock_p)user_lock;
3108  }
3109 #if KMP_USE_FUTEX
3110  else if ((__kmp_user_lock_kind == lk_futex) &&
3111  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3112  OMP_NEST_LOCK_T_SIZE)) {
3113  lck = (kmp_user_lock_p)user_lock;
3114  }
3115 #endif
3116  else {
3117  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3118  }
3119 
3120 #if USE_ITT_BUILD
3121  __kmp_itt_lock_acquiring(lck);
3122 #endif /* USE_ITT_BUILD */
3123 
3124 #if OMPT_SUPPORT && OMPT_OPTIONAL
3125  // This is the case, if called from omp_init_lock_with_hint:
3126  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3127  if (!codeptr)
3128  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3129  if (ompt_enabled.enabled &&
3130  ompt_enabled.ompt_callback_mutex_acquire) {
3131  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3132  ompt_mutex_nest_lock, omp_lock_hint_none,
3133  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
3134  }
3135 #endif
3136 
3137  rc = TEST_NESTED_LOCK(lck, gtid);
3138 #if USE_ITT_BUILD
3139  if (rc) {
3140  __kmp_itt_lock_acquired(lck);
3141  } else {
3142  __kmp_itt_lock_cancelled(lck);
3143  }
3144 #endif /* USE_ITT_BUILD */
3145 #if OMPT_SUPPORT && OMPT_OPTIONAL
3146  if (ompt_enabled.enabled && rc) {
3147  if (rc == 1) {
3148  if (ompt_enabled.ompt_callback_mutex_acquired) {
3149  // lock_first
3150  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3151  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
3152  }
3153  } else {
3154  if (ompt_enabled.ompt_callback_nest_lock) {
3155  // lock_next
3156  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3157  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
3158  }
3159  }
3160  }
3161 #endif
3162  return rc;
3163 
3164 /* Can't use serial interval since not block structured */
3165 
3166 #endif // KMP_USE_DYNAMIC_LOCK
3167 }
3168 
3169 // Interface to fast scalable reduce methods routines
3170 
3171 // keep the selected method in a thread local structure for cross-function
3172 // usage: will be used in __kmpc_end_reduce* functions;
3173 // another solution: to re-determine the method one more time in
3174 // __kmpc_end_reduce* functions (new prototype required then)
3175 // AT: which solution is better?
3176 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3177  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3178 
3179 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3180  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3181 
3182 // description of the packed_reduction_method variable: look at the macros in
3183 // kmp.h
3184 
3185 // used in a critical section reduce block
3186 static __forceinline void
3187 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3188  kmp_critical_name *crit) {
3189 
3190  // this lock was visible to a customer and to the threading profile tool as a
3191  // serial overhead span (although it's used for an internal purpose only)
3192  // why was it visible in the previous implementation?
3193  // should we keep it visible in the new reduce block?
3194  kmp_user_lock_p lck;
3195 
3196 #if KMP_USE_DYNAMIC_LOCK
3197 
3198  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3199  // Check if it is initialized.
3200  if (*lk == 0) {
3201  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3202  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3203  KMP_GET_D_TAG(__kmp_user_lock_seq));
3204  } else {
3205  __kmp_init_indirect_csptr(crit, loc, global_tid,
3206  KMP_GET_I_TAG(__kmp_user_lock_seq));
3207  }
3208  }
3209  // Branch for accessing the actual lock object and set operation. This
3210  // branching is inevitable since this lock initialization does not follow the
3211  // normal dispatch path (lock table is not used).
3212  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3213  lck = (kmp_user_lock_p)lk;
3214  KMP_DEBUG_ASSERT(lck != NULL);
3215  if (__kmp_env_consistency_check) {
3216  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3217  }
3218  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3219  } else {
3220  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3221  lck = ilk->lock;
3222  KMP_DEBUG_ASSERT(lck != NULL);
3223  if (__kmp_env_consistency_check) {
3224  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3225  }
3226  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3227  }
3228 
3229 #else // KMP_USE_DYNAMIC_LOCK
3230 
3231  // We know that the fast reduction code is only emitted by Intel compilers
3232  // with 32 byte critical sections. If there isn't enough space, then we
3233  // have to use a pointer.
3234  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3235  lck = (kmp_user_lock_p)crit;
3236  } else {
3237  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3238  }
3239  KMP_DEBUG_ASSERT(lck != NULL);
3240 
3241  if (__kmp_env_consistency_check)
3242  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3243 
3244  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3245 
3246 #endif // KMP_USE_DYNAMIC_LOCK
3247 }
3248 
3249 // used in a critical section reduce block
3250 static __forceinline void
3251 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3252  kmp_critical_name *crit) {
3253 
3254  kmp_user_lock_p lck;
3255 
3256 #if KMP_USE_DYNAMIC_LOCK
3257 
3258  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3259  lck = (kmp_user_lock_p)crit;
3260  if (__kmp_env_consistency_check)
3261  __kmp_pop_sync(global_tid, ct_critical, loc);
3262  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3263  } else {
3264  kmp_indirect_lock_t *ilk =
3265  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3266  if (__kmp_env_consistency_check)
3267  __kmp_pop_sync(global_tid, ct_critical, loc);
3268  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3269  }
3270 
3271 #else // KMP_USE_DYNAMIC_LOCK
3272 
3273  // We know that the fast reduction code is only emitted by Intel compilers
3274  // with 32 byte critical sections. If there isn't enough space, then we have
3275  // to use a pointer.
3276  if (__kmp_base_user_lock_size > 32) {
3277  lck = *((kmp_user_lock_p *)crit);
3278  KMP_ASSERT(lck != NULL);
3279  } else {
3280  lck = (kmp_user_lock_p)crit;
3281  }
3282 
3283  if (__kmp_env_consistency_check)
3284  __kmp_pop_sync(global_tid, ct_critical, loc);
3285 
3286  __kmp_release_user_lock_with_checks(lck, global_tid);
3287 
3288 #endif // KMP_USE_DYNAMIC_LOCK
3289 } // __kmp_end_critical_section_reduce_block
3290 
3291 #if OMP_40_ENABLED
3292 static __forceinline int
3293 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3294  int *task_state) {
3295  kmp_team_t *team;
3296 
3297  // Check if we are inside the teams construct?
3298  if (th->th.th_teams_microtask) {
3299  *team_p = team = th->th.th_team;
3300  if (team->t.t_level == th->th.th_teams_level) {
3301  // This is reduction at teams construct.
3302  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3303  // Let's swap teams temporarily for the reduction.
3304  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3305  th->th.th_team = team->t.t_parent;
3306  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3307  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3308  *task_state = th->th.th_task_state;
3309  th->th.th_task_state = 0;
3310 
3311  return 1;
3312  }
3313  }
3314  return 0;
3315 }
3316 
3317 static __forceinline void
3318 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3319  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3320  th->th.th_info.ds.ds_tid = 0;
3321  th->th.th_team = team;
3322  th->th.th_team_nproc = team->t.t_nproc;
3323  th->th.th_task_team = team->t.t_task_team[task_state];
3324  th->th.th_task_state = task_state;
3325 }
3326 #endif
3327 
3328 /* 2.a.i. Reduce Block without a terminating barrier */
3344 kmp_int32
3345 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3346  size_t reduce_size, void *reduce_data,
3347  void (*reduce_func)(void *lhs_data, void *rhs_data),
3348  kmp_critical_name *lck) {
3349 
3350  KMP_COUNT_BLOCK(REDUCE_nowait);
3351  int retval = 0;
3352  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3353 #if OMP_40_ENABLED
3354  kmp_info_t *th;
3355  kmp_team_t *team;
3356  int teams_swapped = 0, task_state;
3357 #endif
3358  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3359 
3360  // why do we need this initialization here at all?
3361  // The reduction clause cannot be used as a stand-alone directive.
3362 
3363  // do not call __kmp_serial_initialize(), it will be called by
3364  // __kmp_parallel_initialize() if needed
3365  // possible detection of false-positive race by the threadchecker ???
3366  if (!TCR_4(__kmp_init_parallel))
3367  __kmp_parallel_initialize();
3368 
3369 // check correctness of reduce block nesting
3370 #if KMP_USE_DYNAMIC_LOCK
3371  if (__kmp_env_consistency_check)
3372  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3373 #else
3374  if (__kmp_env_consistency_check)
3375  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3376 #endif
3377 
3378 #if OMP_40_ENABLED
3379  th = __kmp_thread_from_gtid(global_tid);
3380  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3381 #endif // OMP_40_ENABLED
3382 
3383  // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3384  // functions, so it must be kept in a variable
3385  // the variable should be either a construct-specific or a thread-specific
3386  // property, not a team-specific one
3387  // (a thread can reach the next reduce block on the next construct, and the
3388  // reduce method may differ on that construct)
3389  // the ident_t "loc" parameter could serve as a construct-specific property
3390  // (but what if loc == 0?)
3391  // (if a construct-specific or team-specific variable were shared, extra,
3392  // unnecessary synchronization would be needed)
3393  // a thread-specific variable avoids both issues above (the next construct
3394  // and the extra syncs)
3395  // the thread-specific "th_local.packed_reduction_method" variable is used
3396  // each thread executes the 'determine' and 'set' lines itself (no need to
3397  // have one thread do it, which would require unnecessary extra syncs)
3398 
3399  packed_reduction_method = __kmp_determine_reduction_method(
3400  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3401  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3402 
3403  if (packed_reduction_method == critical_reduce_block) {
3404 
3405  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3406  retval = 1;
3407 
3408  } else if (packed_reduction_method == empty_reduce_block) {
3409 
3410  // usage: if team size == 1, no synchronization is required ( Intel
3411  // platforms only )
3412  retval = 1;
3413 
3414  } else if (packed_reduction_method == atomic_reduce_block) {
3415 
3416  retval = 2;
3417 
3418  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3419  // won't be called by the code gen)
3420  // (this is not quite right: the checking block has already been closed
3421  // by this 'pop', but the atomic operation has not been executed yet;
3422  // it will be executed slightly later, literally on the next
3423  // instruction)
3424  if (__kmp_env_consistency_check)
3425  __kmp_pop_sync(global_tid, ct_reduce, loc);
3426 
3427  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3428  tree_reduce_block)) {
3429 
3430 // AT: performance issue: a real barrier here
3431 // AT: (if master goes slow, other threads are blocked here waiting for the
3432 // master to come and release them)
3433 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3434 // AT: (specifying NOWAIT won't result in any performance improvement, which
3435 // may be confusing to a customer)
3436 // AT: another implementation of *barrier_gather*nowait() (or some other
3437 // design) might go faster and be more in line with the sense of NOWAIT
3438 // AT: TO DO: do epcc test and compare times
3439 
3440 // this barrier should be invisible to a customer and to the threading profile
3441 // tool (it's neither a terminating barrier nor customer's code, it's
3442 // used for an internal purpose)
3443 #if OMPT_SUPPORT
3444  // JP: can this barrier potentially lead to task scheduling?
3445  // JP: as long as there is a barrier in the implementation, OMPT should and
3446  // will provide the barrier events
3447  // so we set-up the necessary frame/return addresses.
3448  ompt_frame_t *ompt_frame;
3449  if (ompt_enabled.enabled) {
3450  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3451  if (ompt_frame->enter_frame.ptr == NULL)
3452  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3453  OMPT_STORE_RETURN_ADDRESS(global_tid);
3454  }
3455 #endif
3456 #if USE_ITT_NOTIFY
3457  __kmp_threads[global_tid]->th.th_ident = loc;
3458 #endif
3459  retval =
3460  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3461  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3462  retval = (retval != 0) ? (0) : (1);
3463 #if OMPT_SUPPORT && OMPT_OPTIONAL
3464  if (ompt_enabled.enabled) {
3465  ompt_frame->enter_frame = ompt_data_none;
3466  }
3467 #endif
3468 
3469  // all other workers except master should do this pop here
3470  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3471  if (__kmp_env_consistency_check) {
3472  if (retval == 0) {
3473  __kmp_pop_sync(global_tid, ct_reduce, loc);
3474  }
3475  }
3476 
3477  } else {
3478 
3479  // should never reach this block
3480  KMP_ASSERT(0); // "unexpected method"
3481  }
3482 #if OMP_40_ENABLED
3483  if (teams_swapped) {
3484  __kmp_restore_swapped_teams(th, team, task_state);
3485  }
3486 #endif
3487  KA_TRACE(
3488  10,
3489  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3490  global_tid, packed_reduction_method, retval));
3491 
3492  return retval;
3493 }
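/* Illustrative lowering sketch (an assumption about code generation, not the
   exact code any particular compiler emits): how the return value of
   __kmpc_reduce_nowait() above selects between the critical-section, atomic,
   and tree-barrier reduction paths for something like "reduction(+:sum)".
   kmp_critical_name and kmp_int32 come from kmp.h, already included. */
#if 0
static kmp_critical_name example_reduce_crit; /* zero-initialized name/lock */

static void example_reduce_add(void *lhs, void *rhs) {
  /* reduce_func: combine two partial results (used by the tree method) */
  *(double *)lhs += *(double *)rhs;
}

static void example_reduce_nowait(ident_t *loc, kmp_int32 gtid,
                                  double *sum /* shared */, double my_sum) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(double),
                               &my_sum, example_reduce_add,
                               &example_reduce_crit)) {
  case 1: /* critical, empty, or tree method: this thread folds its (possibly
             already combined) partial into the shared variable */
    *sum += my_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &example_reduce_crit);
    break;
  case 2: /* atomic method: combine with an atomic update, no end call; real
             codegen would emit an atomic construct or a __kmpc_atomic_* call */
#pragma omp atomic
    *sum += my_sum;
    break;
  default: /* 0: nothing to do (e.g. non-master threads of the tree method) */
    break;
  }
}
#endif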
3494 
3503 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3504  kmp_critical_name *lck) {
3505 
3506  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3507 
3508  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3509 
3510  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3511 
3512  if (packed_reduction_method == critical_reduce_block) {
3513 
3514  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3515 
3516  } else if (packed_reduction_method == empty_reduce_block) {
3517 
3518  // usage: if team size == 1, no synchronization is required ( on Intel
3519  // platforms only )
3520 
3521  } else if (packed_reduction_method == atomic_reduce_block) {
3522 
3523  // neither master nor other workers should get here
3524  // (code gen does not generate this call in case 2: atomic reduce block)
3525  // actually it's better to remove this elseif at all;
3526  // after removal this value will checked by the 'else' and will assert
3527 
3528  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3529  tree_reduce_block)) {
3530 
3531  // only master gets here
3532 
3533  } else {
3534 
3535  // should never reach this block
3536  KMP_ASSERT(0); // "unexpected method"
3537  }
3538 
3539  if (__kmp_env_consistency_check)
3540  __kmp_pop_sync(global_tid, ct_reduce, loc);
3541 
3542  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3543  global_tid, packed_reduction_method));
3544 
3545  return;
3546 }
3547 
3548 /* 2.a.ii. Reduce Block with a terminating barrier */
3549 
3565 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3566  size_t reduce_size, void *reduce_data,
3567  void (*reduce_func)(void *lhs_data, void *rhs_data),
3568  kmp_critical_name *lck) {
3569  KMP_COUNT_BLOCK(REDUCE_wait);
3570  int retval = 0;
3571  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3572 #if OMP_40_ENABLED
3573  kmp_info_t *th;
3574  kmp_team_t *team;
3575  int teams_swapped = 0, task_state;
3576 #endif
3577 
3578  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3579 
3580  // why do we need this initialization here at all?
3581  // The reduction clause cannot be a stand-alone directive.
3582 
3583  // do not call __kmp_serial_initialize(), it will be called by
3584  // __kmp_parallel_initialize() if needed
3585  // possible detection of false-positive race by the threadchecker ???
3586  if (!TCR_4(__kmp_init_parallel))
3587  __kmp_parallel_initialize();
3588 
3589 // check correctness of reduce block nesting
3590 #if KMP_USE_DYNAMIC_LOCK
3591  if (__kmp_env_consistency_check)
3592  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3593 #else
3594  if (__kmp_env_consistency_check)
3595  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3596 #endif
3597 
3598 #if OMP_40_ENABLED
3599  th = __kmp_thread_from_gtid(global_tid);
3600  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3601 #endif // OMP_40_ENABLED
3602 
3603  packed_reduction_method = __kmp_determine_reduction_method(
3604  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3605  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3606 
3607  if (packed_reduction_method == critical_reduce_block) {
3608 
3609  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3610  retval = 1;
3611 
3612  } else if (packed_reduction_method == empty_reduce_block) {
3613 
3614  // usage: if team size == 1, no synchronization is required ( Intel
3615  // platforms only )
3616  retval = 1;
3617 
3618  } else if (packed_reduction_method == atomic_reduce_block) {
3619 
3620  retval = 2;
3621 
3622  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3623  tree_reduce_block)) {
3624 
3625 // case tree_reduce_block:
3626 // this barrier should be visible to a customer and to the threading profile
3627 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3628 #if OMPT_SUPPORT
3629  ompt_frame_t *ompt_frame;
3630  if (ompt_enabled.enabled) {
3631  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3632  if (ompt_frame->enter_frame.ptr == NULL)
3633  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3634  OMPT_STORE_RETURN_ADDRESS(global_tid);
3635  }
3636 #endif
3637 #if USE_ITT_NOTIFY
3638  __kmp_threads[global_tid]->th.th_ident =
3639  loc; // needed for correct notification of frames
3640 #endif
3641  retval =
3642  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3643  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3644  retval = (retval != 0) ? (0) : (1);
3645 #if OMPT_SUPPORT && OMPT_OPTIONAL
3646  if (ompt_enabled.enabled) {
3647  ompt_frame->enter_frame = ompt_data_none;
3648  }
3649 #endif
3650 
3651  // all other workers except master should do this pop here
3652  // ( none of other workers except master will enter __kmpc_end_reduce() )
3653  if (__kmp_env_consistency_check) {
3654  if (retval == 0) { // 0: all other workers; 1: master
3655  __kmp_pop_sync(global_tid, ct_reduce, loc);
3656  }
3657  }
3658 
3659  } else {
3660 
3661  // should never reach this block
3662  KMP_ASSERT(0); // "unexpected method"
3663  }
3664 #if OMP_40_ENABLED
3665  if (teams_swapped) {
3666  __kmp_restore_swapped_teams(th, team, task_state);
3667  }
3668 #endif
3669 
3670  KA_TRACE(10,
3671  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3672  global_tid, packed_reduction_method, retval));
3673 
3674  return retval;
3675 }
3676 
3687 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3688  kmp_critical_name *lck) {
3689 
3690  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3691 #if OMP_40_ENABLED
3692  kmp_info_t *th;
3693  kmp_team_t *team;
3694  int teams_swapped = 0, task_state;
3695 #endif
3696 
3697  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3698 
3699 #if OMP_40_ENABLED
3700  th = __kmp_thread_from_gtid(global_tid);
3701  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3702 #endif // OMP_40_ENABLED
3703 
3704  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3705 
3706  // this barrier should be visible to a customer and to the threading profile
3707  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3708 
3709  if (packed_reduction_method == critical_reduce_block) {
3710 
3711  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3712 
3713 // TODO: implicit barrier: should be exposed
3714 #if OMPT_SUPPORT
3715  ompt_frame_t *ompt_frame;
3716  if (ompt_enabled.enabled) {
3717  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3718  if (ompt_frame->enter_frame.ptr == NULL)
3719  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3720  OMPT_STORE_RETURN_ADDRESS(global_tid);
3721  }
3722 #endif
3723 #if USE_ITT_NOTIFY
3724  __kmp_threads[global_tid]->th.th_ident = loc;
3725 #endif
3726  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3727 #if OMPT_SUPPORT && OMPT_OPTIONAL
3728  if (ompt_enabled.enabled) {
3729  ompt_frame->enter_frame = ompt_data_none;
3730  }
3731 #endif
3732 
3733  } else if (packed_reduction_method == empty_reduce_block) {
3734 
3735 // usage: if team size==1, no synchronization is required (Intel platforms only)
3736 
3737 // TODO: implicit barrier: should be exposed
3738 #if OMPT_SUPPORT
3739  ompt_frame_t *ompt_frame;
3740  if (ompt_enabled.enabled) {
3741  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3742  if (ompt_frame->enter_frame.ptr == NULL)
3743  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3744  OMPT_STORE_RETURN_ADDRESS(global_tid);
3745  }
3746 #endif
3747 #if USE_ITT_NOTIFY
3748  __kmp_threads[global_tid]->th.th_ident = loc;
3749 #endif
3750  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3751 #if OMPT_SUPPORT && OMPT_OPTIONAL
3752  if (ompt_enabled.enabled) {
3753  ompt_frame->enter_frame = ompt_data_none;
3754  }
3755 #endif
3756 
3757  } else if (packed_reduction_method == atomic_reduce_block) {
3758 
3759 #if OMPT_SUPPORT
3760  ompt_frame_t *ompt_frame;
3761  if (ompt_enabled.enabled) {
3762  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3763  if (ompt_frame->enter_frame.ptr == NULL)
3764  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3765  OMPT_STORE_RETURN_ADDRESS(global_tid);
3766  }
3767 #endif
3768 // TODO: implicit barrier: should be exposed
3769 #if USE_ITT_NOTIFY
3770  __kmp_threads[global_tid]->th.th_ident = loc;
3771 #endif
3772  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3773 #if OMPT_SUPPORT && OMPT_OPTIONAL
3774  if (ompt_enabled.enabled) {
3775  ompt_frame->enter_frame = ompt_data_none;
3776  }
3777 #endif
3778 
3779  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3780  tree_reduce_block)) {
3781 
3782  // only master executes here (master releases all other workers)
3783  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3784  global_tid);
3785 
3786  } else {
3787 
3788  // should never reach this block
3789  KMP_ASSERT(0); // "unexpected method"
3790  }
3791 #if OMP_40_ENABLED
3792  if (teams_swapped) {
3793  __kmp_restore_swapped_teams(th, team, task_state);
3794  }
3795 #endif
3796 
3797  if (__kmp_env_consistency_check)
3798  __kmp_pop_sync(global_tid, ct_reduce, loc);
3799 
3800  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3801  global_tid, packed_reduction_method));
3802 
3803  return;
3804 }
3805 
3806 #undef __KMP_GET_REDUCTION_METHOD
3807 #undef __KMP_SET_REDUCTION_METHOD
3808 
3809 /* end of interface to fast scalable reduce routines */
3810 
3811 kmp_uint64 __kmpc_get_taskid() {
3812 
3813  kmp_int32 gtid;
3814  kmp_info_t *thread;
3815 
3816  gtid = __kmp_get_gtid();
3817  if (gtid < 0) {
3818  return 0;
3819  }
3820  thread = __kmp_thread_from_gtid(gtid);
3821  return thread->th.th_current_task->td_task_id;
3822 
3823 } // __kmpc_get_taskid
3824 
3825 kmp_uint64 __kmpc_get_parent_taskid() {
3826 
3827  kmp_int32 gtid;
3828  kmp_info_t *thread;
3829  kmp_taskdata_t *parent_task;
3830 
3831  gtid = __kmp_get_gtid();
3832  if (gtid < 0) {
3833  return 0;
3834  }
3835  thread = __kmp_thread_from_gtid(gtid);
3836  parent_task = thread->th.th_current_task->td_parent;
3837  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3838 
3839 } // __kmpc_get_parent_taskid
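/* Illustrative sketch, not part of kmp_csupport.cpp: __kmpc_get_taskid() and
 * __kmpc_get_parent_taskid() take no arguments and can be called from code
 * linked against the runtime, e.g. to tag trace output with task identities.
 * The helper name and the printf-based reporting are assumptions for
 * illustration; the prototypes and kmp_uint64 come from kmp.h. */
#include <stdio.h>

static void sketch_trace_task_identity(const char *where) {
  // Both calls return 0 if the calling thread is not registered with the
  // runtime (gtid < 0); the parent id is also 0 for a task with no parent.
  printf("%s: task %llu (parent %llu)\n", where,
         (unsigned long long)__kmpc_get_taskid(),
         (unsigned long long)__kmpc_get_parent_taskid());
}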
3840 
3841 #if OMP_45_ENABLED
3842 
3853 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3854  const struct kmp_dim *dims) {
3855  int j, idx;
3856  kmp_int64 last, trace_count;
3857  kmp_info_t *th = __kmp_threads[gtid];
3858  kmp_team_t *team = th->th.th_team;
3859  kmp_uint32 *flags;
3860  kmp_disp_t *pr_buf = th->th.th_dispatch;
3861  dispatch_shared_info_t *sh_buf;
3862 
3863  KA_TRACE(
3864  20,
3865  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3866  gtid, num_dims, !team->t.t_serialized));
3867  KMP_DEBUG_ASSERT(dims != NULL);
3868  KMP_DEBUG_ASSERT(num_dims > 0);
3869 
3870  if (team->t.t_serialized) {
3871  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3872  return; // no dependencies if team is serialized
3873  }
3874  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3875  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3876  // the next loop
3877  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3878 
3879  // Save bounds info into allocated private buffer
3880  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3881  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3882  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3883  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3884  pr_buf->th_doacross_info[0] =
3885  (kmp_int64)num_dims; // first element is number of dimensions
3886  // Also save the address of num_done so it can be accessed later without
3887  // knowing the buffer index
3888  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3889  pr_buf->th_doacross_info[2] = dims[0].lo;
3890  pr_buf->th_doacross_info[3] = dims[0].up;
3891  pr_buf->th_doacross_info[4] = dims[0].st;
3892  last = 5;
3893  for (j = 1; j < num_dims; ++j) {
3894  kmp_int64
3895  range_length; // keeps the range of each dimension except the first (dims[0])
3896  if (dims[j].st == 1) { // most common case
3897  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3898  range_length = dims[j].up - dims[j].lo + 1;
3899  } else {
3900  if (dims[j].st > 0) {
3901  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3902  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3903  } else { // negative increment
3904  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3905  range_length =
3906  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3907  }
3908  }
3909  pr_buf->th_doacross_info[last++] = range_length;
3910  pr_buf->th_doacross_info[last++] = dims[j].lo;
3911  pr_buf->th_doacross_info[last++] = dims[j].up;
3912  pr_buf->th_doacross_info[last++] = dims[j].st;
3913  }
3914 
3915  // Compute total trip count.
3916  // Start with range of dims[0] which we don't need to keep in the buffer.
3917  if (dims[0].st == 1) { // most common case
3918  trace_count = dims[0].up - dims[0].lo + 1;
3919  } else if (dims[0].st > 0) {
3920  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3921  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3922  } else { // negative increment
3923  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3924  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3925  }
3926  for (j = 1; j < num_dims; ++j) {
3927  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3928  }
3929  KMP_DEBUG_ASSERT(trace_count > 0);
3930 
3931  // Check whether the shared buffer is still occupied by a previous loop
3932  // (the one with buffer index idx - __kmp_dispatch_num_buffers)
3933  if (idx != sh_buf->doacross_buf_idx) {
3934  // Shared buffer is occupied, wait for it to be free
3935  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3936  __kmp_eq_4, NULL);
3937  }
3938 #if KMP_32_BIT_ARCH
3939  // Check if we are the first thread. After the CAS the first thread gets 0,
3940  // others get 1 if initialization is in progress, allocated pointer otherwise.
3941  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3942  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3943  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3944 #else
3945  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3946  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3947 #endif
3948  if (flags == NULL) {
3949  // we are the first thread, allocate the array of flags
3950  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3951  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3952  KMP_MB();
3953  sh_buf->doacross_flags = flags;
3954  } else if (flags == (kmp_uint32 *)1) {
3955 #if KMP_32_BIT_ARCH
3956  // initialization is still in progress, need to wait
3957  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3958 #else
3959  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3960 #endif
3961  KMP_YIELD(TRUE);
3962  KMP_MB();
3963  } else {
3964  KMP_MB();
3965  }
3966  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3967  pr_buf->th_doacross_flags =
3968  sh_buf->doacross_flags; // save a private copy so the shared buffer
3969  // is not touched on each iteration
3970  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3971 }
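/* Illustrative sketch, not part of kmp_csupport.cpp: one plausible lowering of
 * a one-dimensional doacross loop,
 *
 *   #pragma omp for ordered(1)
 *   for (i = 0; i < n; i++) {
 *     #pragma omp ordered depend(sink : i - 1)
 *     ... read data produced by iteration i - 1 ...
 *     #pragma omp ordered depend(source)
 *   }
 *
 * onto the doacross entry points. The chunk bounds my_lb/my_ub (normally
 * produced by the worksharing-loop init call) and the helper name are
 * assumptions for illustration. */
static void sketch_doacross_1d(ident_t *loc, int gtid, kmp_int64 n,
                               kmp_int64 my_lb, kmp_int64 my_ub) {
  struct kmp_dim dim;
  dim.lo = 0; // iteration space of the (single) ordered loop
  dim.up = n - 1;
  dim.st = 1;
  __kmpc_doacross_init(loc, gtid, 1, &dim); // once per thread, before the loop

  for (kmp_int64 i = my_lb; i <= my_ub; ++i) {
    kmp_int64 vec = i - 1;
    __kmpc_doacross_wait(loc, gtid, &vec); // depend(sink : i - 1); returns
                                           // immediately if i - 1 is out of
                                           // bounds, as the wait code below
                                           // shows
    /* ... loop body for iteration i ... */
    vec = i;
    __kmpc_doacross_post(loc, gtid, &vec); // depend(source): publish iteration i
  }
  __kmpc_doacross_fini(loc, gtid); // once per thread, after the loop
}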
3972 
3973 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3974  kmp_int32 shft, num_dims, i;
3975  kmp_uint32 flag;
3976  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3977  kmp_info_t *th = __kmp_threads[gtid];
3978  kmp_team_t *team = th->th.th_team;
3979  kmp_disp_t *pr_buf;
3980  kmp_int64 lo, up, st;
3981 
3982  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3983  if (team->t.t_serialized) {
3984  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3985  return; // no dependencies if team is serialized
3986  }
3987 
3988  // calculate sequential iteration number and check out-of-bounds condition
3989  pr_buf = th->th.th_dispatch;
3990  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3991  num_dims = pr_buf->th_doacross_info[0];
3992  lo = pr_buf->th_doacross_info[2];
3993  up = pr_buf->th_doacross_info[3];
3994  st = pr_buf->th_doacross_info[4];
3995  if (st == 1) { // most common case
3996  if (vec[0] < lo || vec[0] > up) {
3997  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3998  "bounds [%lld,%lld]\n",
3999  gtid, vec[0], lo, up));
4000  return;
4001  }
4002  iter_number = vec[0] - lo;
4003  } else if (st > 0) {
4004  if (vec[0] < lo || vec[0] > up) {
4005  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4006  "bounds [%lld,%lld]\n",
4007  gtid, vec[0], lo, up));
4008  return;
4009  }
4010  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4011  } else { // negative increment
4012  if (vec[0] > lo || vec[0] < up) {
4013  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4014  "bounds [%lld,%lld]\n",
4015  gtid, vec[0], lo, up));
4016  return;
4017  }
4018  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4019  }
4020  for (i = 1; i < num_dims; ++i) {
4021  kmp_int64 iter, ln;
4022  kmp_int32 j = i * 4;
4023  ln = pr_buf->th_doacross_info[j + 1];
4024  lo = pr_buf->th_doacross_info[j + 2];
4025  up = pr_buf->th_doacross_info[j + 3];
4026  st = pr_buf->th_doacross_info[j + 4];
4027  if (st == 1) {
4028  if (vec[i] < lo || vec[i] > up) {
4029  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4030  "bounds [%lld,%lld]\n",
4031  gtid, vec[i], lo, up));
4032  return;
4033  }
4034  iter = vec[i] - lo;
4035  } else if (st > 0) {
4036  if (vec[i] < lo || vec[i] > up) {
4037  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4038  "bounds [%lld,%lld]\n",
4039  gtid, vec[i], lo, up));
4040  return;
4041  }
4042  iter = (kmp_uint64)(vec[i] - lo) / st;
4043  } else { // st < 0
4044  if (vec[i] > lo || vec[i] < up) {
4045  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4046  "bounds [%lld,%lld]\n",
4047  gtid, vec[i], lo, up));
4048  return;
4049  }
4050  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4051  }
4052  iter_number = iter + ln * iter_number;
4053  }
4054  shft = iter_number % 32; // use 32-bit granularity
4055  iter_number >>= 5; // divided by 32
4056  flag = 1 << shft;
4057  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4058  KMP_YIELD(TRUE);
4059  }
4060  KMP_MB();
4061  KA_TRACE(20,
4062  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4063  gtid, (iter_number << 5) + shft));
4064 }
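/* Illustrative sketch, not part of kmp_csupport.cpp: the flattening performed
 * above (and repeated in __kmpc_doacross_post() below) for the common case
 * where every dimension has stride 1. "info" is assumed to have the
 * th_doacross_info layout built by __kmpc_doacross_init(): info[0] holds
 * num_dims, info[2] the first dimension's lower bound, and for i >= 1
 * info[4*i+1]/info[4*i+2] hold dimension i's range length and lower bound.
 * Worked example: for a 2-D nest with range lengths 10 and 5, the vector
 * (3, 2) maps to 2 + 5 * 3 = 17, i.e. bit 17 % 32 = 17 of flag word
 * 17 / 32 = 0. */
static kmp_int64 sketch_flatten_iteration(const kmp_int64 *info,
                                          const kmp_int64 *vec) {
  kmp_int64 num_dims = info[0];
  kmp_int64 iter_number = vec[0] - info[2]; // dimension 0: vec[0] - lo
  for (kmp_int64 i = 1; i < num_dims; ++i) {
    kmp_int64 ln = info[4 * i + 1]; // range length of dimension i
    kmp_int64 lo = info[4 * i + 2]; // lower bound of dimension i
    iter_number = (vec[i] - lo) + ln * iter_number; // row-major flattening
  }
  return iter_number; // bit (iter_number % 32) of word (iter_number >> 5)
}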
4065 
4066 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4067  kmp_int32 shft, num_dims, i;
4068  kmp_uint32 flag;
4069  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4070  kmp_info_t *th = __kmp_threads[gtid];
4071  kmp_team_t *team = th->th.th_team;
4072  kmp_disp_t *pr_buf;
4073  kmp_int64 lo, st;
4074 
4075  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4076  if (team->t.t_serialized) {
4077  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4078  return; // no dependencies if team is serialized
4079  }
4080 
4081  // calculate sequential iteration number (same as in "wait" but no
4082  // out-of-bounds checks)
4083  pr_buf = th->th.th_dispatch;
4084  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4085  num_dims = pr_buf->th_doacross_info[0];
4086  lo = pr_buf->th_doacross_info[2];
4087  st = pr_buf->th_doacross_info[4];
4088  if (st == 1) { // most common case
4089  iter_number = vec[0] - lo;
4090  } else if (st > 0) {
4091  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4092  } else { // negative increment
4093  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4094  }
4095  for (i = 1; i < num_dims; ++i) {
4096  kmp_int64 iter, ln;
4097  kmp_int32 j = i * 4;
4098  ln = pr_buf->th_doacross_info[j + 1];
4099  lo = pr_buf->th_doacross_info[j + 2];
4100  st = pr_buf->th_doacross_info[j + 4];
4101  if (st == 1) {
4102  iter = vec[i] - lo;
4103  } else if (st > 0) {
4104  iter = (kmp_uint64)(vec[i] - lo) / st;
4105  } else { // st < 0
4106  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4107  }
4108  iter_number = iter + ln * iter_number;
4109  }
4110  shft = iter_number % 32; // use 32-bit granularity
4111  iter_number >>= 5; // divided by 32
4112  flag = 1 << shft;
4113  KMP_MB();
4114  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4115  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4116  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4117  (iter_number << 5) + shft));
4118 }
4119 
4120 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4121  kmp_int32 num_done;
4122  kmp_info_t *th = __kmp_threads[gtid];
4123  kmp_team_t *team = th->th.th_team;
4124  kmp_disp_t *pr_buf = th->th.th_dispatch;
4125 
4126  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4127  if (team->t.t_serialized) {
4128  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4129  return; // nothing to do
4130  }
4131  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4132  if (num_done == th->th.th_team_nproc) {
4133  // we are the last thread, need to free shared resources
4134  int idx = pr_buf->th_doacross_buf_idx - 1;
4135  dispatch_shared_info_t *sh_buf =
4136  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4137  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4138  (kmp_int64)&sh_buf->doacross_num_done);
4139  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4140  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4141  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4142  sh_buf->doacross_flags = NULL;
4143  sh_buf->doacross_num_done = 0;
4144  sh_buf->doacross_buf_idx +=
4145  __kmp_dispatch_num_buffers; // free buffer for future re-use
4146  }
4147  // free private resources (need to keep buffer index forever)
4148  pr_buf->th_doacross_flags = NULL;
4149  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4150  pr_buf->th_doacross_info = NULL;
4151  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4152 }
4153 #endif
4154 
4155 #if OMP_50_ENABLED
4156 int __kmpc_get_target_offload(void) {
4157  if (!__kmp_init_serial) {
4158  __kmp_serial_initialize();
4159  }
4160  return __kmp_target_offload;
4161 }
4162 #endif // OMP_50_ENABLED
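/* Illustrative sketch, not part of kmp_csupport.cpp: the value returned by
 * __kmpc_get_target_offload() reflects the OMP_TARGET_OFFLOAD policy
 * ("disabled", "default" or "mandatory") parsed by the runtime during serial
 * initialization. The helper name and printf reporting are assumptions; the
 * mapping of policies to integer values is deliberately left symbolic here
 * rather than restated from kmp.h. */
#include <stdio.h>

static void sketch_report_offload_policy(void) {
  int policy = __kmpc_get_target_offload(); // initializes the library if needed
  printf("OMP_TARGET_OFFLOAD policy value: %d\n", policy);
}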
4163 
4164 // end of file //