LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in future, e.g. to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
63  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
64  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
65  // returns FALSE and __kmpc_end() will unregister this root (it can cause
66  // library shut down).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 }
74 
94  kmp_int32 gtid = __kmp_entry_gtid();
95 
96  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
97 
98  return gtid;
99 }
100 
116  KC_TRACE(10,
117  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
118 
119  return TCR_4(__kmp_all_nth);
120 }
121 
129  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
130  return __kmp_tid_from_gtid(__kmp_entry_gtid());
131 }
132 
139  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
140 
141  return __kmp_entry_thread()->th.th_team->t.t_nproc;
142 }
143 
150 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
151 #ifndef KMP_DEBUG
152 
153  return TRUE;
154 
155 #else
156 
157  const char *semi2;
158  const char *semi3;
159  int line_no;
160 
161  if (__kmp_par_range == 0) {
162  return TRUE;
163  }
164  semi2 = loc->psource;
165  if (semi2 == NULL) {
166  return TRUE;
167  }
168  semi2 = strchr(semi2, ';');
169  if (semi2 == NULL) {
170  return TRUE;
171  }
172  semi2 = strchr(semi2 + 1, ';');
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  if (__kmp_par_range_filename[0]) {
177  const char *name = semi2 - 1;
178  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
179  name--;
180  }
181  if ((*name == '/') || (*name == ';')) {
182  name++;
183  }
184  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
185  return __kmp_par_range < 0;
186  }
187  }
188  semi3 = strchr(semi2 + 1, ';');
189  if (__kmp_par_range_routine[0]) {
190  if ((semi3 != NULL) && (semi3 > semi2) &&
191  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
192  return __kmp_par_range < 0;
193  }
194  }
195  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
196  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
197  return __kmp_par_range > 0;
198  }
199  return __kmp_par_range < 0;
200  }
201  return TRUE;
202 
203 #endif /* KMP_DEBUG */
204 }
205 
212 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
213  return __kmp_entry_thread()->th.th_root->r.r_active;
214 }
215 
225 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
226  kmp_int32 num_threads) {
227  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
228  global_tid, num_threads));
229 
230  __kmp_push_num_threads(loc, global_tid, num_threads);
231 }
232 
233 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
234  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
235 
236  /* the num_threads are automatically popped */
237 }
238 
239 #if OMP_40_ENABLED
240 
241 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
242  kmp_int32 proc_bind) {
243  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
244  proc_bind));
245 
246  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
247 }
248 
249 #endif /* OMP_40_ENABLED */
250 
261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
262  int gtid = __kmp_entry_gtid();
263 
264 #if (KMP_STATS_ENABLED)
265  // If we were in a serial region, then stop the serial timer, record
266  // the event, and start parallel region timer
267  stats_state_e previous_state = KMP_GET_THREAD_STATE();
268  if (previous_state == stats_state_e::SERIAL_REGION) {
269  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
270  } else {
271  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
272  }
273  int inParallel = __kmpc_in_parallel(loc);
274  if (inParallel) {
275  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
276  } else {
277  KMP_COUNT_BLOCK(OMP_PARALLEL);
278  }
279 #endif
280 
281  // maybe to save thr_state is enough here
282  {
283  va_list ap;
284  va_start(ap, microtask);
285 
286 #if OMPT_SUPPORT
287  omp_frame_t *ompt_frame;
288  if (ompt_enabled.enabled) {
289  kmp_info_t *master_th = __kmp_threads[gtid];
290  kmp_team_t *parent_team = master_th->th.th_team;
291  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
292  if (lwt)
293  ompt_frame = &(lwt->ompt_task_info.frame);
294  else {
295  int tid = __kmp_tid_from_gtid(gtid);
296  ompt_frame = &(
297  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
298  }
299  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
300  OMPT_STORE_RETURN_ADDRESS(gtid);
301  }
302 #endif
303 
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_FORKING();
306 #endif
307  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
308  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
309  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
310 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
311 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
312  &ap
313 #else
314  ap
315 #endif
316  );
317 #if INCLUDE_SSC_MARKS
318  SSC_MARK_JOINING();
319 #endif
320  __kmp_join_call(loc, gtid
321 #if OMPT_SUPPORT
322  ,
323  fork_context_intel
324 #endif
325  );
326 
327  va_end(ap);
328  }
329 
330 #if KMP_STATS_ENABLED
331  if (previous_state == stats_state_e::SERIAL_REGION) {
332  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
333  } else {
334  KMP_POP_PARTITIONED_TIMER();
335  }
336 #endif // KMP_STATS_ENABLED
337 }
338 
339 #if OMP_40_ENABLED
340 
351 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
352  kmp_int32 num_teams, kmp_int32 num_threads) {
353  KA_TRACE(20,
354  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
355  global_tid, num_teams, num_threads));
356 
357  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
358 }
359 
370 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
371  ...) {
372  int gtid = __kmp_entry_gtid();
373  kmp_info_t *this_thr = __kmp_threads[gtid];
374  va_list ap;
375  va_start(ap, microtask);
376 
377  KMP_COUNT_BLOCK(OMP_TEAMS);
378 
379  // remember teams entry point and nesting level
380  this_thr->th.th_teams_microtask = microtask;
381  this_thr->th.th_teams_level =
382  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
383 
384 #if OMPT_SUPPORT
385  kmp_team_t *parent_team = this_thr->th.th_team;
386  int tid = __kmp_tid_from_gtid(gtid);
387  if (ompt_enabled.enabled) {
388  parent_team->t.t_implicit_task_taskdata[tid]
389  .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
390  }
391  OMPT_STORE_RETURN_ADDRESS(gtid);
392 #endif
393 
394  // check if __kmpc_push_num_teams called, set default number of teams
395  // otherwise
396  if (this_thr->th.th_teams_size.nteams == 0) {
397  __kmp_push_num_teams(loc, gtid, 0, 0);
398  }
399  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
400  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
401  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
402 
403  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
404  VOLATILE_CAST(microtask_t)
405  __kmp_teams_master, // "wrapped" task
406  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
407 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
408  &ap
409 #else
410  ap
411 #endif
412  );
413  __kmp_join_call(loc, gtid
414 #if OMPT_SUPPORT
415  ,
416  fork_context_intel
417 #endif
418  );
419 
420  this_thr->th.th_teams_microtask = NULL;
421  this_thr->th.th_teams_level = 0;
422  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
423  va_end(ap);
424 }
425 #endif /* OMP_40_ENABLED */
426 
427 // I don't think this function should ever have been exported.
428 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
429 // openmp code ever called it, but it's been exported from the RTL for so
430 // long that I'm afraid to remove the definition.
431 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
432 
445 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
446 // The implementation is now in kmp_runtime.cpp so that it can share static
447 // functions with kmp_fork_call since the tasks to be done are similar in
448 // each case.
449 #if OMPT_SUPPORT
450  OMPT_STORE_RETURN_ADDRESS(global_tid);
451 #endif
452  __kmp_serialized_parallel(loc, global_tid);
453 }
454 
462 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
463  kmp_internal_control_t *top;
464  kmp_info_t *this_thr;
465  kmp_team_t *serial_team;
466 
467  KC_TRACE(10,
468  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
469 
470  /* skip all this code for autopar serialized loops since it results in
471  unacceptable overhead */
472  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
473  return;
474 
475  // Not autopar code
476  if (!TCR_4(__kmp_init_parallel))
477  __kmp_parallel_initialize();
478 
479  this_thr = __kmp_threads[global_tid];
480  serial_team = this_thr->th.th_serial_team;
481 
482 #if OMP_45_ENABLED
483  kmp_task_team_t *task_team = this_thr->th.th_task_team;
484 
485  // we need to wait for the proxy tasks before finishing the thread
486  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
487  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
488 #endif
489 
490  KMP_MB();
491  KMP_DEBUG_ASSERT(serial_team);
492  KMP_ASSERT(serial_team->t.t_serialized);
493  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
494  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
495  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
496  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
497 
498 #if OMPT_SUPPORT
499  if (ompt_enabled.enabled &&
500  this_thr->th.ompt_thread_info.state != omp_state_overhead) {
501  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
502  if (ompt_enabled.ompt_callback_implicit_task) {
503  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
504  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
505  OMPT_CUR_TASK_INFO(this_thr)->thread_num);
506  }
507 
508  // reset clear the task id only after unlinking the task
509  ompt_data_t *parent_task_data;
510  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
511 
512  if (ompt_enabled.ompt_callback_parallel_end) {
513  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
514  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
515  ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
516  }
517  __ompt_lw_taskteam_unlink(this_thr);
518  this_thr->th.ompt_thread_info.state = omp_state_overhead;
519  }
520 #endif
521 
522  /* If necessary, pop the internal control stack values and replace the team
523  * values */
524  top = serial_team->t.t_control_stack_top;
525  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
526  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
527  serial_team->t.t_control_stack_top = top->next;
528  __kmp_free(top);
529  }
530 
531  // if( serial_team -> t.t_serialized > 1 )
532  serial_team->t.t_level--;
533 
534  /* pop dispatch buffers stack */
535  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
536  {
537  dispatch_private_info_t *disp_buffer =
538  serial_team->t.t_dispatch->th_disp_buffer;
539  serial_team->t.t_dispatch->th_disp_buffer =
540  serial_team->t.t_dispatch->th_disp_buffer->next;
541  __kmp_free(disp_buffer);
542  }
543 
544  --serial_team->t.t_serialized;
545  if (serial_team->t.t_serialized == 0) {
546 
547 /* return to the parallel section */
548 
549 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
550  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
551  __kmp_clear_x87_fpu_status_word();
552  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
553  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
554  }
555 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
556 
557  this_thr->th.th_team = serial_team->t.t_parent;
558  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
559 
560  /* restore values cached in the thread */
561  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
562  this_thr->th.th_team_master =
563  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
564  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
565 
566  /* TODO the below shouldn't need to be adjusted for serialized teams */
567  this_thr->th.th_dispatch =
568  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
569 
570  __kmp_pop_current_task_from_thread(this_thr);
571 
572  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
573  this_thr->th.th_current_task->td_flags.executing = 1;
574 
575  if (__kmp_tasking_mode != tskm_immediate_exec) {
576  // Copy the task team from the new child / old parent team to the thread.
577  this_thr->th.th_task_team =
578  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
579  KA_TRACE(20,
580  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
581  "team %p\n",
582  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
583  }
584  } else {
585  if (__kmp_tasking_mode != tskm_immediate_exec) {
586  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
587  "depth of serial team %p to %d\n",
588  global_tid, serial_team, serial_team->t.t_serialized));
589  }
590  }
591 
592  if (__kmp_env_consistency_check)
593  __kmp_pop_parallel(global_tid, NULL);
594 #if OMPT_SUPPORT
595  if (ompt_enabled.enabled)
596  this_thr->th.ompt_thread_info.state =
597  ((this_thr->th.th_team_serialized) ? omp_state_work_serial
598  : omp_state_work_parallel);
599 #endif
600 }
601 
610 void __kmpc_flush(ident_t *loc) {
611  KC_TRACE(10, ("__kmpc_flush: called\n"));
612 
613  /* need explicit __mf() here since use volatile instead in library */
614  KMP_MB(); /* Flush all pending memory write invalidates. */
615 
616 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
617 #if KMP_MIC
618 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
619 // We shouldn't need it, though, since the ABI rules require that
620 // * If the compiler generates NGO stores it also generates the fence
621 // * If users hand-code NGO stores they should insert the fence
622 // therefore no incomplete unordered stores should be visible.
623 #else
624  // C74404
625  // This is to address non-temporal store instructions (sfence needed).
626  // The clflush instruction is addressed either (mfence needed).
627  // Probably the non-temporal load monvtdqa instruction should also be
628  // addressed.
629  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
630  if (!__kmp_cpuinfo.initialized) {
631  __kmp_query_cpuid(&__kmp_cpuinfo);
632  }
633  if (!__kmp_cpuinfo.sse2) {
634  // CPU cannot execute SSE2 instructions.
635  } else {
636 #if KMP_COMPILER_ICC
637  _mm_mfence();
638 #elif KMP_COMPILER_MSVC
639  MemoryBarrier();
640 #else
641  __sync_synchronize();
642 #endif // KMP_COMPILER_ICC
643  }
644 #endif // KMP_MIC
645 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
646 // Nothing to see here move along
647 #elif KMP_ARCH_PPC64
648 // Nothing needed here (we have a real MB above).
649 #if KMP_OS_CNK
650  // The flushing thread needs to yield here; this prevents a
651  // busy-waiting thread from saturating the pipeline. flush is
652  // often used in loops like this:
653  // while (!flag) {
654  // #pragma omp flush(flag)
655  // }
656  // and adding the yield here is good for at least a 10x speedup
657  // when running >2 threads per core (on the NAS LU benchmark).
658  __kmp_yield(TRUE);
659 #endif
660 #else
661 #error Unknown or unsupported architecture
662 #endif
663 
664 #if OMPT_SUPPORT && OMPT_OPTIONAL
665  if (ompt_enabled.ompt_callback_flush) {
666  ompt_callbacks.ompt_callback(ompt_callback_flush)(
667  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
668  }
669 #endif
670 }
671 
672 /* -------------------------------------------------------------------------- */
680 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
681  KMP_COUNT_BLOCK(OMP_BARRIER);
682  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
683 
684  if (!TCR_4(__kmp_init_parallel))
685  __kmp_parallel_initialize();
686 
687  if (__kmp_env_consistency_check) {
688  if (loc == 0) {
689  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
690  }
691 
692  __kmp_check_barrier(global_tid, ct_barrier, loc);
693  }
694 
695 #if OMPT_SUPPORT
696  omp_frame_t *ompt_frame;
697  if (ompt_enabled.enabled) {
698  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
699  if (ompt_frame->enter_frame == NULL)
700  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
701  OMPT_STORE_RETURN_ADDRESS(global_tid);
702  }
703 #endif
704  __kmp_threads[global_tid]->th.th_ident = loc;
705  // TODO: explicit barrier_wait_id:
706  // this function is called when 'barrier' directive is present or
707  // implicit barrier at the end of a worksharing construct.
708  // 1) better to add a per-thread barrier counter to a thread data structure
709  // 2) set to 0 when a new team is created
710  // 4) no sync is required
711 
712  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
713 #if OMPT_SUPPORT && OMPT_OPTIONAL
714  if (ompt_enabled.enabled) {
715  ompt_frame->enter_frame = NULL;
716  }
717 #endif
718 }
719 
720 /* The BARRIER for a MASTER section is always explicit */
727 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
728  int status = 0;
729 
730  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
731 
732  if (!TCR_4(__kmp_init_parallel))
733  __kmp_parallel_initialize();
734 
735  if (KMP_MASTER_GTID(global_tid)) {
736  KMP_COUNT_BLOCK(OMP_MASTER);
737  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
738  status = 1;
739  }
740 
741 #if OMPT_SUPPORT && OMPT_OPTIONAL
742  if (status) {
743  if (ompt_enabled.ompt_callback_master) {
744  kmp_info_t *this_thr = __kmp_threads[global_tid];
745  kmp_team_t *team = this_thr->th.th_team;
746 
747  int tid = __kmp_tid_from_gtid(global_tid);
748  ompt_callbacks.ompt_callback(ompt_callback_master)(
749  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
750  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
751  OMPT_GET_RETURN_ADDRESS(0));
752  }
753  }
754 #endif
755 
756  if (__kmp_env_consistency_check) {
757 #if KMP_USE_DYNAMIC_LOCK
758  if (status)
759  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
760  else
761  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
762 #else
763  if (status)
764  __kmp_push_sync(global_tid, ct_master, loc, NULL);
765  else
766  __kmp_check_sync(global_tid, ct_master, loc, NULL);
767 #endif
768  }
769 
770  return status;
771 }
772 
781 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
782  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
783 
784  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
785  KMP_POP_PARTITIONED_TIMER();
786 
787 #if OMPT_SUPPORT && OMPT_OPTIONAL
788  kmp_info_t *this_thr = __kmp_threads[global_tid];
789  kmp_team_t *team = this_thr->th.th_team;
790  if (ompt_enabled.ompt_callback_master) {
791  int tid = __kmp_tid_from_gtid(global_tid);
792  ompt_callbacks.ompt_callback(ompt_callback_master)(
793  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
794  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
795  OMPT_GET_RETURN_ADDRESS(0));
796  }
797 #endif
798 
799  if (__kmp_env_consistency_check) {
800  if (global_tid < 0)
801  KMP_WARNING(ThreadIdentInvalid);
802 
803  if (KMP_MASTER_GTID(global_tid))
804  __kmp_pop_sync(global_tid, ct_master, loc);
805  }
806 }
807 
815 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
816  int cid = 0;
817  kmp_info_t *th;
818  KMP_DEBUG_ASSERT(__kmp_init_serial);
819 
820  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
821 
822  if (!TCR_4(__kmp_init_parallel))
823  __kmp_parallel_initialize();
824 
825 #if USE_ITT_BUILD
826  __kmp_itt_ordered_prep(gtid);
827 // TODO: ordered_wait_id
828 #endif /* USE_ITT_BUILD */
829 
830  th = __kmp_threads[gtid];
831 
832 #if OMPT_SUPPORT && OMPT_OPTIONAL
833  kmp_team_t *team;
834  omp_wait_id_t lck;
835  void *codeptr_ra;
836  if (ompt_enabled.enabled) {
837  OMPT_STORE_RETURN_ADDRESS(gtid);
838  team = __kmp_team_from_gtid(gtid);
839  lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
840  /* OMPT state update */
841  th->th.ompt_thread_info.wait_id = lck;
842  th->th.ompt_thread_info.state = omp_state_wait_ordered;
843 
844  /* OMPT event callback */
845  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
846  if (ompt_enabled.ompt_callback_mutex_acquire) {
847  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
848  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
849  (omp_wait_id_t)lck, codeptr_ra);
850  }
851  }
852 #endif
853 
854  if (th->th.th_dispatch->th_deo_fcn != 0)
855  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
856  else
857  __kmp_parallel_deo(&gtid, &cid, loc);
858 
859 #if OMPT_SUPPORT && OMPT_OPTIONAL
860  if (ompt_enabled.enabled) {
861  /* OMPT state update */
862  th->th.ompt_thread_info.state = omp_state_work_parallel;
863  th->th.ompt_thread_info.wait_id = 0;
864 
865  /* OMPT event callback */
866  if (ompt_enabled.ompt_callback_mutex_acquired) {
867  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
868  ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
869  }
870  }
871 #endif
872 
873 #if USE_ITT_BUILD
874  __kmp_itt_ordered_start(gtid);
875 #endif /* USE_ITT_BUILD */
876 }
877 
885 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
886  int cid = 0;
887  kmp_info_t *th;
888 
889  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
890 
891 #if USE_ITT_BUILD
892  __kmp_itt_ordered_end(gtid);
893 // TODO: ordered_wait_id
894 #endif /* USE_ITT_BUILD */
895 
896  th = __kmp_threads[gtid];
897 
898  if (th->th.th_dispatch->th_dxo_fcn != 0)
899  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
900  else
901  __kmp_parallel_dxo(&gtid, &cid, loc);
902 
903 #if OMPT_SUPPORT && OMPT_OPTIONAL
904  OMPT_STORE_RETURN_ADDRESS(gtid);
905  if (ompt_enabled.ompt_callback_mutex_released) {
906  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
907  ompt_mutex_ordered,
908  (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
909  OMPT_LOAD_RETURN_ADDRESS(gtid));
910  }
911 #endif
912 }
913 
914 #if KMP_USE_DYNAMIC_LOCK
915 
916 static __forceinline void
917 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
918  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
919  // Pointer to the allocated indirect lock is written to crit, while indexing
920  // is ignored.
921  void *idx;
922  kmp_indirect_lock_t **lck;
923  lck = (kmp_indirect_lock_t **)crit;
924  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
925  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
926  KMP_SET_I_LOCK_LOCATION(ilk, loc);
927  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
928  KA_TRACE(20,
929  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
930 #if USE_ITT_BUILD
931  __kmp_itt_critical_creating(ilk->lock, loc);
932 #endif
933  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
934  if (status == 0) {
935 #if USE_ITT_BUILD
936  __kmp_itt_critical_destroyed(ilk->lock);
937 #endif
938  // We don't really need to destroy the unclaimed lock here since it will be
939  // cleaned up at program exit.
940  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
941  }
942  KMP_DEBUG_ASSERT(*lck != NULL);
943 }
944 
945 // Fast-path acquire tas lock
946 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
947  { \
948  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
949  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
950  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
951  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
952  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
953  kmp_uint32 spins; \
954  KMP_FSYNC_PREPARE(l); \
955  KMP_INIT_YIELD(spins); \
956  if (TCR_4(__kmp_nth) > \
957  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
958  KMP_YIELD(TRUE); \
959  } else { \
960  KMP_YIELD_SPIN(spins); \
961  } \
962  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
963  while ( \
964  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
965  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
966  __kmp_spin_backoff(&backoff); \
967  if (TCR_4(__kmp_nth) > \
968  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
969  KMP_YIELD(TRUE); \
970  } else { \
971  KMP_YIELD_SPIN(spins); \
972  } \
973  } \
974  } \
975  KMP_FSYNC_ACQUIRED(l); \
976  }
977 
978 // Fast-path test tas lock
979 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
980  { \
981  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
982  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
983  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
984  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
985  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
986  }
987 
988 // Fast-path release tas lock
989 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
990  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
991 
992 #if KMP_USE_FUTEX
993 
994 #include <sys/syscall.h>
995 #include <unistd.h>
996 #ifndef FUTEX_WAIT
997 #define FUTEX_WAIT 0
998 #endif
999 #ifndef FUTEX_WAKE
1000 #define FUTEX_WAKE 1
1001 #endif
1002 
1003 // Fast-path acquire futex lock
1004 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1005  { \
1006  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1007  kmp_int32 gtid_code = (gtid + 1) << 1; \
1008  KMP_MB(); \
1009  KMP_FSYNC_PREPARE(ftx); \
1010  kmp_int32 poll_val; \
1011  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1012  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1013  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1014  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1015  if (!cond) { \
1016  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1017  poll_val | \
1018  KMP_LOCK_BUSY(1, futex))) { \
1019  continue; \
1020  } \
1021  poll_val |= KMP_LOCK_BUSY(1, futex); \
1022  } \
1023  kmp_int32 rc; \
1024  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1025  NULL, NULL, 0)) != 0) { \
1026  continue; \
1027  } \
1028  gtid_code |= 1; \
1029  } \
1030  KMP_FSYNC_ACQUIRED(ftx); \
1031  }
1032 
1033 // Fast-path test futex lock
1034 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1035  { \
1036  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1037  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1038  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1039  KMP_FSYNC_ACQUIRED(ftx); \
1040  rc = TRUE; \
1041  } else { \
1042  rc = FALSE; \
1043  } \
1044  }
1045 
1046 // Fast-path release futex lock
1047 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1048  { \
1049  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1050  KMP_MB(); \
1051  KMP_FSYNC_RELEASING(ftx); \
1052  kmp_int32 poll_val = \
1053  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1054  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1055  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1056  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1057  } \
1058  KMP_MB(); \
1059  KMP_YIELD(TCR_4(__kmp_nth) > \
1060  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1061  }
1062 
1063 #endif // KMP_USE_FUTEX
1064 
1065 #else // KMP_USE_DYNAMIC_LOCK
1066 
1067 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1068  ident_t const *loc,
1069  kmp_int32 gtid) {
1070  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1071 
1072  // Because of the double-check, the following load doesn't need to be volatile
1073  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1074 
1075  if (lck == NULL) {
1076  void *idx;
1077 
1078  // Allocate & initialize the lock.
1079  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1080  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1081  __kmp_init_user_lock_with_checks(lck);
1082  __kmp_set_user_lock_location(lck, loc);
1083 #if USE_ITT_BUILD
1084  __kmp_itt_critical_creating(lck);
1085 // __kmp_itt_critical_creating() should be called *before* the first usage
1086 // of underlying lock. It is the only place where we can guarantee it. There
1087 // are chances the lock will destroyed with no usage, but it is not a
1088 // problem, because this is not real event seen by user but rather setting
1089 // name for object (lock). See more details in kmp_itt.h.
1090 #endif /* USE_ITT_BUILD */
1091 
1092  // Use a cmpxchg instruction to slam the start of the critical section with
1093  // the lock pointer. If another thread beat us to it, deallocate the lock,
1094  // and use the lock that the other thread allocated.
1095  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1096 
1097  if (status == 0) {
1098 // Deallocate the lock and reload the value.
1099 #if USE_ITT_BUILD
1100  __kmp_itt_critical_destroyed(lck);
1101 // Let ITT know the lock is destroyed and the same memory location may be reused
1102 // for another purpose.
1103 #endif /* USE_ITT_BUILD */
1104  __kmp_destroy_user_lock_with_checks(lck);
1105  __kmp_user_lock_free(&idx, gtid, lck);
1106  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1107  KMP_DEBUG_ASSERT(lck != NULL);
1108  }
1109  }
1110  return lck;
1111 }
1112 
1113 #endif // KMP_USE_DYNAMIC_LOCK
1114 
1125 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1126  kmp_critical_name *crit) {
1127 #if KMP_USE_DYNAMIC_LOCK
1128 #if OMPT_SUPPORT && OMPT_OPTIONAL
1129  OMPT_STORE_RETURN_ADDRESS(global_tid);
1130 #endif // OMPT_SUPPORT
1131  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1132 #else
1133  KMP_COUNT_BLOCK(OMP_CRITICAL);
1134 #if OMPT_SUPPORT && OMPT_OPTIONAL
1135  omp_state_t prev_state = omp_state_undefined;
1136  ompt_thread_info_t ti;
1137 #endif
1138  kmp_user_lock_p lck;
1139 
1140  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1141 
1142  // TODO: add THR_OVHD_STATE
1143 
1144  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1145  KMP_CHECK_USER_LOCK_INIT();
1146 
1147  if ((__kmp_user_lock_kind == lk_tas) &&
1148  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1149  lck = (kmp_user_lock_p)crit;
1150  }
1151 #if KMP_USE_FUTEX
1152  else if ((__kmp_user_lock_kind == lk_futex) &&
1153  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1154  lck = (kmp_user_lock_p)crit;
1155  }
1156 #endif
1157  else { // ticket, queuing or drdpa
1158  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1159  }
1160 
1161  if (__kmp_env_consistency_check)
1162  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1163 
1164 // since the critical directive binds to all threads, not just the current
1165 // team we have to check this even if we are in a serialized team.
1166 // also, even if we are the uber thread, we still have to conduct the lock,
1167 // as we have to contend with sibling threads.
1168 
1169 #if USE_ITT_BUILD
1170  __kmp_itt_critical_acquiring(lck);
1171 #endif /* USE_ITT_BUILD */
1172 #if OMPT_SUPPORT && OMPT_OPTIONAL
1173  OMPT_STORE_RETURN_ADDRESS(gtid);
1174  void *codeptr_ra = NULL;
1175  if (ompt_enabled.enabled) {
1176  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1177  /* OMPT state update */
1178  prev_state = ti.state;
1179  ti.wait_id = (omp_wait_id_t)lck;
1180  ti.state = omp_state_wait_critical;
1181 
1182  /* OMPT event callback */
1183  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1184  if (ompt_enabled.ompt_callback_mutex_acquire) {
1185  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1186  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1187  (omp_wait_id_t)crit, codeptr_ra);
1188  }
1189  }
1190 #endif
1191  // Value of 'crit' should be good for using as a critical_id of the critical
1192  // section directive.
1193  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1194 
1195 #if USE_ITT_BUILD
1196  __kmp_itt_critical_acquired(lck);
1197 #endif /* USE_ITT_BUILD */
1198 #if OMPT_SUPPORT && OMPT_OPTIONAL
1199  if (ompt_enabled.enabled) {
1200  /* OMPT state update */
1201  ti.state = prev_state;
1202  ti.wait_id = 0;
1203 
1204  /* OMPT event callback */
1205  if (ompt_enabled.ompt_callback_mutex_acquired) {
1206  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1207  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
1208  }
1209  }
1210 #endif
1211  KMP_POP_PARTITIONED_TIMER();
1212 
1213  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1214  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1215 #endif // KMP_USE_DYNAMIC_LOCK
1216 }
1217 
1218 #if KMP_USE_DYNAMIC_LOCK
1219 
1220 // Converts the given hint to an internal lock implementation
1221 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1222 #if KMP_USE_TSX
1223 #define KMP_TSX_LOCK(seq) lockseq_##seq
1224 #else
1225 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1226 #endif
1227 
1228 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1229 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1230 #else
1231 #define KMP_CPUINFO_RTM 0
1232 #endif
1233 
1234  // Hints that do not require further logic
1235  if (hint & kmp_lock_hint_hle)
1236  return KMP_TSX_LOCK(hle);
1237  if (hint & kmp_lock_hint_rtm)
1238  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1239  if (hint & kmp_lock_hint_adaptive)
1240  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1241 
1242  // Rule out conflicting hints first by returning the default lock
1243  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1244  return __kmp_user_lock_seq;
1245  if ((hint & omp_lock_hint_speculative) &&
1246  (hint & omp_lock_hint_nonspeculative))
1247  return __kmp_user_lock_seq;
1248 
1249  // Do not even consider speculation when it appears to be contended
1250  if (hint & omp_lock_hint_contended)
1251  return lockseq_queuing;
1252 
1253  // Uncontended lock without speculation
1254  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1255  return lockseq_tas;
1256 
1257  // HLE lock for speculation
1258  if (hint & omp_lock_hint_speculative)
1259  return KMP_TSX_LOCK(hle);
1260 
1261  return __kmp_user_lock_seq;
1262 }
1263 
1264 #if OMPT_SUPPORT && OMPT_OPTIONAL
1265 static kmp_mutex_impl_t
1266 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1267  if (user_lock) {
1268  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1269  case 0:
1270  break;
1271 #if KMP_USE_FUTEX
1272  case locktag_futex:
1273  return kmp_mutex_impl_queuing;
1274 #endif
1275  case locktag_tas:
1276  return kmp_mutex_impl_spin;
1277 #if KMP_USE_TSX
1278  case locktag_hle:
1279  return kmp_mutex_impl_speculative;
1280 #endif
1281  default:
1282  return ompt_mutex_impl_unknown;
1283  }
1284  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1285  }
1286  KMP_ASSERT(ilock);
1287  switch (ilock->type) {
1288 #if KMP_USE_TSX
1289  case locktag_adaptive:
1290  case locktag_rtm:
1291  return kmp_mutex_impl_speculative;
1292 #endif
1293  case locktag_nested_tas:
1294  return kmp_mutex_impl_spin;
1295 #if KMP_USE_FUTEX
1296  case locktag_nested_futex:
1297 #endif
1298  case locktag_ticket:
1299  case locktag_queuing:
1300  case locktag_drdpa:
1301  case locktag_nested_ticket:
1302  case locktag_nested_queuing:
1303  case locktag_nested_drdpa:
1304  return kmp_mutex_impl_queuing;
1305  default:
1306  return ompt_mutex_impl_unknown;
1307  }
1308 }
1309 
1310 // For locks without dynamic binding
1311 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1312  switch (__kmp_user_lock_kind) {
1313  case lk_tas:
1314  return kmp_mutex_impl_spin;
1315 #if KMP_USE_FUTEX
1316  case lk_futex:
1317 #endif
1318  case lk_ticket:
1319  case lk_queuing:
1320  case lk_drdpa:
1321  return kmp_mutex_impl_queuing;
1322 #if KMP_USE_TSX
1323  case lk_hle:
1324  case lk_rtm:
1325  case lk_adaptive:
1326  return kmp_mutex_impl_speculative;
1327 #endif
1328  default:
1329  return ompt_mutex_impl_unknown;
1330  }
1331 }
1332 #endif
1333 
1347 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1348  kmp_critical_name *crit, uintptr_t hint) {
1349  KMP_COUNT_BLOCK(OMP_CRITICAL);
1350  kmp_user_lock_p lck;
1351 #if OMPT_SUPPORT && OMPT_OPTIONAL
1352  omp_state_t prev_state = omp_state_undefined;
1353  ompt_thread_info_t ti;
1354  // This is the case, if called from __kmpc_critical:
1355  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1356  if (!codeptr)
1357  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1358 #endif
1359 
1360  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1361 
1362  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1363  // Check if it is initialized.
1364  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1365  if (*lk == 0) {
1366  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1367  if (KMP_IS_D_LOCK(lckseq)) {
1368  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1369  KMP_GET_D_TAG(lckseq));
1370  } else {
1371  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1372  }
1373  }
1374  // Branch for accessing the actual lock object and set operation. This
1375  // branching is inevitable since this lock initialization does not follow the
1376  // normal dispatch path (lock table is not used).
1377  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1378  lck = (kmp_user_lock_p)lk;
1379  if (__kmp_env_consistency_check) {
1380  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1381  __kmp_map_hint_to_lock(hint));
1382  }
1383 #if USE_ITT_BUILD
1384  __kmp_itt_critical_acquiring(lck);
1385 #endif
1386 #if OMPT_SUPPORT && OMPT_OPTIONAL
1387  if (ompt_enabled.enabled) {
1388  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1389  /* OMPT state update */
1390  prev_state = ti.state;
1391  ti.wait_id = (omp_wait_id_t)lck;
1392  ti.state = omp_state_wait_critical;
1393 
1394  /* OMPT event callback */
1395  if (ompt_enabled.ompt_callback_mutex_acquire) {
1396  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1397  ompt_mutex_critical, (unsigned int)hint,
1398  __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
1399  }
1400  }
1401 #endif
1402 #if KMP_USE_INLINED_TAS
1403  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1404  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1405  } else
1406 #elif KMP_USE_INLINED_FUTEX
1407  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1408  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1409  } else
1410 #endif
1411  {
1412  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1413  }
1414  } else {
1415  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1416  lck = ilk->lock;
1417  if (__kmp_env_consistency_check) {
1418  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1419  __kmp_map_hint_to_lock(hint));
1420  }
1421 #if USE_ITT_BUILD
1422  __kmp_itt_critical_acquiring(lck);
1423 #endif
1424 #if OMPT_SUPPORT && OMPT_OPTIONAL
1425  if (ompt_enabled.enabled) {
1426  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1427  /* OMPT state update */
1428  prev_state = ti.state;
1429  ti.wait_id = (omp_wait_id_t)lck;
1430  ti.state = omp_state_wait_critical;
1431 
1432  /* OMPT event callback */
1433  if (ompt_enabled.ompt_callback_mutex_acquire) {
1434  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1435  ompt_mutex_critical, (unsigned int)hint,
1436  __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
1437  }
1438  }
1439 #endif
1440  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1441  }
1442  KMP_POP_PARTITIONED_TIMER();
1443 
1444 #if USE_ITT_BUILD
1445  __kmp_itt_critical_acquired(lck);
1446 #endif /* USE_ITT_BUILD */
1447 #if OMPT_SUPPORT && OMPT_OPTIONAL
1448  if (ompt_enabled.enabled) {
1449  /* OMPT state update */
1450  ti.state = prev_state;
1451  ti.wait_id = 0;
1452 
1453  /* OMPT event callback */
1454  if (ompt_enabled.ompt_callback_mutex_acquired) {
1455  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1456  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
1457  }
1458  }
1459 #endif
1460 
1461  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1462  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1463 } // __kmpc_critical_with_hint
1464 
1465 #endif // KMP_USE_DYNAMIC_LOCK
1466 
1476 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1477  kmp_critical_name *crit) {
1478  kmp_user_lock_p lck;
1479 
1480  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1481 
1482 #if KMP_USE_DYNAMIC_LOCK
1483  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1484  lck = (kmp_user_lock_p)crit;
1485  KMP_ASSERT(lck != NULL);
1486  if (__kmp_env_consistency_check) {
1487  __kmp_pop_sync(global_tid, ct_critical, loc);
1488  }
1489 #if USE_ITT_BUILD
1490  __kmp_itt_critical_releasing(lck);
1491 #endif
1492 #if KMP_USE_INLINED_TAS
1493  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1494  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1495  } else
1496 #elif KMP_USE_INLINED_FUTEX
1497  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1498  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1499  } else
1500 #endif
1501  {
1502  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1503  }
1504  } else {
1505  kmp_indirect_lock_t *ilk =
1506  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1507  KMP_ASSERT(ilk != NULL);
1508  lck = ilk->lock;
1509  if (__kmp_env_consistency_check) {
1510  __kmp_pop_sync(global_tid, ct_critical, loc);
1511  }
1512 #if USE_ITT_BUILD
1513  __kmp_itt_critical_releasing(lck);
1514 #endif
1515  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1516  }
1517 
1518 #else // KMP_USE_DYNAMIC_LOCK
1519 
1520  if ((__kmp_user_lock_kind == lk_tas) &&
1521  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1522  lck = (kmp_user_lock_p)crit;
1523  }
1524 #if KMP_USE_FUTEX
1525  else if ((__kmp_user_lock_kind == lk_futex) &&
1526  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1527  lck = (kmp_user_lock_p)crit;
1528  }
1529 #endif
1530  else { // ticket, queuing or drdpa
1531  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1532  }
1533 
1534  KMP_ASSERT(lck != NULL);
1535 
1536  if (__kmp_env_consistency_check)
1537  __kmp_pop_sync(global_tid, ct_critical, loc);
1538 
1539 #if USE_ITT_BUILD
1540  __kmp_itt_critical_releasing(lck);
1541 #endif /* USE_ITT_BUILD */
1542  // Value of 'crit' should be good for using as a critical_id of the critical
1543  // section directive.
1544  __kmp_release_user_lock_with_checks(lck, global_tid);
1545 
1546 #endif // KMP_USE_DYNAMIC_LOCK
1547 
1548 #if OMPT_SUPPORT && OMPT_OPTIONAL
1549  /* OMPT release event triggers after lock is released; place here to trigger
1550  * for all #if branches */
1551  OMPT_STORE_RETURN_ADDRESS(global_tid);
1552  if (ompt_enabled.ompt_callback_mutex_released) {
1553  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1554  ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1555  }
1556 #endif
1557 
1558  KMP_POP_PARTITIONED_TIMER();
1559  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1560 }
1561 
1571 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1572  int status;
1573 
1574  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1575 
1576  if (!TCR_4(__kmp_init_parallel))
1577  __kmp_parallel_initialize();
1578 
1579  if (__kmp_env_consistency_check)
1580  __kmp_check_barrier(global_tid, ct_barrier, loc);
1581 
1582 #if OMPT_SUPPORT
1583  omp_frame_t *ompt_frame;
1584  if (ompt_enabled.enabled) {
1585  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1586  if (ompt_frame->enter_frame == NULL)
1587  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1588  OMPT_STORE_RETURN_ADDRESS(global_tid);
1589  }
1590 #endif
1591 #if USE_ITT_NOTIFY
1592  __kmp_threads[global_tid]->th.th_ident = loc;
1593 #endif
1594  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1595 #if OMPT_SUPPORT && OMPT_OPTIONAL
1596  if (ompt_enabled.enabled) {
1597  ompt_frame->enter_frame = NULL;
1598  }
1599 #endif
1600 
1601  return (status != 0) ? 0 : 1;
1602 }
1603 
1613 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1614  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1615 
1616  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1617 }
1618 
1629 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1630  kmp_int32 ret;
1631 
1632  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1633 
1634  if (!TCR_4(__kmp_init_parallel))
1635  __kmp_parallel_initialize();
1636 
1637  if (__kmp_env_consistency_check) {
1638  if (loc == 0) {
1639  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1640  }
1641  __kmp_check_barrier(global_tid, ct_barrier, loc);
1642  }
1643 
1644 #if OMPT_SUPPORT
1645  omp_frame_t *ompt_frame;
1646  if (ompt_enabled.enabled) {
1647  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1648  if (ompt_frame->enter_frame == NULL)
1649  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1650  OMPT_STORE_RETURN_ADDRESS(global_tid);
1651  }
1652 #endif
1653 #if USE_ITT_NOTIFY
1654  __kmp_threads[global_tid]->th.th_ident = loc;
1655 #endif
1656  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1657 #if OMPT_SUPPORT && OMPT_OPTIONAL
1658  if (ompt_enabled.enabled) {
1659  ompt_frame->enter_frame = NULL;
1660  }
1661 #endif
1662 
1663  ret = __kmpc_master(loc, global_tid);
1664 
1665  if (__kmp_env_consistency_check) {
1666  /* there's no __kmpc_end_master called; so the (stats) */
1667  /* actions of __kmpc_end_master are done here */
1668 
1669  if (global_tid < 0) {
1670  KMP_WARNING(ThreadIdentInvalid);
1671  }
1672  if (ret) {
1673  /* only one thread should do the pop since only */
1674  /* one did the push (see __kmpc_master()) */
1675 
1676  __kmp_pop_sync(global_tid, ct_master, loc);
1677  }
1678  }
1679 
1680  return (ret);
1681 }
1682 
1683 /* The BARRIER for a SINGLE process section is always explicit */
1695 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1696  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1697 
1698  if (rc) {
1699  // We are going to execute the single statement, so we should count it.
1700  KMP_COUNT_BLOCK(OMP_SINGLE);
1701  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1702  }
1703 
1704 #if OMPT_SUPPORT && OMPT_OPTIONAL
1705  kmp_info_t *this_thr = __kmp_threads[global_tid];
1706  kmp_team_t *team = this_thr->th.th_team;
1707  int tid = __kmp_tid_from_gtid(global_tid);
1708 
1709  if (ompt_enabled.enabled) {
1710  if (rc) {
1711  if (ompt_enabled.ompt_callback_work) {
1712  ompt_callbacks.ompt_callback(ompt_callback_work)(
1713  ompt_work_single_executor, ompt_scope_begin,
1714  &(team->t.ompt_team_info.parallel_data),
1715  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1716  1, OMPT_GET_RETURN_ADDRESS(0));
1717  }
1718  } else {
1719  if (ompt_enabled.ompt_callback_work) {
1720  ompt_callbacks.ompt_callback(ompt_callback_work)(
1721  ompt_work_single_other, ompt_scope_begin,
1722  &(team->t.ompt_team_info.parallel_data),
1723  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1724  1, OMPT_GET_RETURN_ADDRESS(0));
1725  ompt_callbacks.ompt_callback(ompt_callback_work)(
1726  ompt_work_single_other, ompt_scope_end,
1727  &(team->t.ompt_team_info.parallel_data),
1728  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1729  1, OMPT_GET_RETURN_ADDRESS(0));
1730  }
1731  }
1732  }
1733 #endif
1734 
1735  return rc;
1736 }
1737 
1747 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1748  __kmp_exit_single(global_tid);
1749  KMP_POP_PARTITIONED_TIMER();
1750 
1751 #if OMPT_SUPPORT && OMPT_OPTIONAL
1752  kmp_info_t *this_thr = __kmp_threads[global_tid];
1753  kmp_team_t *team = this_thr->th.th_team;
1754  int tid = __kmp_tid_from_gtid(global_tid);
1755 
1756  if (ompt_enabled.ompt_callback_work) {
1757  ompt_callbacks.ompt_callback(ompt_callback_work)(
1758  ompt_work_single_executor, ompt_scope_end,
1759  &(team->t.ompt_team_info.parallel_data),
1760  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1761  OMPT_GET_RETURN_ADDRESS(0));
1762  }
1763 #endif
1764 }
1765 
1773 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1774  KMP_POP_PARTITIONED_TIMER();
1775  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1776 
1777 #if OMPT_SUPPORT && OMPT_OPTIONAL
1778  if (ompt_enabled.ompt_callback_work) {
1779  ompt_work_type_t ompt_work_type = ompt_work_loop;
1780  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1781  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1782  // Determine workshare type
1783  if (loc != NULL) {
1784  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1785  ompt_work_type = ompt_work_loop;
1786  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1787  ompt_work_type = ompt_work_sections;
1788  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1789  ompt_work_type = ompt_work_distribute;
1790  } else {
1791  // use default set above.
1792  // a warning about this case is provided in __kmpc_for_static_init
1793  }
1794  KMP_DEBUG_ASSERT(ompt_work_type);
1795  }
1796  ompt_callbacks.ompt_callback(ompt_callback_work)(
1797  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1798  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1799  }
1800 #endif
1801  if (__kmp_env_consistency_check)
1802  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1803 }
1804 
1805 // User routines which take C-style arguments (call by value)
1806 // different from the Fortran equivalent routines
1807 
1808 void ompc_set_num_threads(int arg) {
1809  // !!!!! TODO: check the per-task binding
1810  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1811 }
1812 
1813 void ompc_set_dynamic(int flag) {
1814  kmp_info_t *thread;
1815 
1816  /* For the thread-private implementation of the internal controls */
1817  thread = __kmp_entry_thread();
1818 
1819  __kmp_save_internal_controls(thread);
1820 
1821  set__dynamic(thread, flag ? TRUE : FALSE);
1822 }
1823 
1824 void ompc_set_nested(int flag) {
1825  kmp_info_t *thread;
1826 
1827  /* For the thread-private internal controls implementation */
1828  thread = __kmp_entry_thread();
1829 
1830  __kmp_save_internal_controls(thread);
1831 
1832  set__nested(thread, flag ? TRUE : FALSE);
1833 }
1834 
1835 void ompc_set_max_active_levels(int max_active_levels) {
1836  /* TO DO */
1837  /* we want per-task implementation of this internal control */
1838 
1839  /* For the per-thread internal controls implementation */
1840  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1841 }
1842 
1843 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1844  // !!!!! TODO: check the per-task binding
1845  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1846 }
1847 
1848 int ompc_get_ancestor_thread_num(int level) {
1849  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1850 }
1851 
1852 int ompc_get_team_size(int level) {
1853  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1854 }
1855 
1856 void kmpc_set_stacksize(int arg) {
1857  // __kmp_aux_set_stacksize initializes the library if needed
1858  __kmp_aux_set_stacksize(arg);
1859 }
1860 
1861 void kmpc_set_stacksize_s(size_t arg) {
1862  // __kmp_aux_set_stacksize initializes the library if needed
1863  __kmp_aux_set_stacksize(arg);
1864 }
1865 
1866 void kmpc_set_blocktime(int arg) {
1867  int gtid, tid;
1868  kmp_info_t *thread;
1869 
1870  gtid = __kmp_entry_gtid();
1871  tid = __kmp_tid_from_gtid(gtid);
1872  thread = __kmp_thread_from_gtid(gtid);
1873 
1874  __kmp_aux_set_blocktime(arg, thread, tid);
1875 }
1876 
1877 void kmpc_set_library(int arg) {
1878  // __kmp_user_set_library initializes the library if needed
1879  __kmp_user_set_library((enum library_type)arg);
1880 }
1881 
1882 void kmpc_set_defaults(char const *str) {
1883  // __kmp_aux_set_defaults initializes the library if needed
1884  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1885 }
1886 
1887 void kmpc_set_disp_num_buffers(int arg) {
1888  // ignore after initialization because some teams have already
1889  // allocated dispatch buffers
1890  if (__kmp_init_serial == 0 && arg > 0)
1891  __kmp_dispatch_num_buffers = arg;
1892 }
1893 
1894 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1895 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1896  return -1;
1897 #else
1898  if (!TCR_4(__kmp_init_middle)) {
1899  __kmp_middle_initialize();
1900  }
1901  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1902 #endif
1903 }
1904 
1905 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1906 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1907  return -1;
1908 #else
1909  if (!TCR_4(__kmp_init_middle)) {
1910  __kmp_middle_initialize();
1911  }
1912  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1913 #endif
1914 }
1915 
1916 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1917 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1918  return -1;
1919 #else
1920  if (!TCR_4(__kmp_init_middle)) {
1921  __kmp_middle_initialize();
1922  }
1923  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1924 #endif
1925 }
1926 
1927 /* -------------------------------------------------------------------------- */
1972 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1973  void *cpy_data, void (*cpy_func)(void *, void *),
1974  kmp_int32 didit) {
1975  void **data_ptr;
1976 
1977  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
1978 
1979  KMP_MB();
1980 
1981  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
1982 
1983  if (__kmp_env_consistency_check) {
1984  if (loc == 0) {
1985  KMP_WARNING(ConstructIdentInvalid);
1986  }
1987  }
1988 
1989  // ToDo: Optimize the following two barriers into some kind of split barrier
1990 
1991  if (didit)
1992  *data_ptr = cpy_data;
1993 
1994 #if OMPT_SUPPORT
1995  omp_frame_t *ompt_frame;
1996  if (ompt_enabled.enabled) {
1997  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1998  if (ompt_frame->enter_frame == NULL)
1999  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
2000  OMPT_STORE_RETURN_ADDRESS(gtid);
2001  }
2002 #endif
2003 /* This barrier is not a barrier region boundary */
2004 #if USE_ITT_NOTIFY
2005  __kmp_threads[gtid]->th.th_ident = loc;
2006 #endif
2007  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2008 
2009  if (!didit)
2010  (*cpy_func)(cpy_data, *data_ptr);
2011 
2012 // Consider next barrier a user-visible barrier for barrier region boundaries
2013 // Nesting checks are already handled by the single construct checks
2014 
2015 #if OMPT_SUPPORT
2016  if (ompt_enabled.enabled) {
2017  OMPT_STORE_RETURN_ADDRESS(gtid);
2018  }
2019 #endif
2020 #if USE_ITT_NOTIFY
2021  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2022 // tasks can overwrite the location)
2023 #endif
2024  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2025 #if OMPT_SUPPORT && OMPT_OPTIONAL
2026  if (ompt_enabled.enabled) {
2027  ompt_frame->enter_frame = NULL;
2028  }
2029 #endif
2030 }
2031 
2032 /* -------------------------------------------------------------------------- */
2033 
2034 #define INIT_LOCK __kmp_init_user_lock_with_checks
2035 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2036 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2037 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2038 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2039 #define ACQUIRE_NESTED_LOCK_TIMED \
2040  __kmp_acquire_nested_user_lock_with_checks_timed
2041 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2042 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2043 #define TEST_LOCK __kmp_test_user_lock_with_checks
2044 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2045 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2046 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2047 
2048 // TODO: Make check abort messages use location info & pass it into
2049 // with_checks routines
2050 
2051 #if KMP_USE_DYNAMIC_LOCK
2052 
2053 // internal lock initializer
2054 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2055  kmp_dyna_lockseq_t seq) {
2056  if (KMP_IS_D_LOCK(seq)) {
2057  KMP_INIT_D_LOCK(lock, seq);
2058 #if USE_ITT_BUILD
2059  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2060 #endif
2061  } else {
2062  KMP_INIT_I_LOCK(lock, seq);
2063 #if USE_ITT_BUILD
2064  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2065  __kmp_itt_lock_creating(ilk->lock, loc);
2066 #endif
2067  }
2068 }
2069 
2070 // internal nest lock initializer
2071 static __forceinline void
2072 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2073  kmp_dyna_lockseq_t seq) {
2074 #if KMP_USE_TSX
2075  // Don't have nested lock implementation for speculative locks
2076  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2077  seq = __kmp_user_lock_seq;
2078 #endif
2079  switch (seq) {
2080  case lockseq_tas:
2081  seq = lockseq_nested_tas;
2082  break;
2083 #if KMP_USE_FUTEX
2084  case lockseq_futex:
2085  seq = lockseq_nested_futex;
2086  break;
2087 #endif
2088  case lockseq_ticket:
2089  seq = lockseq_nested_ticket;
2090  break;
2091  case lockseq_queuing:
2092  seq = lockseq_nested_queuing;
2093  break;
2094  case lockseq_drdpa:
2095  seq = lockseq_nested_drdpa;
2096  break;
2097  default:
2098  seq = lockseq_nested_queuing;
2099  }
2100  KMP_INIT_I_LOCK(lock, seq);
2101 #if USE_ITT_BUILD
2102  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2103  __kmp_itt_lock_creating(ilk->lock, loc);
2104 #endif
2105 }
2106 
2107 /* initialize the lock with a hint */
2108 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2109  uintptr_t hint) {
2110  KMP_DEBUG_ASSERT(__kmp_init_serial);
2111  if (__kmp_env_consistency_check && user_lock == NULL) {
2112  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2113  }
2114 
2115  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2116 
2117 #if OMPT_SUPPORT && OMPT_OPTIONAL
2118  // This is the case, if called from omp_init_lock_with_hint:
2119  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2120  if (!codeptr)
2121  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2122  if (ompt_enabled.ompt_callback_lock_init) {
2123  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2124  ompt_mutex_lock, (omp_lock_hint_t)hint,
2125  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2126  codeptr);
2127  }
2128 #endif
2129 }
2130 
2131 /* initialize the lock with a hint */
2132 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2133  void **user_lock, uintptr_t hint) {
2134  KMP_DEBUG_ASSERT(__kmp_init_serial);
2135  if (__kmp_env_consistency_check && user_lock == NULL) {
2136  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2137  }
2138 
2139  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2140 
2141 #if OMPT_SUPPORT && OMPT_OPTIONAL
2142  // This is the case, if called from omp_init_lock_with_hint:
2143  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2144  if (!codeptr)
2145  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2146  if (ompt_enabled.ompt_callback_lock_init) {
2147  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2148  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2149  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2150  codeptr);
2151  }
2152 #endif
2153 }
2154 
2155 #endif // KMP_USE_DYNAMIC_LOCK
2156 
2157 /* initialize the lock */
2158 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2159 #if KMP_USE_DYNAMIC_LOCK
2160 
2161  KMP_DEBUG_ASSERT(__kmp_init_serial);
2162  if (__kmp_env_consistency_check && user_lock == NULL) {
2163  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2164  }
2165  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2166 
2167 #if OMPT_SUPPORT && OMPT_OPTIONAL
2168  // This is the case, if called from omp_init_lock_with_hint:
2169  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2170  if (!codeptr)
2171  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2172  if (ompt_enabled.ompt_callback_lock_init) {
2173  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2174  ompt_mutex_lock, omp_lock_hint_none,
2175  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2176  codeptr);
2177  }
2178 #endif
2179 
2180 #else // KMP_USE_DYNAMIC_LOCK
2181 
2182  static char const *const func = "omp_init_lock";
2183  kmp_user_lock_p lck;
2184  KMP_DEBUG_ASSERT(__kmp_init_serial);
2185 
2186  if (__kmp_env_consistency_check) {
2187  if (user_lock == NULL) {
2188  KMP_FATAL(LockIsUninitialized, func);
2189  }
2190  }
2191 
2192  KMP_CHECK_USER_LOCK_INIT();
2193 
2194  if ((__kmp_user_lock_kind == lk_tas) &&
2195  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2196  lck = (kmp_user_lock_p)user_lock;
2197  }
2198 #if KMP_USE_FUTEX
2199  else if ((__kmp_user_lock_kind == lk_futex) &&
2200  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2201  lck = (kmp_user_lock_p)user_lock;
2202  }
2203 #endif
2204  else {
2205  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2206  }
2207  INIT_LOCK(lck);
2208  __kmp_set_user_lock_location(lck, loc);
2209 
2210 #if OMPT_SUPPORT && OMPT_OPTIONAL
2211  // This is the case, if called from omp_init_lock_with_hint:
2212  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2213  if (!codeptr)
2214  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2215  if (ompt_enabled.ompt_callback_lock_init) {
2216  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2217  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2218  (omp_wait_id_t)user_lock, codeptr);
2219  }
2220 #endif
2221 
2222 #if USE_ITT_BUILD
2223  __kmp_itt_lock_creating(lck);
2224 #endif /* USE_ITT_BUILD */
2225 
2226 #endif // KMP_USE_DYNAMIC_LOCK
2227 } // __kmpc_init_lock
2228 
2229 /* initialize the lock */
2230 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2231 #if KMP_USE_DYNAMIC_LOCK
2232 
2233  KMP_DEBUG_ASSERT(__kmp_init_serial);
2234  if (__kmp_env_consistency_check && user_lock == NULL) {
2235  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2236  }
2237  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2238 
2239 #if OMPT_SUPPORT && OMPT_OPTIONAL
2240  // This is the case, if called from omp_init_lock_with_hint:
2241  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2242  if (!codeptr)
2243  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2244  if (ompt_enabled.ompt_callback_lock_init) {
2245  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2246  ompt_mutex_nest_lock, omp_lock_hint_none,
2247  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2248  codeptr);
2249  }
2250 #endif
2251 
2252 #else // KMP_USE_DYNAMIC_LOCK
2253 
2254  static char const *const func = "omp_init_nest_lock";
2255  kmp_user_lock_p lck;
2256  KMP_DEBUG_ASSERT(__kmp_init_serial);
2257 
2258  if (__kmp_env_consistency_check) {
2259  if (user_lock == NULL) {
2260  KMP_FATAL(LockIsUninitialized, func);
2261  }
2262  }
2263 
2264  KMP_CHECK_USER_LOCK_INIT();
2265 
2266  if ((__kmp_user_lock_kind == lk_tas) &&
2267  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2268  OMP_NEST_LOCK_T_SIZE)) {
2269  lck = (kmp_user_lock_p)user_lock;
2270  }
2271 #if KMP_USE_FUTEX
2272  else if ((__kmp_user_lock_kind == lk_futex) &&
2273  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2274  OMP_NEST_LOCK_T_SIZE)) {
2275  lck = (kmp_user_lock_p)user_lock;
2276  }
2277 #endif
2278  else {
2279  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2280  }
2281 
2282  INIT_NESTED_LOCK(lck);
2283  __kmp_set_user_lock_location(lck, loc);
2284 
2285 #if OMPT_SUPPORT && OMPT_OPTIONAL
2286  // This is the case, if called from omp_init_lock_with_hint:
2287  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2288  if (!codeptr)
2289  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2290  if (ompt_enabled.ompt_callback_lock_init) {
2291  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2292  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2293  (omp_wait_id_t)user_lock, codeptr);
2294  }
2295 #endif
2296 
2297 #if USE_ITT_BUILD
2298  __kmp_itt_lock_creating(lck);
2299 #endif /* USE_ITT_BUILD */
2300 
2301 #endif // KMP_USE_DYNAMIC_LOCK
2302 } // __kmpc_init_nest_lock
2303 
2304 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2305 #if KMP_USE_DYNAMIC_LOCK
2306 
2307 #if USE_ITT_BUILD
2308  kmp_user_lock_p lck;
2309  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2310  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2311  } else {
2312  lck = (kmp_user_lock_p)user_lock;
2313  }
2314  __kmp_itt_lock_destroyed(lck);
2315 #endif
2316 #if OMPT_SUPPORT && OMPT_OPTIONAL
2317  // This is the case, if called from omp_init_lock_with_hint:
2318  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2319  if (!codeptr)
2320  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2321  if (ompt_enabled.ompt_callback_lock_destroy) {
2322  kmp_user_lock_p lck;
2323  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2324  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2325  } else {
2326  lck = (kmp_user_lock_p)user_lock;
2327  }
2328  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2329  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2330  }
2331 #endif
2332  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2333 #else
2334  kmp_user_lock_p lck;
2335 
2336  if ((__kmp_user_lock_kind == lk_tas) &&
2337  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2338  lck = (kmp_user_lock_p)user_lock;
2339  }
2340 #if KMP_USE_FUTEX
2341  else if ((__kmp_user_lock_kind == lk_futex) &&
2342  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2343  lck = (kmp_user_lock_p)user_lock;
2344  }
2345 #endif
2346  else {
2347  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2348  }
2349 
2350 #if OMPT_SUPPORT && OMPT_OPTIONAL
2351  // This is the case, if called from omp_init_lock_with_hint:
2352  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2353  if (!codeptr)
2354  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2355  if (ompt_enabled.ompt_callback_lock_destroy) {
2356  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2357  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2358  }
2359 #endif
2360 
2361 #if USE_ITT_BUILD
2362  __kmp_itt_lock_destroyed(lck);
2363 #endif /* USE_ITT_BUILD */
2364  DESTROY_LOCK(lck);
2365 
2366  if ((__kmp_user_lock_kind == lk_tas) &&
2367  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2368  ;
2369  }
2370 #if KMP_USE_FUTEX
2371  else if ((__kmp_user_lock_kind == lk_futex) &&
2372  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2373  ;
2374  }
2375 #endif
2376  else {
2377  __kmp_user_lock_free(user_lock, gtid, lck);
2378  }
2379 #endif // KMP_USE_DYNAMIC_LOCK
2380 } // __kmpc_destroy_lock
2381 
2382 /* destroy the lock */
2383 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2384 #if KMP_USE_DYNAMIC_LOCK
2385 
2386 #if USE_ITT_BUILD
2387  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2388  __kmp_itt_lock_destroyed(ilk->lock);
2389 #endif
2390 #if OMPT_SUPPORT && OMPT_OPTIONAL
2391  // This is the case, if called from omp_init_lock_with_hint:
2392  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2393  if (!codeptr)
2394  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2395  if (ompt_enabled.ompt_callback_lock_destroy) {
2396  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2397  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2398  }
2399 #endif
2400  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2401 
2402 #else // KMP_USE_DYNAMIC_LOCK
2403 
2404  kmp_user_lock_p lck;
2405 
2406  if ((__kmp_user_lock_kind == lk_tas) &&
2407  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2408  OMP_NEST_LOCK_T_SIZE)) {
2409  lck = (kmp_user_lock_p)user_lock;
2410  }
2411 #if KMP_USE_FUTEX
2412  else if ((__kmp_user_lock_kind == lk_futex) &&
2413  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2414  OMP_NEST_LOCK_T_SIZE)) {
2415  lck = (kmp_user_lock_p)user_lock;
2416  }
2417 #endif
2418  else {
2419  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2420  }
2421 
2422 #if OMPT_SUPPORT && OMPT_OPTIONAL
2423  // This is the case, if called from omp_init_lock_with_hint:
2424  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2425  if (!codeptr)
2426  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2427  if (ompt_enabled.ompt_callback_lock_destroy) {
2428  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2429  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2430  }
2431 #endif
2432 
2433 #if USE_ITT_BUILD
2434  __kmp_itt_lock_destroyed(lck);
2435 #endif /* USE_ITT_BUILD */
2436 
2437  DESTROY_NESTED_LOCK(lck);
2438 
2439  if ((__kmp_user_lock_kind == lk_tas) &&
2440  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2441  OMP_NEST_LOCK_T_SIZE)) {
2442  ;
2443  }
2444 #if KMP_USE_FUTEX
2445  else if ((__kmp_user_lock_kind == lk_futex) &&
2446  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2447  OMP_NEST_LOCK_T_SIZE)) {
2448  ;
2449  }
2450 #endif
2451  else {
2452  __kmp_user_lock_free(user_lock, gtid, lck);
2453  }
2454 #endif // KMP_USE_DYNAMIC_LOCK
2455 } // __kmpc_destroy_nest_lock
2456 
2457 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2458  KMP_COUNT_BLOCK(OMP_set_lock);
2459 #if KMP_USE_DYNAMIC_LOCK
2460  int tag = KMP_EXTRACT_D_TAG(user_lock);
2461 #if USE_ITT_BUILD
2462  __kmp_itt_lock_acquiring(
2463  (kmp_user_lock_p)
2464  user_lock); // itt function will get to the right lock object.
2465 #endif
2466 #if OMPT_SUPPORT && OMPT_OPTIONAL
2467  // This is the case, if called from omp_init_lock_with_hint:
2468  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2469  if (!codeptr)
2470  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2471  if (ompt_enabled.ompt_callback_mutex_acquire) {
2472  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2473  ompt_mutex_lock, omp_lock_hint_none,
2474  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2475  codeptr);
2476  }
2477 #endif
2478 #if KMP_USE_INLINED_TAS
2479  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2480  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2481  } else
2482 #elif KMP_USE_INLINED_FUTEX
2483  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2484  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2485  } else
2486 #endif
2487  {
2488  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2489  }
2490 #if USE_ITT_BUILD
2491  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2492 #endif
2493 #if OMPT_SUPPORT && OMPT_OPTIONAL
2494  if (ompt_enabled.ompt_callback_mutex_acquired) {
2495  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2496  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2497  }
2498 #endif
2499 
2500 #else // KMP_USE_DYNAMIC_LOCK
2501 
2502  kmp_user_lock_p lck;
2503 
2504  if ((__kmp_user_lock_kind == lk_tas) &&
2505  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2506  lck = (kmp_user_lock_p)user_lock;
2507  }
2508 #if KMP_USE_FUTEX
2509  else if ((__kmp_user_lock_kind == lk_futex) &&
2510  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2511  lck = (kmp_user_lock_p)user_lock;
2512  }
2513 #endif
2514  else {
2515  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2516  }
2517 
2518 #if USE_ITT_BUILD
2519  __kmp_itt_lock_acquiring(lck);
2520 #endif /* USE_ITT_BUILD */
2521 #if OMPT_SUPPORT && OMPT_OPTIONAL
2522  // This is the case, if called from omp_init_lock_with_hint:
2523  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2524  if (!codeptr)
2525  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2526  if (ompt_enabled.ompt_callback_mutex_acquire) {
2527  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2528  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2529  (omp_wait_id_t)lck, codeptr);
2530  }
2531 #endif
2532 
2533  ACQUIRE_LOCK(lck, gtid);
2534 
2535 #if USE_ITT_BUILD
2536  __kmp_itt_lock_acquired(lck);
2537 #endif /* USE_ITT_BUILD */
2538 
2539 #if OMPT_SUPPORT && OMPT_OPTIONAL
2540  if (ompt_enabled.ompt_callback_mutex_acquired) {
2541  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2542  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2543  }
2544 #endif
2545 
2546 #endif // KMP_USE_DYNAMIC_LOCK
2547 }
2548 
2549 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2550 #if KMP_USE_DYNAMIC_LOCK
2551 
2552 #if USE_ITT_BUILD
2553  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2554 #endif
2555 #if OMPT_SUPPORT && OMPT_OPTIONAL
2556  // This is the case, if called from omp_init_lock_with_hint:
2557  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2558  if (!codeptr)
2559  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2560  if (ompt_enabled.enabled) {
2561  if (ompt_enabled.ompt_callback_mutex_acquire) {
2562  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2563  ompt_mutex_nest_lock, omp_lock_hint_none,
2564  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2565  codeptr);
2566  }
2567  }
2568 #endif
2569  int acquire_status =
2570  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2571 #if USE_ITT_BUILD
2572  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2573 #endif
2574 
2575 #if OMPT_SUPPORT && OMPT_OPTIONAL
2576  if (ompt_enabled.enabled) {
2577  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2578  if (ompt_enabled.ompt_callback_mutex_acquired) {
2579  // lock_first
2580  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2581  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2582  }
2583  } else {
2584  if (ompt_enabled.ompt_callback_nest_lock) {
2585  // lock_next
2586  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2587  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
2588  }
2589  }
2590  }
2591 #endif
2592 
2593 #else // KMP_USE_DYNAMIC_LOCK
2594  int acquire_status;
2595  kmp_user_lock_p lck;
2596 
2597  if ((__kmp_user_lock_kind == lk_tas) &&
2598  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2599  OMP_NEST_LOCK_T_SIZE)) {
2600  lck = (kmp_user_lock_p)user_lock;
2601  }
2602 #if KMP_USE_FUTEX
2603  else if ((__kmp_user_lock_kind == lk_futex) &&
2604  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2605  OMP_NEST_LOCK_T_SIZE)) {
2606  lck = (kmp_user_lock_p)user_lock;
2607  }
2608 #endif
2609  else {
2610  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2611  }
2612 
2613 #if USE_ITT_BUILD
2614  __kmp_itt_lock_acquiring(lck);
2615 #endif /* USE_ITT_BUILD */
2616 #if OMPT_SUPPORT && OMPT_OPTIONAL
2617  // This is the case, if called from omp_init_lock_with_hint:
2618  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2619  if (!codeptr)
2620  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621  if (ompt_enabled.enabled) {
2622  if (ompt_enabled.ompt_callback_mutex_acquire) {
2623  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2624  ompt_mutex_nest_lock, omp_lock_hint_none,
2625  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
2626  }
2627  }
2628 #endif
2629 
2630  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2631 
2632 #if USE_ITT_BUILD
2633  __kmp_itt_lock_acquired(lck);
2634 #endif /* USE_ITT_BUILD */
2635 
2636 #if OMPT_SUPPORT && OMPT_OPTIONAL
2637  if (ompt_enabled.enabled) {
2638  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2639  if (ompt_enabled.ompt_callback_mutex_acquired) {
2640  // lock_first
2641  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2642  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2643  }
2644  } else {
2645  if (ompt_enabled.ompt_callback_nest_lock) {
2646  // lock_next
2647  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2648  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
2649  }
2650  }
2651  }
2652 #endif
2653 
2654 #endif // KMP_USE_DYNAMIC_LOCK
2655 }
2656 
2657 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2658 #if KMP_USE_DYNAMIC_LOCK
2659 
2660  int tag = KMP_EXTRACT_D_TAG(user_lock);
2661 #if USE_ITT_BUILD
2662  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2663 #endif
2664 #if KMP_USE_INLINED_TAS
2665  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2666  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2667  } else
2668 #elif KMP_USE_INLINED_FUTEX
2669  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2670  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2671  } else
2672 #endif
2673  {
2674  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2675  }
2676 
2677 #if OMPT_SUPPORT && OMPT_OPTIONAL
2678  // This is the case, if called from omp_init_lock_with_hint:
2679  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2680  if (!codeptr)
2681  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2682  if (ompt_enabled.ompt_callback_mutex_released) {
2683  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2684  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2685  }
2686 #endif
2687 
2688 #else // KMP_USE_DYNAMIC_LOCK
2689 
2690  kmp_user_lock_p lck;
2691 
2692  /* Can't use serial interval since not block structured */
2693  /* release the lock */
2694 
2695  if ((__kmp_user_lock_kind == lk_tas) &&
2696  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2697 #if KMP_OS_LINUX && \
2698  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2699 // "fast" path implemented to fix customer performance issue
2700 #if USE_ITT_BUILD
2701  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2702 #endif /* USE_ITT_BUILD */
2703  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2704  KMP_MB();
2705 
2706 #if OMPT_SUPPORT && OMPT_OPTIONAL
2707  // This is the case, if called from omp_init_lock_with_hint:
2708  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2709  if (!codeptr)
2710  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2711  if (ompt_enabled.ompt_callback_mutex_released) {
2712  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2713  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2714  }
2715 #endif
2716 
2717  return;
2718 #else
2719  lck = (kmp_user_lock_p)user_lock;
2720 #endif
2721  }
2722 #if KMP_USE_FUTEX
2723  else if ((__kmp_user_lock_kind == lk_futex) &&
2724  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2725  lck = (kmp_user_lock_p)user_lock;
2726  }
2727 #endif
2728  else {
2729  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2730  }
2731 
2732 #if USE_ITT_BUILD
2733  __kmp_itt_lock_releasing(lck);
2734 #endif /* USE_ITT_BUILD */
2735 
2736  RELEASE_LOCK(lck, gtid);
2737 
2738 #if OMPT_SUPPORT && OMPT_OPTIONAL
2739  // This is the case, if called from omp_init_lock_with_hint:
2740  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2741  if (!codeptr)
2742  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2743  if (ompt_enabled.ompt_callback_mutex_released) {
2744  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2745  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2746  }
2747 #endif
2748 
2749 #endif // KMP_USE_DYNAMIC_LOCK
2750 }
2751 
2752 /* release the lock */
2753 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2754 #if KMP_USE_DYNAMIC_LOCK
2755 
2756 #if USE_ITT_BUILD
2757  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2758 #endif
2759  int release_status =
2760  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2761 
2762 #if OMPT_SUPPORT && OMPT_OPTIONAL
2763  // This is the case, if called from omp_init_lock_with_hint:
2764  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2765  if (!codeptr)
2766  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2767  if (ompt_enabled.enabled) {
2768  if (release_status == KMP_LOCK_RELEASED) {
2769  if (ompt_enabled.ompt_callback_mutex_released) {
2770  // release_lock_last
2771  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2772  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2773  }
2774  } else if (ompt_enabled.ompt_callback_nest_lock) {
2775  // release_lock_prev
2776  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2777  ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
2778  }
2779  }
2780 #endif
2781 
2782 #else // KMP_USE_DYNAMIC_LOCK
2783 
2784  kmp_user_lock_p lck;
2785 
2786  /* Can't use serial interval since not block structured */
2787 
2788  if ((__kmp_user_lock_kind == lk_tas) &&
2789  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2790  OMP_NEST_LOCK_T_SIZE)) {
2791 #if KMP_OS_LINUX && \
2792  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2793  // "fast" path implemented to fix customer performance issue
2794  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2795 #if USE_ITT_BUILD
2796  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2797 #endif /* USE_ITT_BUILD */
2798 
2799 #if OMPT_SUPPORT && OMPT_OPTIONAL
2800  int release_status = KMP_LOCK_STILL_HELD;
2801 #endif
2802 
2803  if (--(tl->lk.depth_locked) == 0) {
2804  TCW_4(tl->lk.poll, 0);
2805 #if OMPT_SUPPORT && OMPT_OPTIONAL
2806  release_status = KMP_LOCK_RELEASED;
2807 #endif
2808  }
2809  KMP_MB();
2810 
2811 #if OMPT_SUPPORT && OMPT_OPTIONAL
2812  // This is the case, if called from omp_init_lock_with_hint:
2813  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2814  if (!codeptr)
2815  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2816  if (ompt_enabled.enabled) {
2817  if (release_status == KMP_LOCK_RELEASED) {
2818  if (ompt_enabled.ompt_callback_mutex_released) {
2819  // release_lock_last
2820  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2821  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2822  }
2823  } else if (ompt_enabled.ompt_callback_nest_lock) {
2824  // release_lock_previous
2825  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2826  ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
2827  }
2828  }
2829 #endif
2830 
2831  return;
2832 #else
2833  lck = (kmp_user_lock_p)user_lock;
2834 #endif
2835  }
2836 #if KMP_USE_FUTEX
2837  else if ((__kmp_user_lock_kind == lk_futex) &&
2838  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2839  OMP_NEST_LOCK_T_SIZE)) {
2840  lck = (kmp_user_lock_p)user_lock;
2841  }
2842 #endif
2843  else {
2844  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2845  }
2846 
2847 #if USE_ITT_BUILD
2848  __kmp_itt_lock_releasing(lck);
2849 #endif /* USE_ITT_BUILD */
2850 
2851  int release_status;
2852  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2853 #if OMPT_SUPPORT && OMPT_OPTIONAL
2854  // This is the case, if called from omp_init_lock_with_hint:
2855  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2856  if (!codeptr)
2857  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2858  if (ompt_enabled.enabled) {
2859  if (release_status == KMP_LOCK_RELEASED) {
2860  if (ompt_enabled.ompt_callback_mutex_released) {
2861  // release_lock_last
2862  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2863  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2864  }
2865  } else if (ompt_enabled.ompt_callback_nest_lock) {
2866  // release_lock_previous
2867  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2868  ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
2869  }
2870  }
2871 #endif
2872 
2873 #endif // KMP_USE_DYNAMIC_LOCK
2874 }
2875 
2876 /* try to acquire the lock */
2877 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2878  KMP_COUNT_BLOCK(OMP_test_lock);
2879 
2880 #if KMP_USE_DYNAMIC_LOCK
2881  int rc;
2882  int tag = KMP_EXTRACT_D_TAG(user_lock);
2883 #if USE_ITT_BUILD
2884  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2885 #endif
2886 #if OMPT_SUPPORT && OMPT_OPTIONAL
2887  // This is the case, if called from omp_init_lock_with_hint:
2888  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2889  if (!codeptr)
2890  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2891  if (ompt_enabled.ompt_callback_mutex_acquire) {
2892  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2893  ompt_mutex_lock, omp_lock_hint_none,
2894  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2895  codeptr);
2896  }
2897 #endif
2898 #if KMP_USE_INLINED_TAS
2899  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2900  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2901  } else
2902 #elif KMP_USE_INLINED_FUTEX
2903  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2904  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2905  } else
2906 #endif
2907  {
2908  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2909  }
2910  if (rc) {
2911 #if USE_ITT_BUILD
2912  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2913 #endif
2914 #if OMPT_SUPPORT && OMPT_OPTIONAL
2915  if (ompt_enabled.ompt_callback_mutex_acquired) {
2916  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2917  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2918  }
2919 #endif
2920  return FTN_TRUE;
2921  } else {
2922 #if USE_ITT_BUILD
2923  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2924 #endif
2925  return FTN_FALSE;
2926  }
2927 
2928 #else // KMP_USE_DYNAMIC_LOCK
2929 
2930  kmp_user_lock_p lck;
2931  int rc;
2932 
2933  if ((__kmp_user_lock_kind == lk_tas) &&
2934  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2935  lck = (kmp_user_lock_p)user_lock;
2936  }
2937 #if KMP_USE_FUTEX
2938  else if ((__kmp_user_lock_kind == lk_futex) &&
2939  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2940  lck = (kmp_user_lock_p)user_lock;
2941  }
2942 #endif
2943  else {
2944  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
2945  }
2946 
2947 #if USE_ITT_BUILD
2948  __kmp_itt_lock_acquiring(lck);
2949 #endif /* USE_ITT_BUILD */
2950 #if OMPT_SUPPORT && OMPT_OPTIONAL
2951  // This is the case, if called from omp_init_lock_with_hint:
2952  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2953  if (!codeptr)
2954  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2955  if (ompt_enabled.ompt_callback_mutex_acquire) {
2956  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2957  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2958  (omp_wait_id_t)lck, codeptr);
2959  }
2960 #endif
2961 
2962  rc = TEST_LOCK(lck, gtid);
2963 #if USE_ITT_BUILD
2964  if (rc) {
2965  __kmp_itt_lock_acquired(lck);
2966  } else {
2967  __kmp_itt_lock_cancelled(lck);
2968  }
2969 #endif /* USE_ITT_BUILD */
2970 #if OMPT_SUPPORT && OMPT_OPTIONAL
2971  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
2972  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2973  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2974  }
2975 #endif
2976 
2977  return (rc ? FTN_TRUE : FTN_FALSE);
2978 
2979 /* Can't use serial interval since not block structured */
2980 
2981 #endif // KMP_USE_DYNAMIC_LOCK
2982 }
2983 
2984 /* try to acquire the lock */
2985 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2986 #if KMP_USE_DYNAMIC_LOCK
2987  int rc;
2988 #if USE_ITT_BUILD
2989  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2990 #endif
2991 #if OMPT_SUPPORT && OMPT_OPTIONAL
2992  // This is the case, if called from omp_init_lock_with_hint:
2993  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2994  if (!codeptr)
2995  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2996  if (ompt_enabled.ompt_callback_mutex_acquire) {
2997  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2998  ompt_mutex_nest_lock, omp_lock_hint_none,
2999  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
3000  codeptr);
3001  }
3002 #endif
3003  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3004 #if USE_ITT_BUILD
3005  if (rc) {
3006  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3007  } else {
3008  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3009  }
3010 #endif
3011 #if OMPT_SUPPORT && OMPT_OPTIONAL
3012  if (ompt_enabled.enabled && rc) {
3013  if (rc == 1) {
3014  if (ompt_enabled.ompt_callback_mutex_acquired) {
3015  // lock_first
3016  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3017  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
3018  }
3019  } else {
3020  if (ompt_enabled.ompt_callback_nest_lock) {
3021  // lock_next
3022  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3023  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
3024  }
3025  }
3026  }
3027 #endif
3028  return rc;
3029 
3030 #else // KMP_USE_DYNAMIC_LOCK
3031 
3032  kmp_user_lock_p lck;
3033  int rc;
3034 
3035  if ((__kmp_user_lock_kind == lk_tas) &&
3036  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3037  OMP_NEST_LOCK_T_SIZE)) {
3038  lck = (kmp_user_lock_p)user_lock;
3039  }
3040 #if KMP_USE_FUTEX
3041  else if ((__kmp_user_lock_kind == lk_futex) &&
3042  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3043  OMP_NEST_LOCK_T_SIZE)) {
3044  lck = (kmp_user_lock_p)user_lock;
3045  }
3046 #endif
3047  else {
3048  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3049  }
3050 
3051 #if USE_ITT_BUILD
3052  __kmp_itt_lock_acquiring(lck);
3053 #endif /* USE_ITT_BUILD */
3054 
3055 #if OMPT_SUPPORT && OMPT_OPTIONAL
3056  // This is the case, if called from omp_init_lock_with_hint:
3057  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3058  if (!codeptr)
3059  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3060  if (ompt_enabled.enabled) &&
3061  ompt_enabled.ompt_callback_mutex_acquire) {
3062  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3063  ompt_mutex_nest_lock, omp_lock_hint_none,
3064  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
3065  }
3066 #endif
3067 
3068  rc = TEST_NESTED_LOCK(lck, gtid);
3069 #if USE_ITT_BUILD
3070  if (rc) {
3071  __kmp_itt_lock_acquired(lck);
3072  } else {
3073  __kmp_itt_lock_cancelled(lck);
3074  }
3075 #endif /* USE_ITT_BUILD */
3076 #if OMPT_SUPPORT && OMPT_OPTIONAL
3077  if (ompt_enabled.enabled && rc) {
3078  if (rc == 1) {
3079  if (ompt_enabled.ompt_callback_mutex_acquired) {
3080  // lock_first
3081  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3082  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
3083  }
3084  } else {
3085  if (ompt_enabled.ompt_callback_nest_lock) {
3086  // lock_next
3087  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3088  ompt_mutex_scope_begin, (omp_wait_id_t)lck, codeptr);
3089  }
3090  }
3091  }
3092 #endif
3093  return rc;
3094 
3095 /* Can't use serial interval since not block structured */
3096 
3097 #endif // KMP_USE_DYNAMIC_LOCK
3098 }
3099 
3100 // Interface to fast scalable reduce methods routines
3101 
3102 // keep the selected method in a thread local structure for cross-function
3103 // usage: will be used in __kmpc_end_reduce* functions;
3104 // another solution: to re-determine the method one more time in
3105 // __kmpc_end_reduce* functions (new prototype required then)
3106 // AT: which solution is better?
3107 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3108  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3109 
3110 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3111  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3112 
3113 // description of the packed_reduction_method variable: look at the macros in
3114 // kmp.h
3115 
3116 // used in a critical section reduce block
3117 static __forceinline void
3118 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3119  kmp_critical_name *crit) {
3120 
3121  // this lock was visible to a customer and to the threading profile tool as a
3122  // serial overhead span (although it's used for an internal purpose only)
3123  // why was it visible in previous implementation?
3124  // should we keep it visible in new reduce block?
3125  kmp_user_lock_p lck;
3126 
3127 #if KMP_USE_DYNAMIC_LOCK
3128 
3129  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3130  // Check if it is initialized.
3131  if (*lk == 0) {
3132  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3133  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3134  KMP_GET_D_TAG(__kmp_user_lock_seq));
3135  } else {
3136  __kmp_init_indirect_csptr(crit, loc, global_tid,
3137  KMP_GET_I_TAG(__kmp_user_lock_seq));
3138  }
3139  }
3140  // Branch for accessing the actual lock object and set operation. This
3141  // branching is inevitable since this lock initialization does not follow the
3142  // normal dispatch path (lock table is not used).
3143  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3144  lck = (kmp_user_lock_p)lk;
3145  KMP_DEBUG_ASSERT(lck != NULL);
3146  if (__kmp_env_consistency_check) {
3147  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3148  }
3149  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3150  } else {
3151  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3152  lck = ilk->lock;
3153  KMP_DEBUG_ASSERT(lck != NULL);
3154  if (__kmp_env_consistency_check) {
3155  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3156  }
3157  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3158  }
3159 
3160 #else // KMP_USE_DYNAMIC_LOCK
3161 
3162  // We know that the fast reduction code is only emitted by Intel compilers
3163  // with 32 byte critical sections. If there isn't enough space, then we
3164  // have to use a pointer.
3165  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3166  lck = (kmp_user_lock_p)crit;
3167  } else {
3168  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3169  }
3170  KMP_DEBUG_ASSERT(lck != NULL);
3171 
3172  if (__kmp_env_consistency_check)
3173  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3174 
3175  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3176 
3177 #endif // KMP_USE_DYNAMIC_LOCK
3178 }
3179 
3180 // used in a critical section reduce block
3181 static __forceinline void
3182 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3183  kmp_critical_name *crit) {
3184 
3185  kmp_user_lock_p lck;
3186 
3187 #if KMP_USE_DYNAMIC_LOCK
3188 
3189  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3190  lck = (kmp_user_lock_p)crit;
3191  if (__kmp_env_consistency_check)
3192  __kmp_pop_sync(global_tid, ct_critical, loc);
3193  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3194  } else {
3195  kmp_indirect_lock_t *ilk =
3196  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3197  if (__kmp_env_consistency_check)
3198  __kmp_pop_sync(global_tid, ct_critical, loc);
3199  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3200  }
3201 
3202 #else // KMP_USE_DYNAMIC_LOCK
3203 
3204  // We know that the fast reduction code is only emitted by Intel compilers
3205  // with 32 byte critical sections. If there isn't enough space, then we have
3206  // to use a pointer.
3207  if (__kmp_base_user_lock_size > 32) {
3208  lck = *((kmp_user_lock_p *)crit);
3209  KMP_ASSERT(lck != NULL);
3210  } else {
3211  lck = (kmp_user_lock_p)crit;
3212  }
3213 
3214  if (__kmp_env_consistency_check)
3215  __kmp_pop_sync(global_tid, ct_critical, loc);
3216 
3217  __kmp_release_user_lock_with_checks(lck, global_tid);
3218 
3219 #endif // KMP_USE_DYNAMIC_LOCK
3220 } // __kmp_end_critical_section_reduce_block
3221 
3222 #if OMP_40_ENABLED
3223 static __forceinline int
3224 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3225  int *task_state) {
3226  kmp_team_t *team;
3227 
3228  // Check if we are inside the teams construct?
3229  if (th->th.th_teams_microtask) {
3230  *team_p = team = th->th.th_team;
3231  if (team->t.t_level == th->th.th_teams_level) {
3232  // This is reduction at teams construct.
3233  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3234  // Let's swap teams temporarily for the reduction.
3235  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3236  th->th.th_team = team->t.t_parent;
3237  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3238  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3239  *task_state = th->th.th_task_state;
3240  th->th.th_task_state = 0;
3241 
3242  return 1;
3243  }
3244  }
3245  return 0;
3246 }
3247 
3248 static __forceinline void
3249 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3250  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3251  th->th.th_info.ds.ds_tid = 0;
3252  th->th.th_team = team;
3253  th->th.th_team_nproc = team->t.t_nproc;
3254  th->th.th_task_team = team->t.t_task_team[task_state];
3255  th->th.th_task_state = task_state;
3256 }
3257 #endif
3258 
3259 /* 2.a.i. Reduce Block without a terminating barrier */
3275 kmp_int32
3276 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3277  size_t reduce_size, void *reduce_data,
3278  void (*reduce_func)(void *lhs_data, void *rhs_data),
3279  kmp_critical_name *lck) {
3280 
3281  KMP_COUNT_BLOCK(REDUCE_nowait);
3282  int retval = 0;
3283  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3284 #if OMP_40_ENABLED
3285  kmp_info_t *th;
3286  kmp_team_t *team;
3287  int teams_swapped = 0, task_state;
3288 #endif
3289  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3290 
3291  // why do we need this initialization here at all?
3292  // Reduction clause can not be used as a stand-alone directive.
3293 
3294  // do not call __kmp_serial_initialize(), it will be called by
3295  // __kmp_parallel_initialize() if needed
3296  // possible detection of false-positive race by the threadchecker ???
3297  if (!TCR_4(__kmp_init_parallel))
3298  __kmp_parallel_initialize();
3299 
3300 // check correctness of reduce block nesting
3301 #if KMP_USE_DYNAMIC_LOCK
3302  if (__kmp_env_consistency_check)
3303  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3304 #else
3305  if (__kmp_env_consistency_check)
3306  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3307 #endif
3308 
3309 #if OMP_40_ENABLED
3310  th = __kmp_thread_from_gtid(global_tid);
3311  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3312 #endif // OMP_40_ENABLED
3313 
3314  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3315  // the value should be kept in a variable
3316  // the variable should be either a construct-specific or thread-specific
3317  // property, not a team specific property
3318  // (a thread can reach the next reduce block on the next construct, reduce
3319  // method may differ on the next construct)
3320  // an ident_t "loc" parameter could be used as a construct-specific property
3321  // (what if loc == 0?)
3322  // (if both construct-specific and team-specific variables were shared,
3323  // then unness extra syncs should be needed)
3324  // a thread-specific variable is better regarding two issues above (next
3325  // construct and extra syncs)
3326  // a thread-specific "th_local.reduction_method" variable is used currently
3327  // each thread executes 'determine' and 'set' lines (no need to execute by one
3328  // thread, to avoid unness extra syncs)
3329 
3330  packed_reduction_method = __kmp_determine_reduction_method(
3331  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3332  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3333 
3334  if (packed_reduction_method == critical_reduce_block) {
3335 
3336  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3337  retval = 1;
3338 
3339  } else if (packed_reduction_method == empty_reduce_block) {
3340 
3341  // usage: if team size == 1, no synchronization is required ( Intel
3342  // platforms only )
3343  retval = 1;
3344 
3345  } else if (packed_reduction_method == atomic_reduce_block) {
3346 
3347  retval = 2;
3348 
3349  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3350  // won't be called by the code gen)
3351  // (it's not quite good, because the checking block has been closed by
3352  // this 'pop',
3353  // but atomic operation has not been executed yet, will be executed
3354  // slightly later, literally on next instruction)
3355  if (__kmp_env_consistency_check)
3356  __kmp_pop_sync(global_tid, ct_reduce, loc);
3357 
3358  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3359  tree_reduce_block)) {
3360 
3361 // AT: performance issue: a real barrier here
3362 // AT: (if master goes slow, other threads are blocked here waiting for the
3363 // master to come and release them)
3364 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3365 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3366 // be confusing to a customer)
3367 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3368 // might go faster and be more in line with sense of NOWAIT
3369 // AT: TO DO: do epcc test and compare times
3370 
3371 // this barrier should be invisible to a customer and to the threading profile
3372 // tool (it's neither a terminating barrier nor customer's code, it's
3373 // used for an internal purpose)
3374 #if OMPT_SUPPORT
3375  // JP: can this barrier potentially leed to task scheduling?
3376  // JP: as long as there is a barrier in the implementation, OMPT should and
3377  // will provide the barrier events
3378  // so we set-up the necessary frame/return addresses.
3379  omp_frame_t *ompt_frame;
3380  if (ompt_enabled.enabled) {
3381  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3382  if (ompt_frame->enter_frame == NULL)
3383  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3384  OMPT_STORE_RETURN_ADDRESS(global_tid);
3385  }
3386 #endif
3387 #if USE_ITT_NOTIFY
3388  __kmp_threads[global_tid]->th.th_ident = loc;
3389 #endif
3390  retval =
3391  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3392  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3393  retval = (retval != 0) ? (0) : (1);
3394 #if OMPT_SUPPORT && OMPT_OPTIONAL
3395  if (ompt_enabled.enabled) {
3396  ompt_frame->enter_frame = NULL;
3397  }
3398 #endif
3399 
3400  // all other workers except master should do this pop here
3401  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3402  if (__kmp_env_consistency_check) {
3403  if (retval == 0) {
3404  __kmp_pop_sync(global_tid, ct_reduce, loc);
3405  }
3406  }
3407 
3408  } else {
3409 
3410  // should never reach this block
3411  KMP_ASSERT(0); // "unexpected method"
3412  }
3413 #if OMP_40_ENABLED
3414  if (teams_swapped) {
3415  __kmp_restore_swapped_teams(th, team, task_state);
3416  }
3417 #endif
3418  KA_TRACE(
3419  10,
3420  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3421  global_tid, packed_reduction_method, retval));
3422 
3423  return retval;
3424 }
3425 
3434 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3435  kmp_critical_name *lck) {
3436 
3437  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3438 
3439  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3440 
3441  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3442 
3443  if (packed_reduction_method == critical_reduce_block) {
3444 
3445  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3446 
3447  } else if (packed_reduction_method == empty_reduce_block) {
3448 
3449  // usage: if team size == 1, no synchronization is required ( on Intel
3450  // platforms only )
3451 
3452  } else if (packed_reduction_method == atomic_reduce_block) {
3453 
3454  // neither master nor other workers should get here
3455  // (code gen does not generate this call in case 2: atomic reduce block)
3456  // actually it's better to remove this elseif at all;
3457  // after removal this value will checked by the 'else' and will assert
3458 
3459  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3460  tree_reduce_block)) {
3461 
3462  // only master gets here
3463 
3464  } else {
3465 
3466  // should never reach this block
3467  KMP_ASSERT(0); // "unexpected method"
3468  }
3469 
3470  if (__kmp_env_consistency_check)
3471  __kmp_pop_sync(global_tid, ct_reduce, loc);
3472 
3473  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3474  global_tid, packed_reduction_method));
3475 
3476  return;
3477 }
3478 
3479 /* 2.a.ii. Reduce Block with a terminating barrier */
3480 
3496 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3497  size_t reduce_size, void *reduce_data,
3498  void (*reduce_func)(void *lhs_data, void *rhs_data),
3499  kmp_critical_name *lck) {
3500  KMP_COUNT_BLOCK(REDUCE_wait);
3501  int retval = 0;
3502  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3503 #if OMP_40_ENABLED
3504  kmp_info_t *th;
3505  kmp_team_t *team;
3506  int teams_swapped = 0, task_state;
3507 #endif
3508 
3509  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3510 
3511  // why do we need this initialization here at all?
3512  // Reduction clause can not be a stand-alone directive.
3513 
3514  // do not call __kmp_serial_initialize(), it will be called by
3515  // __kmp_parallel_initialize() if needed
3516  // possible detection of false-positive race by the threadchecker ???
3517  if (!TCR_4(__kmp_init_parallel))
3518  __kmp_parallel_initialize();
3519 
3520 // check correctness of reduce block nesting
3521 #if KMP_USE_DYNAMIC_LOCK
3522  if (__kmp_env_consistency_check)
3523  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3524 #else
3525  if (__kmp_env_consistency_check)
3526  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3527 #endif
3528 
3529 #if OMP_40_ENABLED
3530  th = __kmp_thread_from_gtid(global_tid);
3531  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3532 #endif // OMP_40_ENABLED
3533 
3534  packed_reduction_method = __kmp_determine_reduction_method(
3535  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3536  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3537 
3538  if (packed_reduction_method == critical_reduce_block) {
3539 
3540  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3541  retval = 1;
3542 
3543  } else if (packed_reduction_method == empty_reduce_block) {
3544 
3545  // usage: if team size == 1, no synchronization is required ( Intel
3546  // platforms only )
3547  retval = 1;
3548 
3549  } else if (packed_reduction_method == atomic_reduce_block) {
3550 
3551  retval = 2;
3552 
3553  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3554  tree_reduce_block)) {
3555 
3556 // case tree_reduce_block:
3557 // this barrier should be visible to a customer and to the threading profile
3558 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3559 #if OMPT_SUPPORT
3560  omp_frame_t *ompt_frame;
3561  if (ompt_enabled.enabled) {
3562  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3563  if (ompt_frame->enter_frame == NULL)
3564  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3565  OMPT_STORE_RETURN_ADDRESS(global_tid);
3566  }
3567 #endif
3568 #if USE_ITT_NOTIFY
3569  __kmp_threads[global_tid]->th.th_ident =
3570  loc; // needed for correct notification of frames
3571 #endif
3572  retval =
3573  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3574  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3575  retval = (retval != 0) ? (0) : (1);
3576 #if OMPT_SUPPORT && OMPT_OPTIONAL
3577  if (ompt_enabled.enabled) {
3578  ompt_frame->enter_frame = NULL;
3579  }
3580 #endif
3581 
3582  // all other workers except master should do this pop here
3583  // ( none of other workers except master will enter __kmpc_end_reduce() )
3584  if (__kmp_env_consistency_check) {
3585  if (retval == 0) { // 0: all other workers; 1: master
3586  __kmp_pop_sync(global_tid, ct_reduce, loc);
3587  }
3588  }
3589 
3590  } else {
3591 
3592  // should never reach this block
3593  KMP_ASSERT(0); // "unexpected method"
3594  }
3595 #if OMP_40_ENABLED
3596  if (teams_swapped) {
3597  __kmp_restore_swapped_teams(th, team, task_state);
3598  }
3599 #endif
3600 
3601  KA_TRACE(10,
3602  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3603  global_tid, packed_reduction_method, retval));
3604 
3605  return retval;
3606 }
3607 
3618 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3619  kmp_critical_name *lck) {
3620 
3621  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3622 #if OMP_40_ENABLED
3623  kmp_info_t *th;
3624  kmp_team_t *team;
3625  int teams_swapped = 0, task_state;
3626 #endif
3627 
3628  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3629 
3630 #if OMP_40_ENABLED
3631  th = __kmp_thread_from_gtid(global_tid);
3632  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3633 #endif // OMP_40_ENABLED
3634 
3635  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3636 
3637  // this barrier should be visible to a customer and to the threading profile
3638  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3639 
3640  if (packed_reduction_method == critical_reduce_block) {
3641 
3642  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3643 
3644 // TODO: implicit barrier: should be exposed
3645 #if OMPT_SUPPORT
3646  omp_frame_t *ompt_frame;
3647  if (ompt_enabled.enabled) {
3648  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3649  if (ompt_frame->enter_frame == NULL)
3650  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3651  OMPT_STORE_RETURN_ADDRESS(global_tid);
3652  }
3653 #endif
3654 #if USE_ITT_NOTIFY
3655  __kmp_threads[global_tid]->th.th_ident = loc;
3656 #endif
3657  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3658 #if OMPT_SUPPORT && OMPT_OPTIONAL
3659  if (ompt_enabled.enabled) {
3660  ompt_frame->enter_frame = NULL;
3661  }
3662 #endif
3663 
3664  } else if (packed_reduction_method == empty_reduce_block) {
3665 
3666 // usage: if team size==1, no synchronization is required (Intel platforms only)
3667 
3668 // TODO: implicit barrier: should be exposed
3669 #if OMPT_SUPPORT
3670  omp_frame_t *ompt_frame;
3671  if (ompt_enabled.enabled) {
3672  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3673  if (ompt_frame->enter_frame == NULL)
3674  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3675  OMPT_STORE_RETURN_ADDRESS(global_tid);
3676  }
3677 #endif
3678 #if USE_ITT_NOTIFY
3679  __kmp_threads[global_tid]->th.th_ident = loc;
3680 #endif
3681  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3682 #if OMPT_SUPPORT && OMPT_OPTIONAL
3683  if (ompt_enabled.enabled) {
3684  ompt_frame->enter_frame = NULL;
3685  }
3686 #endif
3687 
3688  } else if (packed_reduction_method == atomic_reduce_block) {
3689 
3690 #if OMPT_SUPPORT
3691  omp_frame_t *ompt_frame;
3692  if (ompt_enabled.enabled) {
3693  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3694  if (ompt_frame->enter_frame == NULL)
3695  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3696  OMPT_STORE_RETURN_ADDRESS(global_tid);
3697  }
3698 #endif
3699 // TODO: implicit barrier: should be exposed
3700 #if USE_ITT_NOTIFY
3701  __kmp_threads[global_tid]->th.th_ident = loc;
3702 #endif
3703  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3704 #if OMPT_SUPPORT && OMPT_OPTIONAL
3705  if (ompt_enabled.enabled) {
3706  ompt_frame->enter_frame = NULL;
3707  }
3708 #endif
3709 
3710  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3711  tree_reduce_block)) {
3712 
3713  // only master executes here (master releases all other workers)
3714  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3715  global_tid);
3716 
3717  } else {
3718 
3719  // should never reach this block
3720  KMP_ASSERT(0); // "unexpected method"
3721  }
3722 #if OMP_40_ENABLED
3723  if (teams_swapped) {
3724  __kmp_restore_swapped_teams(th, team, task_state);
3725  }
3726 #endif
3727 
3728  if (__kmp_env_consistency_check)
3729  __kmp_pop_sync(global_tid, ct_reduce, loc);
3730 
3731  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3732  global_tid, packed_reduction_method));
3733 
3734  return;
3735 }
3736 
3737 #undef __KMP_GET_REDUCTION_METHOD
3738 #undef __KMP_SET_REDUCTION_METHOD
3739 
3740 /* end of interface to fast scalable reduce routines */
3741 
3742 kmp_uint64 __kmpc_get_taskid() {
3743 
3744  kmp_int32 gtid;
3745  kmp_info_t *thread;
3746 
3747  gtid = __kmp_get_gtid();
3748  if (gtid < 0) {
3749  return 0;
3750  }
3751  thread = __kmp_thread_from_gtid(gtid);
3752  return thread->th.th_current_task->td_task_id;
3753 
3754 } // __kmpc_get_taskid
3755 
3756 kmp_uint64 __kmpc_get_parent_taskid() {
3757 
3758  kmp_int32 gtid;
3759  kmp_info_t *thread;
3760  kmp_taskdata_t *parent_task;
3761 
3762  gtid = __kmp_get_gtid();
3763  if (gtid < 0) {
3764  return 0;
3765  }
3766  thread = __kmp_thread_from_gtid(gtid);
3767  parent_task = thread->th.th_current_task->td_parent;
3768  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3769 
3770 } // __kmpc_get_parent_taskid
3771 
3772 #if OMP_45_ENABLED
3773 
3784 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3785  const struct kmp_dim *dims) {
3786  int j, idx;
3787  kmp_int64 last, trace_count;
3788  kmp_info_t *th = __kmp_threads[gtid];
3789  kmp_team_t *team = th->th.th_team;
3790  kmp_uint32 *flags;
3791  kmp_disp_t *pr_buf = th->th.th_dispatch;
3792  dispatch_shared_info_t *sh_buf;
3793 
3794  KA_TRACE(
3795  20,
3796  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3797  gtid, num_dims, !team->t.t_serialized));
3798  KMP_DEBUG_ASSERT(dims != NULL);
3799  KMP_DEBUG_ASSERT(num_dims > 0);
3800 
3801  if (team->t.t_serialized) {
3802  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3803  return; // no dependencies if team is serialized
3804  }
3805  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3806  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3807  // the next loop
3808  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3809 
3810  // Save bounds info into allocated private buffer
3811  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3812  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3813  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3814  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3815  pr_buf->th_doacross_info[0] =
3816  (kmp_int64)num_dims; // first element is number of dimensions
3817  // Save also address of num_done in order to access it later without knowing
3818  // the buffer index
3819  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3820  pr_buf->th_doacross_info[2] = dims[0].lo;
3821  pr_buf->th_doacross_info[3] = dims[0].up;
3822  pr_buf->th_doacross_info[4] = dims[0].st;
3823  last = 5;
3824  for (j = 1; j < num_dims; ++j) {
3825  kmp_int64
3826  range_length; // To keep ranges of all dimensions but the first dims[0]
3827  if (dims[j].st == 1) { // most common case
3828  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3829  range_length = dims[j].up - dims[j].lo + 1;
3830  } else {
3831  if (dims[j].st > 0) {
3832  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3833  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3834  } else { // negative increment
3835  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3836  range_length =
3837  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3838  }
3839  }
3840  pr_buf->th_doacross_info[last++] = range_length;
3841  pr_buf->th_doacross_info[last++] = dims[j].lo;
3842  pr_buf->th_doacross_info[last++] = dims[j].up;
3843  pr_buf->th_doacross_info[last++] = dims[j].st;
3844  }
3845 
3846  // Compute total trip count.
3847  // Start with range of dims[0] which we don't need to keep in the buffer.
3848  if (dims[0].st == 1) { // most common case
3849  trace_count = dims[0].up - dims[0].lo + 1;
3850  } else if (dims[0].st > 0) {
3851  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3852  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3853  } else { // negative increment
3854  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3855  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3856  }
3857  for (j = 1; j < num_dims; ++j) {
3858  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3859  }
3860  KMP_DEBUG_ASSERT(trace_count > 0);
3861 
3862  // Check if shared buffer is not occupied by other loop (idx -
3863  // __kmp_dispatch_num_buffers)
3864  if (idx != sh_buf->doacross_buf_idx) {
3865  // Shared buffer is occupied, wait for it to be free
3866  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3867  __kmp_eq_4, NULL);
3868  }
3869 #if KMP_32_BIT_ARCH
3870  // Check if we are the first thread. After the CAS the first thread gets 0,
3871  // others get 1 if initialization is in progress, allocated pointer otherwise.
3872  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3873  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3874  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3875 #else
3876  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3877  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3878 #endif
3879  if (flags == NULL) {
3880  // we are the first thread, allocate the array of flags
3881  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3882  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3883  KMP_MB();
3884  sh_buf->doacross_flags = flags;
3885  } else if (flags == (kmp_uint32 *)1) {
3886 #if KMP_32_BIT_ARCH
3887  // initialization is still in progress, need to wait
3888  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3889 #else
3890  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3891 #endif
3892  KMP_YIELD(TRUE);
3893  KMP_MB();
3894  } else {
3895  KMP_MB();
3896  }
3897  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3898  pr_buf->th_doacross_flags =
3899  sh_buf->doacross_flags; // save private copy in order to not
3900  // touch shared buffer on each iteration
3901  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3902 }
3903 
3904 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3905  kmp_int32 shft, num_dims, i;
3906  kmp_uint32 flag;
3907  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3908  kmp_info_t *th = __kmp_threads[gtid];
3909  kmp_team_t *team = th->th.th_team;
3910  kmp_disp_t *pr_buf;
3911  kmp_int64 lo, up, st;
3912 
3913  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3914  if (team->t.t_serialized) {
3915  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3916  return; // no dependencies if team is serialized
3917  }
3918 
3919  // calculate sequential iteration number and check out-of-bounds condition
3920  pr_buf = th->th.th_dispatch;
3921  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3922  num_dims = pr_buf->th_doacross_info[0];
3923  lo = pr_buf->th_doacross_info[2];
3924  up = pr_buf->th_doacross_info[3];
3925  st = pr_buf->th_doacross_info[4];
3926  if (st == 1) { // most common case
3927  if (vec[0] < lo || vec[0] > up) {
3928  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3929  "bounds [%lld,%lld]\n",
3930  gtid, vec[0], lo, up));
3931  return;
3932  }
3933  iter_number = vec[0] - lo;
3934  } else if (st > 0) {
3935  if (vec[0] < lo || vec[0] > up) {
3936  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3937  "bounds [%lld,%lld]\n",
3938  gtid, vec[0], lo, up));
3939  return;
3940  }
3941  iter_number = (kmp_uint64)(vec[0] - lo) / st;
3942  } else { // negative increment
3943  if (vec[0] > lo || vec[0] < up) {
3944  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3945  "bounds [%lld,%lld]\n",
3946  gtid, vec[0], lo, up));
3947  return;
3948  }
3949  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3950  }
3951  for (i = 1; i < num_dims; ++i) {
3952  kmp_int64 iter, ln;
3953  kmp_int32 j = i * 4;
3954  ln = pr_buf->th_doacross_info[j + 1];
3955  lo = pr_buf->th_doacross_info[j + 2];
3956  up = pr_buf->th_doacross_info[j + 3];
3957  st = pr_buf->th_doacross_info[j + 4];
3958  if (st == 1) {
3959  if (vec[i] < lo || vec[i] > up) {
3960  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3961  "bounds [%lld,%lld]\n",
3962  gtid, vec[i], lo, up));
3963  return;
3964  }
3965  iter = vec[i] - lo;
3966  } else if (st > 0) {
3967  if (vec[i] < lo || vec[i] > up) {
3968  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3969  "bounds [%lld,%lld]\n",
3970  gtid, vec[i], lo, up));
3971  return;
3972  }
3973  iter = (kmp_uint64)(vec[i] - lo) / st;
3974  } else { // st < 0
3975  if (vec[i] > lo || vec[i] < up) {
3976  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3977  "bounds [%lld,%lld]\n",
3978  gtid, vec[i], lo, up));
3979  return;
3980  }
3981  iter = (kmp_uint64)(lo - vec[i]) / (-st);
3982  }
3983  iter_number = iter + ln * iter_number;
3984  }
3985  shft = iter_number % 32; // use 32-bit granularity
3986  iter_number >>= 5; // divided by 32
3987  flag = 1 << shft;
3988  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
3989  KMP_YIELD(TRUE);
3990  }
3991  KMP_MB();
3992  KA_TRACE(20,
3993  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3994  gtid, (iter_number << 5) + shft));
3995 }
3996 
3997 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
3998  kmp_int32 shft, num_dims, i;
3999  kmp_uint32 flag;
4000  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4001  kmp_info_t *th = __kmp_threads[gtid];
4002  kmp_team_t *team = th->th.th_team;
4003  kmp_disp_t *pr_buf;
4004  kmp_int64 lo, st;
4005 
4006  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4007  if (team->t.t_serialized) {
4008  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4009  return; // no dependencies if team is serialized
4010  }
4011 
4012  // calculate sequential iteration number (same as in "wait" but no
4013  // out-of-bounds checks)
4014  pr_buf = th->th.th_dispatch;
4015  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4016  num_dims = pr_buf->th_doacross_info[0];
4017  lo = pr_buf->th_doacross_info[2];
4018  st = pr_buf->th_doacross_info[4];
4019  if (st == 1) { // most common case
4020  iter_number = vec[0] - lo;
4021  } else if (st > 0) {
4022  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4023  } else { // negative increment
4024  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4025  }
4026  for (i = 1; i < num_dims; ++i) {
4027  kmp_int64 iter, ln;
4028  kmp_int32 j = i * 4;
4029  ln = pr_buf->th_doacross_info[j + 1];
4030  lo = pr_buf->th_doacross_info[j + 2];
4031  st = pr_buf->th_doacross_info[j + 4];
4032  if (st == 1) {
4033  iter = vec[i] - lo;
4034  } else if (st > 0) {
4035  iter = (kmp_uint64)(vec[i] - lo) / st;
4036  } else { // st < 0
4037  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4038  }
4039  iter_number = iter + ln * iter_number;
4040  }
4041  shft = iter_number % 32; // use 32-bit granularity
4042  iter_number >>= 5; // divided by 32
4043  flag = 1 << shft;
4044  KMP_MB();
4045  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4046  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4047  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4048  (iter_number << 5) + shft));
4049 }
4050 
4051 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4052  kmp_int32 num_done;
4053  kmp_info_t *th = __kmp_threads[gtid];
4054  kmp_team_t *team = th->th.th_team;
4055  kmp_disp_t *pr_buf = th->th.th_dispatch;
4056 
4057  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4058  if (team->t.t_serialized) {
4059  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4060  return; // nothing to do
4061  }
4062  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4063  if (num_done == th->th.th_team_nproc) {
4064  // we are the last thread, need to free shared resources
4065  int idx = pr_buf->th_doacross_buf_idx - 1;
4066  dispatch_shared_info_t *sh_buf =
4067  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4068  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4069  (kmp_int64)&sh_buf->doacross_num_done);
4070  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4071  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4072  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4073  sh_buf->doacross_flags = NULL;
4074  sh_buf->doacross_num_done = 0;
4075  sh_buf->doacross_buf_idx +=
4076  __kmp_dispatch_num_buffers; // free buffer for future re-use
4077  }
4078  // free private resources (need to keep buffer index forever)
4079  pr_buf->th_doacross_flags = NULL;
4080  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4081  pr_buf->th_doacross_info = NULL;
4082  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4083 }
4084 #endif
4085 
4086 #if OMP_50_ENABLED
4087 int __kmpc_get_target_offload(void) {
4088  if (!__kmp_init_serial) {
4089  __kmp_serial_initialize();
4090  }
4091  return __kmp_target_offload;
4092 }
4093 #endif // OMP_50_ENABLED
4094 
4095 // end of file //
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
kmp_int32 __kmpc_global_thread_num(ident_t *loc)
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid)
void __kmpc_flush(ident_t *loc)
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end(ident_t *loc)
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid)
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
#define KMP_IDENT_AUTOPAR
Definition: kmp.h:186
void __kmpc_begin(ident_t *loc, kmp_int32 flags)
kmp_int32 __kmpc_bound_thread_num(ident_t *loc)
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), kmp_int32 didit)
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid)
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition: kmp_stats.h:890
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
Definition: kmp.h:207
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,...)
kmp_int32 __kmpc_in_parallel(ident_t *loc)
kmp_int32 __kmpc_ok_to_fork(ident_t *loc)
kmp_int32 __kmpc_global_num_threads(ident_t *loc)
kmp_int32 __kmpc_bound_num_threads(ident_t *loc)
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck)
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck)
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads)
void(* kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
Definition: kmp.h:1396
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid)
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:64
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,...)
char const * psource
Definition: kmp.h:217
kmp_int32 flags
Definition: kmp.h:209