LLVM OpenMP* Runtime Library
kmp_csupport.c
1 /*
2  * kmp_csupport.c -- kfront linkage support for OpenMP.
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "omp.h" /* extern "C" declarations of user-visible routines */
17 #include "kmp.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_error.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-internal.h"
25 #include "ompt-specific.h"
26 #endif
27 
28 #define MAX_MESSAGE 512
29 
30 /* ------------------------------------------------------------------------ */
31 /* ------------------------------------------------------------------------ */
32 
33 /* flags will be used in future, e.g., to implement */
34 /* openmp_strict library restrictions */
35 
45 void
46 __kmpc_begin(ident_t *loc, kmp_int32 flags)
47 {
48  // By default __kmp_ignore_mppbeg() returns TRUE.
49  if (__kmp_ignore_mppbeg() == FALSE) {
50  __kmp_internal_begin();
51 
52  KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
53  }
54 }
55 
63 void
64 __kmpc_end(ident_t *loc)
65 {
66  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
67  // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
68  // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
69  // will unregister this root (which can cause library shutdown).
70  if (__kmp_ignore_mppend() == FALSE) {
71  KC_TRACE( 10, ("__kmpc_end: called\n" ) );
72  KA_TRACE( 30, ("__kmpc_end\n" ));
73 
74  __kmp_internal_end_thread( -1 );
75  }
76 }
77 
97 kmp_int32
98 __kmpc_global_thread_num(ident_t *loc)
99 {
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
103 
104  return gtid;
105 }
106 
120 kmp_int32
121 __kmpc_global_num_threads(ident_t *loc)
122 {
123  KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
124 
125  return TCR_4(__kmp_nth);
126 }
127 
134 kmp_int32
135 __kmpc_bound_thread_num(ident_t *loc)
136 {
137  KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
138  return __kmp_tid_from_gtid( __kmp_entry_gtid() );
139 }
140 
146 kmp_int32
147 __kmpc_bound_num_threads(ident_t *loc)
148 {
149  KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
150 
151  return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
152 }
153 
160 kmp_int32
161 __kmpc_ok_to_fork(ident_t *loc)
162 {
163 #ifndef KMP_DEBUG
164 
165  return TRUE;
166 
167 #else
168 
169  const char *semi2;
170  const char *semi3;
171  int line_no;
172 
173  if (__kmp_par_range == 0) {
174  return TRUE;
175  }
176  semi2 = loc->psource;
177  if (semi2 == NULL) {
178  return TRUE;
179  }
180  semi2 = strchr(semi2, ';');
181  if (semi2 == NULL) {
182  return TRUE;
183  }
184  semi2 = strchr(semi2 + 1, ';');
185  if (semi2 == NULL) {
186  return TRUE;
187  }
188  if (__kmp_par_range_filename[0]) {
189  const char *name = semi2 - 1;
190  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
191  name--;
192  }
193  if ((*name == '/') || (*name == ';')) {
194  name++;
195  }
196  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
197  return __kmp_par_range < 0;
198  }
199  }
200  semi3 = strchr(semi2 + 1, ';');
201  if (__kmp_par_range_routine[0]) {
202  if ((semi3 != NULL) && (semi3 > semi2)
203  && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
204  return __kmp_par_range < 0;
205  }
206  }
207  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
208  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
209  return __kmp_par_range > 0;
210  }
211  return __kmp_par_range < 0;
212  }
213  return TRUE;
214 
215 #endif /* KMP_DEBUG */
216 
217 }
218 
224 kmp_int32
225 __kmpc_in_parallel(ident_t *loc)
226 {
227  return __kmp_entry_thread() -> th.th_root -> r.r_active;
228 }
229 
239 void
240 __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
241 {
242  KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
243  global_tid, num_threads ) );
244 
245  __kmp_push_num_threads( loc, global_tid, num_threads );
246 }
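/*
 * Illustrative sketch (not part of the original source): for a construct such as
 *     #pragma omp parallel num_threads(n)
 * a compiler targeting this entry point is expected to call __kmpc_push_num_threads
 * immediately before the matching __kmpc_fork_call. The names `loc`, `outlined`
 * and `n` below are hypothetical.
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
 *     __kmpc_push_num_threads(&loc, gtid, n);
 *     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
 */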
247 
248 void
249 __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
250 {
251  KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
252 
253  /* the num_threads are automatically popped */
254 }
255 
256 
257 #if OMP_40_ENABLED
258 
259 void
260 __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
261 {
262  KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
263  global_tid, proc_bind ) );
264 
265  __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
266 }
267 
268 #endif /* OMP_40_ENABLED */
269 
270 
280 void
281 __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
282 {
283  int gtid = __kmp_entry_gtid();
284 
285 #if (KMP_STATS_ENABLED)
286  int inParallel = __kmpc_in_parallel(loc);
287  if (inParallel)
288  {
289  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
290  }
291  else
292  {
293  KMP_STOP_EXPLICIT_TIMER(OMP_serial);
294  KMP_COUNT_BLOCK(OMP_PARALLEL);
295  }
296 #endif
297 
298  // maybe saving thr_state is enough here
299  {
300  va_list ap;
301  va_start( ap, microtask );
302 
303 #if OMPT_SUPPORT
304  int tid = __kmp_tid_from_gtid( gtid );
305  kmp_info_t *master_th = __kmp_threads[ gtid ];
306  kmp_team_t *parent_team = master_th->th.th_team;
307  if (ompt_enabled) {
308  parent_team->t.t_implicit_task_taskdata[tid].
309  ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
310  }
311 #endif
312 
313 #if INCLUDE_SSC_MARKS
314  SSC_MARK_FORKING();
315 #endif
316  __kmp_fork_call( loc, gtid, fork_context_intel,
317  argc,
318 #if OMPT_SUPPORT
319  VOLATILE_CAST(void *) microtask, // "unwrapped" task
320 #endif
321  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
322  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
323 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
324 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
325  &ap
326 #else
327  ap
328 #endif
329  );
330 #if INCLUDE_SSC_MARKS
331  SSC_MARK_JOINING();
332 #endif
333  __kmp_join_call( loc, gtid
334 #if OMPT_SUPPORT
335  , fork_context_intel
336 #endif
337  );
338 
339  va_end( ap );
340 
341 #if OMPT_SUPPORT
342  if (ompt_enabled) {
343  parent_team->t.t_implicit_task_taskdata[tid].
344  ompt_task_info.frame.reenter_runtime_frame = 0;
345  }
346 #endif
347  }
348 #if (KMP_STATS_ENABLED)
349  if (!inParallel)
350  KMP_START_EXPLICIT_TIMER(OMP_serial);
351 #endif
352 }
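/*
 * Illustrative sketch (not part of the original source): a typical lowering of
 *     #pragma omp parallel
 *     { use(&x); }
 * outlines the region body into a routine with the kmpc_micro signature and
 * passes the shared variables as trailing arguments. `outlined`, `loc` and `x`
 * are hypothetical names.
 *
 *     static void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x)
 *     {
 *         use(x);                       // original region body
 *     }
 *     ...
 *     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);  // argc == number of shared args
 */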
353 
354 #if OMP_40_ENABLED
355 
366 void
367 __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
368 {
369  KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
370  global_tid, num_teams, num_threads ) );
371 
372  __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
373 }
374 
384 void
385 __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
386 {
387  int gtid = __kmp_entry_gtid();
388  kmp_info_t *this_thr = __kmp_threads[ gtid ];
389  va_list ap;
390  va_start( ap, microtask );
391 
392  KMP_COUNT_BLOCK(OMP_TEAMS);
393 
394  // remember teams entry point and nesting level
395  this_thr->th.th_teams_microtask = microtask;
396  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
397 
398 #if OMPT_SUPPORT
399  kmp_team_t *parent_team = this_thr->th.th_team;
400  int tid = __kmp_tid_from_gtid( gtid );
401  if (ompt_enabled) {
402  parent_team->t.t_implicit_task_taskdata[tid].
403  ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
404  }
405 #endif
406 
407  // check if __kmpc_push_num_teams was called; if not, set the default number of teams
408  if ( this_thr->th.th_teams_size.nteams == 0 ) {
409  __kmp_push_num_teams( loc, gtid, 0, 0 );
410  }
411  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
412  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
413  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
414 
415  __kmp_fork_call( loc, gtid, fork_context_intel,
416  argc,
417 #if OMPT_SUPPORT
418  VOLATILE_CAST(void *) microtask, // "unwrapped" task
419 #endif
420  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
421  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
422 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
423  &ap
424 #else
425  ap
426 #endif
427  );
428  __kmp_join_call( loc, gtid
429 #if OMPT_SUPPORT
430  , fork_context_intel
431 #endif
432  );
433 
434 #if OMPT_SUPPORT
435  if (ompt_enabled) {
436  parent_team->t.t_implicit_task_taskdata[tid].
437  ompt_task_info.frame.reenter_runtime_frame = NULL;
438  }
439 #endif
440 
441  this_thr->th.th_teams_microtask = NULL;
442  this_thr->th.th_teams_level = 0;
443  *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
444  va_end( ap );
445 }
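/*
 * Illustrative sketch (not part of the original source): a typical lowering of
 *     #pragma omp teams num_teams(nt) thread_limit(tl)
 * pushes the requested team sizes and then forks the teams; `loc`, `gtid`,
 * `teams_outlined` and `shared` are hypothetical names.
 *
 *     __kmpc_push_num_teams(&loc, gtid, nt, tl);
 *     __kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined, &shared);
 */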
446 #endif /* OMP_40_ENABLED */
447 
448 
449 //
450 // I don't think this function should ever have been exported.
451 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
452 // openmp code ever called it, but it's been exported from the RTL for so
453 // long that I'm afraid to remove the definition.
454 //
455 int
456 __kmpc_invoke_task_func( int gtid )
457 {
458  return __kmp_invoke_task_func( gtid );
459 }
460 
473 void
474 __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
475 {
476  __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
477  * kmp_fork_call since the tasks to be done are similar in each case.
478  */
479 }
480 
488 void
489 __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
490 {
491  kmp_internal_control_t *top;
492  kmp_info_t *this_thr;
493  kmp_team_t *serial_team;
494 
495  KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
496 
497  /* skip all this code for autopar serialized loops since it results in
498  unacceptable overhead */
499  if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
500  return;
501 
502  // Not autopar code
503  if( ! TCR_4( __kmp_init_parallel ) )
504  __kmp_parallel_initialize();
505 
506  this_thr = __kmp_threads[ global_tid ];
507  serial_team = this_thr->th.th_serial_team;
508 
509  #if OMP_41_ENABLED
510  kmp_task_team_t * task_team = this_thr->th.th_task_team;
511 
512  // we need to wait for the proxy tasks before finishing the thread
513  if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
514  __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
515  #endif
516 
517  KMP_MB();
518  KMP_DEBUG_ASSERT( serial_team );
519  KMP_ASSERT( serial_team -> t.t_serialized );
520  KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
521  KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
522  KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
523  KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
524 
525  /* If necessary, pop the internal control stack values and replace the team values */
526  top = serial_team -> t.t_control_stack_top;
527  if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
528  copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
529  serial_team -> t.t_control_stack_top = top -> next;
530  __kmp_free(top);
531  }
532 
533  //if( serial_team -> t.t_serialized > 1 )
534  serial_team -> t.t_level--;
535 
536  /* pop dispatch buffers stack */
537  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
538  {
539  dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
540  serial_team->t.t_dispatch->th_disp_buffer =
541  serial_team->t.t_dispatch->th_disp_buffer->next;
542  __kmp_free( disp_buffer );
543  }
544 
545  -- serial_team -> t.t_serialized;
546  if ( serial_team -> t.t_serialized == 0 ) {
547 
548  /* return to the parallel section */
549 
550 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
551  if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
552  __kmp_clear_x87_fpu_status_word();
553  __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
554  __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
555  }
556 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
557 
558  this_thr -> th.th_team = serial_team -> t.t_parent;
559  this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
560 
561  /* restore values cached in the thread */
562  this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
563  this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
564  this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
565 
566  /* TODO the below shouldn't need to be adjusted for serialized teams */
567  this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
568  t.t_dispatch[ serial_team -> t.t_master_tid ];
569 
570  __kmp_pop_current_task_from_thread( this_thr );
571 
572  KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
573  this_thr -> th.th_current_task -> td_flags.executing = 1;
574 
575  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
576  // Copy the task team from the new child / old parent team to the thread.
577  this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
578  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
579  global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
580  }
581  } else {
582  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
583  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
584  global_tid, serial_team, serial_team -> t.t_serialized ) );
585  }
586  }
587 
588 #if USE_ITT_BUILD
589  kmp_uint64 cur_time = 0;
590 #if USE_ITT_NOTIFY
591  if ( __itt_get_timestamp_ptr ) {
592  cur_time = __itt_get_timestamp();
593  }
594 #endif /* USE_ITT_NOTIFY */
595  if ( this_thr->th.th_team->t.t_level == 0
596 #if OMP_40_ENABLED
597  && this_thr->th.th_teams_microtask == NULL
598 #endif
599  ) {
600  // Report the barrier
601  this_thr->th.th_ident = loc;
602  if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
603  ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
604  {
605  __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
606  cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
607  if ( __kmp_forkjoin_frames_mode == 3 )
608  // Since the barrier frame for a serialized region coincides with the region itself, we use the same begin timestamp as for the barrier.
609  __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
610  cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
611  } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
612  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
613  // Mark the end of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
614  __kmp_itt_region_joined( global_tid, 1 );
615  }
616 #endif /* USE_ITT_BUILD */
617 
618  if ( __kmp_env_consistency_check )
619  __kmp_pop_parallel( global_tid, NULL );
620 }
621 
630 void
631 __kmpc_flush(ident_t *loc)
632 {
633  KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
634 
635  /* need explicit __mf() here since use volatile instead in library */
636  KMP_MB(); /* Flush all pending memory write invalidates. */
637 
638  #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
639  #if KMP_MIC
640  // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
641  // We shouldn't need it, though, since the ABI rules require that
642  // * If the compiler generates NGO stores it also generates the fence
643  // * If users hand-code NGO stores they should insert the fence
644  // therefore no incomplete unordered stores should be visible.
645  #else
646  // C74404
647  // This is to address non-temporal store instructions (sfence needed).
648  // The clflush instruction is also addressed here (mfence needed).
649  // Probably the non-temporal load movntdqa instruction should also be addressed.
650  // mfence is an SSE2 instruction. Do not execute it if the CPU does not support SSE2.
651  if ( ! __kmp_cpuinfo.initialized ) {
652  __kmp_query_cpuid( & __kmp_cpuinfo );
653  }; // if
654  if ( ! __kmp_cpuinfo.sse2 ) {
655  // CPU cannot execute SSE2 instructions.
656  } else {
657  #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
658  _mm_mfence();
659  #else
660  __sync_synchronize();
661  #endif // KMP_COMPILER_ICC
662  }; // if
663  #endif // KMP_MIC
664  #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
665  // Nothing to see here; move along.
666  #elif KMP_ARCH_PPC64
667  // Nothing needed here (we have a real MB above).
668  #if KMP_OS_CNK
669  // The flushing thread needs to yield here; this prevents a
670  // busy-waiting thread from saturating the pipeline. flush is
671  // often used in loops like this:
672  // while (!flag) {
673  // #pragma omp flush(flag)
674  // }
675  // and adding the yield here is good for at least a 10x speedup
676  // when running >2 threads per core (on the NAS LU benchmark).
677  __kmp_yield(TRUE);
678  #endif
679  #else
680  #error Unknown or unsupported architecture
681  #endif
682 
683 }
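/*
 * Illustrative sketch (not part of the original source): a bare
 *     #pragma omp flush
 * is expected to lower to a single call, with `loc` (hypothetical here)
 * describing the source location.
 *
 *     __kmpc_flush(&loc);
 */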
684 
685 /* -------------------------------------------------------------------------- */
686 
687 /* -------------------------------------------------------------------------- */
688 
696 void
697 __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
698 {
699  KMP_COUNT_BLOCK(OMP_BARRIER);
700  KMP_TIME_BLOCK(OMP_barrier);
701  KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
702 
703  if (! TCR_4(__kmp_init_parallel))
704  __kmp_parallel_initialize();
705 
706  if ( __kmp_env_consistency_check ) {
707  if ( loc == 0 ) {
708  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
709  }; // if
710 
711  __kmp_check_barrier( global_tid, ct_barrier, loc );
712  }
713 
714  __kmp_threads[ global_tid ]->th.th_ident = loc;
715  // TODO: explicit barrier_wait_id:
716  // this function is called when 'barrier' directive is present or
717  // implicit barrier at the end of a worksharing construct.
718  // 1) better to add a per-thread barrier counter to a thread data structure
719  // 2) set to 0 when a new team is created
720  // 3) no sync is required
721 
722  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
723 }
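/*
 * Illustrative sketch (not part of the original source): an explicit
 *     #pragma omp barrier
 * lowers to one call per encountering thread; `loc` is hypothetical.
 *
 *     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
 */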
724 
725 /* The BARRIER for a MASTER section is always explicit */
732 kmp_int32
733 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
734 {
735  KMP_COUNT_BLOCK(OMP_MASTER);
736  int status = 0;
737 
738  KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
739 
740  if( ! TCR_4( __kmp_init_parallel ) )
741  __kmp_parallel_initialize();
742 
743  if( KMP_MASTER_GTID( global_tid )) {
744  KMP_START_EXPLICIT_TIMER(OMP_master);
745  status = 1;
746  }
747 
748 #if OMPT_SUPPORT && OMPT_TRACE
749  if (status) {
750  if (ompt_enabled &&
751  ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
752  kmp_info_t *this_thr = __kmp_threads[ global_tid ];
753  kmp_team_t *team = this_thr -> th.th_team;
754 
755  int tid = __kmp_tid_from_gtid( global_tid );
756  ompt_callbacks.ompt_callback(ompt_event_master_begin)(
757  team->t.ompt_team_info.parallel_id,
758  team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
759  }
760  }
761 #endif
762 
763  if ( __kmp_env_consistency_check ) {
764 #if KMP_USE_DYNAMIC_LOCK
765  if (status)
766  __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
767  else
768  __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
769 #else
770  if (status)
771  __kmp_push_sync( global_tid, ct_master, loc, NULL );
772  else
773  __kmp_check_sync( global_tid, ct_master, loc, NULL );
774 #endif
775  }
776 
777  return status;
778 }
779 
788 void
789 __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
790 {
791  KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
792 
793  KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
794  KMP_STOP_EXPLICIT_TIMER(OMP_master);
795 
796 #if OMPT_SUPPORT && OMPT_TRACE
797  kmp_info_t *this_thr = __kmp_threads[ global_tid ];
798  kmp_team_t *team = this_thr -> th.th_team;
799  if (ompt_enabled &&
800  ompt_callbacks.ompt_callback(ompt_event_master_end)) {
801  int tid = __kmp_tid_from_gtid( global_tid );
802  ompt_callbacks.ompt_callback(ompt_event_master_end)(
803  team->t.ompt_team_info.parallel_id,
804  team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
805  }
806 #endif
807 
808  if ( __kmp_env_consistency_check ) {
809  if( global_tid < 0 )
810  KMP_WARNING( ThreadIdentInvalid );
811 
812  if( KMP_MASTER_GTID( global_tid ))
813  __kmp_pop_sync( global_tid, ct_master, loc );
814  }
815 }
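/*
 * Illustrative sketch (not part of the original source): the usual lowering of
 *     #pragma omp master
 *     { body(); }
 * guards the body with the return value of __kmpc_master, and only the thread
 * that entered the region calls __kmpc_end_master; `loc`, `gtid` and `body`
 * are hypothetical.
 *
 *     if (__kmpc_master(&loc, gtid)) {
 *         body();
 *         __kmpc_end_master(&loc, gtid);
 *     }
 */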
816 
824 void
825 __kmpc_ordered( ident_t * loc, kmp_int32 gtid )
826 {
827  int cid = 0;
828  kmp_info_t *th;
829  KMP_DEBUG_ASSERT( __kmp_init_serial );
830 
831  KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
832 
833  if (! TCR_4(__kmp_init_parallel))
834  __kmp_parallel_initialize();
835 
836 #if USE_ITT_BUILD
837  __kmp_itt_ordered_prep( gtid );
838  // TODO: ordered_wait_id
839 #endif /* USE_ITT_BUILD */
840 
841  th = __kmp_threads[ gtid ];
842 
843 #if OMPT_SUPPORT && OMPT_TRACE
844  if (ompt_enabled) {
845  /* OMPT state update */
846  th->th.ompt_thread_info.wait_id = (uint64_t) loc;
847  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
848 
849  /* OMPT event callback */
850  if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
851  ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
852  th->th.ompt_thread_info.wait_id);
853  }
854  }
855 #endif
856 
857  if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
858  (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
859  else
860  __kmp_parallel_deo( & gtid, & cid, loc );
861 
862 #if OMPT_SUPPORT && OMPT_TRACE
863  if (ompt_enabled) {
864  /* OMPT state update */
865  th->th.ompt_thread_info.state = ompt_state_work_parallel;
866  th->th.ompt_thread_info.wait_id = 0;
867 
868  /* OMPT event callback */
869  if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
870  ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
871  th->th.ompt_thread_info.wait_id);
872  }
873  }
874 #endif
875 
876 #if USE_ITT_BUILD
877  __kmp_itt_ordered_start( gtid );
878 #endif /* USE_ITT_BUILD */
879 }
880 
888 void
889 __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
890 {
891  int cid = 0;
892  kmp_info_t *th;
893 
894  KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
895 
896 #if USE_ITT_BUILD
897  __kmp_itt_ordered_end( gtid );
898  // TODO: ordered_wait_id
899 #endif /* USE_ITT_BUILD */
900 
901  th = __kmp_threads[ gtid ];
902 
903  if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
904  (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
905  else
906  __kmp_parallel_dxo( & gtid, & cid, loc );
907 
908 #if OMPT_SUPPORT && OMPT_BLAME
909  if (ompt_enabled &&
910  ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
911  ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
912  th->th.ompt_thread_info.wait_id);
913  }
914 #endif
915 }
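/*
 * Illustrative sketch (not part of the original source): inside a loop compiled
 * with an ordered clause, each
 *     #pragma omp ordered
 *     { body(i); }
 * block is bracketed by the two entry points below; `loc`, `gtid`, `body` and
 * `i` are hypothetical.
 *
 *     __kmpc_ordered(&loc, gtid);
 *     body(i);
 *     __kmpc_end_ordered(&loc, gtid);
 */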
916 
917 #if KMP_USE_DYNAMIC_LOCK
918 
919 static __forceinline void
920 __kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
921 {
922  // Pointer to the allocated indirect lock is written to crit, while indexing is ignored.
923  void *idx;
924  kmp_indirect_lock_t **lck;
925  lck = (kmp_indirect_lock_t **)crit;
926  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
927  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
928  KMP_SET_I_LOCK_LOCATION(ilk, loc);
929  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
930  KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
931 #if USE_ITT_BUILD
932  __kmp_itt_critical_creating(ilk->lock, loc);
933 #endif
934  int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
935  if (status == 0) {
936 #if USE_ITT_BUILD
937  __kmp_itt_critical_destroyed(ilk->lock);
938 #endif
939  // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit.
940  //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
941  }
942  KMP_DEBUG_ASSERT(*lck != NULL);
943 }
944 
945 // Fast-path acquire tas lock
946 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \
947  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
948  if (l->lk.poll != KMP_LOCK_FREE(tas) || \
949  ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
950  kmp_uint32 spins; \
951  KMP_FSYNC_PREPARE(l); \
952  KMP_INIT_YIELD(spins); \
953  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
954  KMP_YIELD(TRUE); \
955  } else { \
956  KMP_YIELD_SPIN(spins); \
957  } \
958  while (l->lk.poll != KMP_LOCK_FREE(tas) || \
959  ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
960  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
961  KMP_YIELD(TRUE); \
962  } else { \
963  KMP_YIELD_SPIN(spins); \
964  } \
965  } \
966  } \
967  KMP_FSYNC_ACQUIRED(l); \
968 }
969 
970 // Fast-path test tas lock
971 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \
972  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
973  rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
974  KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \
975 }
976 
977 // Fast-path release tas lock
978 #define KMP_RELEASE_TAS_LOCK(lock, gtid) { \
979  TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
980  KMP_MB(); \
981 }
982 
983 #if KMP_USE_FUTEX
984 
985 # include <unistd.h>
986 # include <sys/syscall.h>
987 # ifndef FUTEX_WAIT
988 # define FUTEX_WAIT 0
989 # endif
990 # ifndef FUTEX_WAKE
991 # define FUTEX_WAKE 1
992 # endif
993 
994 // Fast-path acquire futex lock
995 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
996  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
997  kmp_int32 gtid_code = (gtid+1) << 1; \
998  KMP_MB(); \
999  KMP_FSYNC_PREPARE(ftx); \
1000  kmp_int32 poll_val; \
1001  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1002  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1003  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1004  if (!cond) { \
1005  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
1006  continue; \
1007  } \
1008  poll_val |= KMP_LOCK_BUSY(1, futex); \
1009  } \
1010  kmp_int32 rc; \
1011  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
1012  continue; \
1013  } \
1014  gtid_code |= 1; \
1015  } \
1016  KMP_FSYNC_ACQUIRED(ftx); \
1017 }
1018 
1019 // Fast-path test futex lock
1020 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \
1021  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1022  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1)) { \
1023  KMP_FSYNC_ACQUIRED(ftx); \
1024  rc = TRUE; \
1025  } else { \
1026  rc = FALSE; \
1027  } \
1028 }
1029 
1030 // Fast-path release futex lock
1031 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \
1032  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1033  KMP_MB(); \
1034  KMP_FSYNC_RELEASING(ftx); \
1035  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1036  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1037  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1038  } \
1039  KMP_MB(); \
1040  KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1041 }
1042 
1043 #endif // KMP_USE_FUTEX
1044 
1045 #else // KMP_USE_DYNAMIC_LOCK
1046 
1047 static kmp_user_lock_p
1048 __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1049 {
1050  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1051 
1052  //
1053  // Because of the double-check, the following load
1054  // doesn't need to be volatile.
1055  //
1056  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1057 
1058  if ( lck == NULL ) {
1059  void * idx;
1060 
1061  // Allocate & initialize the lock.
1062  // Remember allocated locks in table in order to free them in __kmp_cleanup()
1063  lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1064  __kmp_init_user_lock_with_checks( lck );
1065  __kmp_set_user_lock_location( lck, loc );
1066 #if USE_ITT_BUILD
1067  __kmp_itt_critical_creating( lck );
1068  // __kmp_itt_critical_creating() should be called *before* the first use of the underlying
1069  // lock; this is the only place where we can guarantee it. There is a chance the lock will be
1070  // destroyed without ever being used, which is not a problem: this is not a real event seen
1071  // by the user, but rather a way of setting a name for the object (lock). See kmp_itt.h for details.
1072 #endif /* USE_ITT_BUILD */
1073 
1074  //
1075  // Use a cmpxchg instruction to slam the start of the critical
1076  // section with the lock pointer. If another thread beat us
1077  // to it, deallocate the lock, and use the lock that the other
1078  // thread allocated.
1079  //
1080  int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1081 
1082  if ( status == 0 ) {
1083  // Deallocate the lock and reload the value.
1084 #if USE_ITT_BUILD
1085  __kmp_itt_critical_destroyed( lck );
1086  // Let ITT know the lock is destroyed and the same memory location may be reused for
1087  // another purpose.
1088 #endif /* USE_ITT_BUILD */
1089  __kmp_destroy_user_lock_with_checks( lck );
1090  __kmp_user_lock_free( &idx, gtid, lck );
1091  lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1092  KMP_DEBUG_ASSERT( lck != NULL );
1093  }
1094  }
1095  return lck;
1096 }
1097 
1098 #endif // KMP_USE_DYNAMIC_LOCK
1099 
1110 void
1111 __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
1112 {
1113 #if KMP_USE_DYNAMIC_LOCK
1114  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1115 #else
1116  KMP_COUNT_BLOCK(OMP_CRITICAL);
1117  kmp_user_lock_p lck;
1118 
1119  KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1120 
1121  //TODO: add THR_OVHD_STATE
1122 
1123  KMP_CHECK_USER_LOCK_INIT();
1124 
1125  if ( ( __kmp_user_lock_kind == lk_tas )
1126  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1127  lck = (kmp_user_lock_p)crit;
1128  }
1129 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1130  else if ( ( __kmp_user_lock_kind == lk_futex )
1131  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1132  lck = (kmp_user_lock_p)crit;
1133  }
1134 #endif
1135  else { // ticket, queuing or drdpa
1136  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1137  }
1138 
1139  if ( __kmp_env_consistency_check )
1140  __kmp_push_sync( global_tid, ct_critical, loc, lck );
1141 
1142  /* since the critical directive binds to all threads, not just
1143  * the current team, we have to check this even if we are in a
1144  * serialized team */
1145  /* also, even if we are the uber thread, we still have to acquire the lock,
1146  * as we have to contend with sibling threads */
1147 
1148 #if USE_ITT_BUILD
1149  __kmp_itt_critical_acquiring( lck );
1150 #endif /* USE_ITT_BUILD */
1151  // Value of 'crit' should be good for using as a critical_id of the critical section directive.
1152  __kmp_acquire_user_lock_with_checks( lck, global_tid );
1153 
1154 #if USE_ITT_BUILD
1155  __kmp_itt_critical_acquired( lck );
1156 #endif /* USE_ITT_BUILD */
1157 
1158  KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1159 #endif // KMP_USE_DYNAMIC_LOCK
1160 }
1161 
1162 #if KMP_USE_DYNAMIC_LOCK
1163 
1164 // Converts the given hint to an internal lock implementation
1165 static __forceinline kmp_dyna_lockseq_t
1166 __kmp_map_hint_to_lock(uintptr_t hint)
1167 {
1168 #if KMP_USE_TSX
1169 # define KMP_TSX_LOCK(seq) lockseq_##seq
1170 #else
1171 # define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1172 #endif
1173  // Hints that do not require further logic
1174  if (hint & kmp_lock_hint_hle)
1175  return KMP_TSX_LOCK(hle);
1176  if (hint & kmp_lock_hint_rtm)
1177  return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
1178  if (hint & kmp_lock_hint_adaptive)
1179  return (__kmp_cpuinfo.rtm)? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;
1180 
1181  // Rule out conflicting hints first by returning the default lock
1182  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1183  return __kmp_user_lock_seq;
1184  if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
1185  return __kmp_user_lock_seq;
1186 
1187  // Do not even consider speculation when it appears to be contended
1188  if (hint & omp_lock_hint_contended)
1189  return lockseq_queuing;
1190 
1191  // Uncontended lock without speculation
1192  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1193  return lockseq_tas;
1194 
1195  // HLE lock for speculation
1196  if (hint & omp_lock_hint_speculative)
1197  return KMP_TSX_LOCK(hle);
1198 
1199  return __kmp_user_lock_seq;
1200 }
1201 
1214 void
1215 __kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
1216 {
1217  KMP_COUNT_BLOCK(OMP_CRITICAL);
1218  kmp_user_lock_p lck;
1219 
1220  KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1221 
1222  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1223  // Check if it is initialized.
1224  if (*lk == 0) {
1225  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1226  if (KMP_IS_D_LOCK(lckseq)) {
1227  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
1228  } else {
1229  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1230  }
1231  }
1232  // Branch for accessing the actual lock object and set operation. This branching is inevitable since
1233  // this lock initialization does not follow the normal dispatch path (lock table is not used).
1234  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1235  lck = (kmp_user_lock_p)lk;
1236  if (__kmp_env_consistency_check) {
1237  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
1238  }
1239 # if USE_ITT_BUILD
1240  __kmp_itt_critical_acquiring(lck);
1241 # endif
1242 # if KMP_USE_INLINED_TAS
1243  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1244  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1245  } else
1246 # elif KMP_USE_INLINED_FUTEX
1247  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1248  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1249  } else
1250 # endif
1251  {
1252  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1253  }
1254  } else {
1255  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1256  lck = ilk->lock;
1257  if (__kmp_env_consistency_check) {
1258  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
1259  }
1260 # if USE_ITT_BUILD
1261  __kmp_itt_critical_acquiring(lck);
1262 # endif
1263  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1264  }
1265 
1266 #if USE_ITT_BUILD
1267  __kmp_itt_critical_acquired( lck );
1268 #endif /* USE_ITT_BUILD */
1269 
1270  KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1271 } // __kmpc_critical_with_hint
1272 
1273 #endif // KMP_USE_DYNAMIC_LOCK
1274 
1284 void
1285 __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1286 {
1287  kmp_user_lock_p lck;
1288 
1289  KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1290 
1291 #if KMP_USE_DYNAMIC_LOCK
1292  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1293  lck = (kmp_user_lock_p)crit;
1294  KMP_ASSERT(lck != NULL);
1295  if (__kmp_env_consistency_check) {
1296  __kmp_pop_sync(global_tid, ct_critical, loc);
1297  }
1298 # if USE_ITT_BUILD
1299  __kmp_itt_critical_releasing( lck );
1300 # endif
1301 # if KMP_USE_INLINED_TAS
1302  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1303  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1304  } else
1305 # elif KMP_USE_INLINED_FUTEX
1306  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1307  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1308  } else
1309 # endif
1310  {
1311  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1312  }
1313  } else {
1314  kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1315  KMP_ASSERT(ilk != NULL);
1316  lck = ilk->lock;
1317  if (__kmp_env_consistency_check) {
1318  __kmp_pop_sync(global_tid, ct_critical, loc);
1319  }
1320 # if USE_ITT_BUILD
1321  __kmp_itt_critical_releasing( lck );
1322 # endif
1323  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1324  }
1325 
1326 #else // KMP_USE_DYNAMIC_LOCK
1327 
1328  if ( ( __kmp_user_lock_kind == lk_tas )
1329  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1330  lck = (kmp_user_lock_p)crit;
1331  }
1332 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1333  else if ( ( __kmp_user_lock_kind == lk_futex )
1334  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1335  lck = (kmp_user_lock_p)crit;
1336  }
1337 #endif
1338  else { // ticket, queuing or drdpa
1339  lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1340  }
1341 
1342  KMP_ASSERT(lck != NULL);
1343 
1344  if ( __kmp_env_consistency_check )
1345  __kmp_pop_sync( global_tid, ct_critical, loc );
1346 
1347 #if USE_ITT_BUILD
1348  __kmp_itt_critical_releasing( lck );
1349 #endif /* USE_ITT_BUILD */
1350  // Value of 'crit' should be good for using as a critical_id of the critical section directive.
1351  __kmp_release_user_lock_with_checks( lck, global_tid );
1352 
1353 #if OMPT_SUPPORT && OMPT_BLAME
1354  if (ompt_enabled &&
1355  ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
1356  ompt_callbacks.ompt_callback(ompt_event_release_critical)(
1357  (uint64_t) lck);
1358  }
1359 #endif
1360 
1361 #endif // KMP_USE_DYNAMIC_LOCK
1362 
1363  KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1364 }
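/*
 * Illustrative sketch (not part of the original source): for
 *     #pragma omp critical (tag)
 *     { body(); }
 * a compiler typically emits one zero-initialized, file-scope kmp_critical_name
 * per critical name and brackets the body with the two calls; `crit_tag`, `loc`,
 * `gtid` and `body` are hypothetical.
 *
 *     static kmp_critical_name crit_tag;   // zero-initialized lock storage
 *     ...
 *     __kmpc_critical(&loc, gtid, &crit_tag);
 *     body();
 *     __kmpc_end_critical(&loc, gtid, &crit_tag);
 */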
1365 
1374 kmp_int32
1375 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1376 {
1377  int status;
1378 
1379  KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1380 
1381  if (! TCR_4(__kmp_init_parallel))
1382  __kmp_parallel_initialize();
1383 
1384  if ( __kmp_env_consistency_check )
1385  __kmp_check_barrier( global_tid, ct_barrier, loc );
1386 
1387 #if USE_ITT_NOTIFY
1388  __kmp_threads[global_tid]->th.th_ident = loc;
1389 #endif
1390  status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1391 
1392  return (status != 0) ? 0 : 1;
1393 }
1394 
1404 void
1405 __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1406 {
1407  KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1408 
1409  __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1410 }
1411 
1422 kmp_int32
1423 __kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1424 {
1425  kmp_int32 ret;
1426 
1427  KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1428 
1429  if (! TCR_4(__kmp_init_parallel))
1430  __kmp_parallel_initialize();
1431 
1432  if ( __kmp_env_consistency_check ) {
1433  if ( loc == 0 ) {
1434  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1435  }
1436  __kmp_check_barrier( global_tid, ct_barrier, loc );
1437  }
1438 
1439 #if USE_ITT_NOTIFY
1440  __kmp_threads[global_tid]->th.th_ident = loc;
1441 #endif
1442  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1443 
1444  ret = __kmpc_master (loc, global_tid);
1445 
1446  if ( __kmp_env_consistency_check ) {
1447  /* there's no __kmpc_end_master called; so the (stats) */
1448  /* actions of __kmpc_end_master are done here */
1449 
1450  if ( global_tid < 0 ) {
1451  KMP_WARNING( ThreadIdentInvalid );
1452  }
1453  if (ret) {
1454  /* only one thread should do the pop since only */
1455  /* one did the push (see __kmpc_master()) */
1456 
1457  __kmp_pop_sync( global_tid, ct_master, loc );
1458  }
1459  }
1460 
1461  return (ret);
1462 }
1463 
1464 /* The BARRIER for a SINGLE process section is always explicit */
1476 kmp_int32
1477 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
1478 {
1479  KMP_COUNT_BLOCK(OMP_SINGLE);
1480  kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
1481  if(rc == TRUE) {
1482  KMP_START_EXPLICIT_TIMER(OMP_single);
1483  }
1484 
1485 #if OMPT_SUPPORT && OMPT_TRACE
1486  kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1487  kmp_team_t *team = this_thr -> th.th_team;
1488  int tid = __kmp_tid_from_gtid( global_tid );
1489 
1490  if (ompt_enabled) {
1491  if (rc) {
1492  if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
1493  ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
1494  team->t.ompt_team_info.parallel_id,
1495  team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
1496  team->t.ompt_team_info.microtask);
1497  }
1498  } else {
1499  if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
1500  ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
1501  team->t.ompt_team_info.parallel_id,
1502  team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1503  }
1504  this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
1505  }
1506  }
1507 #endif
1508 
1509  return rc;
1510 }
1511 
1521 void
1522 __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1523 {
1524  __kmp_exit_single( global_tid );
1525  KMP_STOP_EXPLICIT_TIMER(OMP_single);
1526 
1527 #if OMPT_SUPPORT && OMPT_TRACE
1528  kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1529  kmp_team_t *team = this_thr -> th.th_team;
1530  int tid = __kmp_tid_from_gtid( global_tid );
1531 
1532  if (ompt_enabled &&
1533  ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
1534  ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
1535  team->t.ompt_team_info.parallel_id,
1536  team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1537  }
1538 #endif
1539 }
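/*
 * Illustrative sketch (not part of the original source): a typical lowering of
 *     #pragma omp single
 *     { body(); }
 * (without a nowait clause) lets only the winning thread execute the body and
 * then joins every thread at a barrier; `loc`, `gtid` and `body` are hypothetical.
 *
 *     if (__kmpc_single(&loc, gtid)) {
 *         body();
 *         __kmpc_end_single(&loc, gtid);
 *     }
 *     __kmpc_barrier(&loc, gtid);   // implicit barrier at the end of single
 */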
1540 
1548 void
1549 __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1550 {
1551  KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1552 
1553 #if OMPT_SUPPORT && OMPT_TRACE
1554  if (ompt_enabled &&
1555  ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
1556  kmp_info_t *this_thr = __kmp_threads[ global_tid ];
1557  kmp_team_t *team = this_thr -> th.th_team;
1558  int tid = __kmp_tid_from_gtid( global_tid );
1559 
1560  ompt_callbacks.ompt_callback(ompt_event_loop_end)(
1561  team->t.ompt_team_info.parallel_id,
1562  team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1563  }
1564 #endif
1565 
1566  if ( __kmp_env_consistency_check )
1567  __kmp_pop_workshare( global_tid, ct_pdo, loc );
1568 }
1569 
1570 /*
1571  * User routines which take C-style arguments (call by value)
1572  * different from the Fortran equivalent routines
1573  */
1574 
1575 void
1576 ompc_set_num_threads( int arg )
1577 {
1578 // !!!!! TODO: check the per-task binding
1579  __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1580 }
1581 
1582 void
1583 ompc_set_dynamic( int flag )
1584 {
1585  kmp_info_t *thread;
1586 
1587  /* For the thread-private implementation of the internal controls */
1588  thread = __kmp_entry_thread();
1589 
1590  __kmp_save_internal_controls( thread );
1591 
1592  set__dynamic( thread, flag ? TRUE : FALSE );
1593 }
1594 
1595 void
1596 ompc_set_nested( int flag )
1597 {
1598  kmp_info_t *thread;
1599 
1600  /* For the thread-private internal controls implementation */
1601  thread = __kmp_entry_thread();
1602 
1603  __kmp_save_internal_controls( thread );
1604 
1605  set__nested( thread, flag ? TRUE : FALSE );
1606 }
1607 
1608 void
1609 ompc_set_max_active_levels( int max_active_levels )
1610 {
1611  /* TO DO */
1612  /* we want per-task implementation of this internal control */
1613 
1614  /* For the per-thread internal controls implementation */
1615  __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1616 }
1617 
1618 void
1619 ompc_set_schedule( omp_sched_t kind, int modifier )
1620 {
1621 // !!!!! TODO: check the per-task binding
1622  __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1623 }
1624 
1625 int
1626 ompc_get_ancestor_thread_num( int level )
1627 {
1628  return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1629 }
1630 
1631 int
1632 ompc_get_team_size( int level )
1633 {
1634  return __kmp_get_team_size( __kmp_entry_gtid(), level );
1635 }
1636 
1637 void
1638 kmpc_set_stacksize( int arg )
1639 {
1640  // __kmp_aux_set_stacksize initializes the library if needed
1641  __kmp_aux_set_stacksize( arg );
1642 }
1643 
1644 void
1645 kmpc_set_stacksize_s( size_t arg )
1646 {
1647  // __kmp_aux_set_stacksize initializes the library if needed
1648  __kmp_aux_set_stacksize( arg );
1649 }
1650 
1651 void
1652 kmpc_set_blocktime( int arg )
1653 {
1654  int gtid, tid;
1655  kmp_info_t *thread;
1656 
1657  gtid = __kmp_entry_gtid();
1658  tid = __kmp_tid_from_gtid(gtid);
1659  thread = __kmp_thread_from_gtid(gtid);
1660 
1661  __kmp_aux_set_blocktime( arg, thread, tid );
1662 }
1663 
1664 void
1665 kmpc_set_library( int arg )
1666 {
1667  // __kmp_user_set_library initializes the library if needed
1668  __kmp_user_set_library( (enum library_type)arg );
1669 }
1670 
1671 void
1672 kmpc_set_defaults( char const * str )
1673 {
1674  // __kmp_aux_set_defaults initializes the library if needed
1675  __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
1676 }
1677 
1678 int
1679 kmpc_set_affinity_mask_proc( int proc, void **mask )
1680 {
1681 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1682  return -1;
1683 #else
1684  if ( ! TCR_4(__kmp_init_middle) ) {
1685  __kmp_middle_initialize();
1686  }
1687  return __kmp_aux_set_affinity_mask_proc( proc, mask );
1688 #endif
1689 }
1690 
1691 int
1692 kmpc_unset_affinity_mask_proc( int proc, void **mask )
1693 {
1694 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1695  return -1;
1696 #else
1697  if ( ! TCR_4(__kmp_init_middle) ) {
1698  __kmp_middle_initialize();
1699  }
1700  return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1701 #endif
1702 }
1703 
1704 int
1705 kmpc_get_affinity_mask_proc( int proc, void **mask )
1706 {
1707 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1708  return -1;
1709 #else
1710  if ( ! TCR_4(__kmp_init_middle) ) {
1711  __kmp_middle_initialize();
1712  }
1713  return __kmp_aux_get_affinity_mask_proc( proc, mask );
1714 #endif
1715 }
1716 
1717 
1718 /* -------------------------------------------------------------------------- */
1759 void
1760 __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1761 {
1762  void **data_ptr;
1763 
1764  KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1765 
1766  KMP_MB();
1767 
1768  data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1769 
1770  if ( __kmp_env_consistency_check ) {
1771  if ( loc == 0 ) {
1772  KMP_WARNING( ConstructIdentInvalid );
1773  }
1774  }
1775 
1776  /* ToDo: Optimize the following two barriers into some kind of split barrier */
1777 
1778  if (didit) *data_ptr = cpy_data;
1779 
1780  /* This barrier is not a barrier region boundary */
1781 #if USE_ITT_NOTIFY
1782  __kmp_threads[gtid]->th.th_ident = loc;
1783 #endif
1784  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1785 
1786  if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1787 
1788  /* Consider next barrier the user-visible barrier for barrier region boundaries */
1789  /* Nesting checks are already handled by the single construct checks */
1790 
1791 #if USE_ITT_NOTIFY
1792  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1793 #endif
1794  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1795 }
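/*
 * Illustrative sketch (not part of the original source): for
 *     #pragma omp single copyprivate(x)
 * every thread calls __kmpc_copyprivate with the address of its own copy of x,
 * and only the thread that executed the single region passes didit != 0. The
 * helper receives (destination, source) pointers, matching the
 * (*cpy_func)(cpy_data, *data_ptr) call above. `copy_x`, `x`, `produce_value`,
 * `loc` and `gtid` are hypothetical.
 *
 *     static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
 *     ...
 *     kmp_int32 didit = __kmpc_single(&loc, gtid);
 *     if (didit) {
 *         x = produce_value();           // single-region body
 *         __kmpc_end_single(&loc, gtid);
 *     }
 *     __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x, didit);
 */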
1796 
1797 /* -------------------------------------------------------------------------- */
1798 
1799 #define INIT_LOCK __kmp_init_user_lock_with_checks
1800 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1801 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1802 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1803 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1804 #define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1805 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
1806 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1807 #define TEST_LOCK __kmp_test_user_lock_with_checks
1808 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1809 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1810 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1811 
1812 
1813 /*
1814  * TODO: Make check abort messages use location info & pass it
1815  * into with_checks routines
1816  */
1817 
1818 #if KMP_USE_DYNAMIC_LOCK
1819 
1820 // internal lock initializer
1821 static __forceinline void
1822 __kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1823 {
1824  if (KMP_IS_D_LOCK(seq)) {
1825  KMP_INIT_D_LOCK(lock, seq);
1826 #if USE_ITT_BUILD
1827  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
1828 #endif
1829  } else {
1830  KMP_INIT_I_LOCK(lock, seq);
1831 #if USE_ITT_BUILD
1832  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1833  __kmp_itt_lock_creating(ilk->lock, loc);
1834 #endif
1835  }
1836 }
1837 
1838 // internal nest lock initializer
1839 static __forceinline void
1840 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
1841 {
1842 #if KMP_USE_TSX
1843  // Don't have nested lock implementation for speculative locks
1844  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
1845  seq = __kmp_user_lock_seq;
1846 #endif
1847  switch (seq) {
1848  case lockseq_tas:
1849  seq = lockseq_nested_tas;
1850  break;
1851 #if KMP_USE_FUTEX
1852  case lockseq_futex:
1853  seq = lockseq_nested_futex;
1854  break;
1855 #endif
1856  case lockseq_ticket:
1857  seq = lockseq_nested_ticket;
1858  break;
1859  case lockseq_queuing:
1860  seq = lockseq_nested_queuing;
1861  break;
1862  case lockseq_drdpa:
1863  seq = lockseq_nested_drdpa;
1864  break;
1865  default:
1866  seq = lockseq_nested_queuing;
1867  }
1868  KMP_INIT_I_LOCK(lock, seq);
1869 #if USE_ITT_BUILD
1870  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
1871  __kmp_itt_lock_creating(ilk->lock, loc);
1872 #endif
1873 }
1874 
1875 /* initialize the lock with a hint */
1876 void
1877 __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1878 {
1879  KMP_DEBUG_ASSERT(__kmp_init_serial);
1880  if (__kmp_env_consistency_check && user_lock == NULL) {
1881  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
1882  }
1883 
1884  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1885 }
1886 
1887 /* initialize the lock with a hint */
1888 void
1889 __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
1890 {
1891  KMP_DEBUG_ASSERT(__kmp_init_serial);
1892  if (__kmp_env_consistency_check && user_lock == NULL) {
1893  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
1894  }
1895 
1896  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
1897 }
1898 
1899 #endif // KMP_USE_DYNAMIC_LOCK
1900 
1901 /* initialize the lock */
1902 void
1903 __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1904 #if KMP_USE_DYNAMIC_LOCK
1905  KMP_DEBUG_ASSERT(__kmp_init_serial);
1906  if (__kmp_env_consistency_check && user_lock == NULL) {
1907  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1908  }
1909  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
1910 
1911 #else // KMP_USE_DYNAMIC_LOCK
1912 
1913  static char const * const func = "omp_init_lock";
1914  kmp_user_lock_p lck;
1915  KMP_DEBUG_ASSERT( __kmp_init_serial );
1916 
1917  if ( __kmp_env_consistency_check ) {
1918  if ( user_lock == NULL ) {
1919  KMP_FATAL( LockIsUninitialized, func );
1920  }
1921  }
1922 
1923  KMP_CHECK_USER_LOCK_INIT();
1924 
1925  if ( ( __kmp_user_lock_kind == lk_tas )
1926  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1927  lck = (kmp_user_lock_p)user_lock;
1928  }
1929 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1930  else if ( ( __kmp_user_lock_kind == lk_futex )
1931  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1932  lck = (kmp_user_lock_p)user_lock;
1933  }
1934 #endif
1935  else {
1936  lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
1937  }
1938  INIT_LOCK( lck );
1939  __kmp_set_user_lock_location( lck, loc );
1940 
1941 #if OMPT_SUPPORT && OMPT_TRACE
1942  if (ompt_enabled &&
1943  ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
1944  ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
1945  }
1946 #endif
1947 
1948 #if USE_ITT_BUILD
1949  __kmp_itt_lock_creating( lck );
1950 #endif /* USE_ITT_BUILD */
1951 
1952 #endif // KMP_USE_DYNAMIC_LOCK
1953 } // __kmpc_init_lock
1954 
1955 /* initialize the lock */
1956 void
1957 __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1958 #if KMP_USE_DYNAMIC_LOCK
1959 
1960  KMP_DEBUG_ASSERT(__kmp_init_serial);
1961  if (__kmp_env_consistency_check && user_lock == NULL) {
1962  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1963  }
1964  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
1965 
1966 #else // KMP_USE_DYNAMIC_LOCK
1967 
1968  static char const * const func = "omp_init_nest_lock";
1969  kmp_user_lock_p lck;
1970  KMP_DEBUG_ASSERT( __kmp_init_serial );
1971 
1972  if ( __kmp_env_consistency_check ) {
1973  if ( user_lock == NULL ) {
1974  KMP_FATAL( LockIsUninitialized, func );
1975  }
1976  }
1977 
1978  KMP_CHECK_USER_LOCK_INIT();
1979 
1980  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1981  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1982  lck = (kmp_user_lock_p)user_lock;
1983  }
1984 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1985  else if ( ( __kmp_user_lock_kind == lk_futex )
1986  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1987  <= OMP_NEST_LOCK_T_SIZE ) ) {
1988  lck = (kmp_user_lock_p)user_lock;
1989  }
1990 #endif
1991  else {
1992  lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
1993  }
1994 
1995  INIT_NESTED_LOCK( lck );
1996  __kmp_set_user_lock_location( lck, loc );
1997 
1998 #if OMPT_SUPPORT && OMPT_TRACE
1999  if (ompt_enabled &&
2000  ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
2001  ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
2002  }
2003 #endif
2004 
2005 #if USE_ITT_BUILD
2006  __kmp_itt_lock_creating( lck );
2007 #endif /* USE_ITT_BUILD */
2008 
2009 #endif // KMP_USE_DYNAMIC_LOCK
2010 } // __kmpc_init_nest_lock
2011 
2012 void
2013 __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
2014 #if KMP_USE_DYNAMIC_LOCK
2015 
2016 # if USE_ITT_BUILD
2017  kmp_user_lock_p lck;
2018  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2019  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2020  } else {
2021  lck = (kmp_user_lock_p)user_lock;
2022  }
2023  __kmp_itt_lock_destroyed(lck);
2024 # endif
2025  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2026 #else
2027  kmp_user_lock_p lck;
2028 
2029  if ( ( __kmp_user_lock_kind == lk_tas )
2030  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2031  lck = (kmp_user_lock_p)user_lock;
2032  }
2033 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2034  else if ( ( __kmp_user_lock_kind == lk_futex )
2035  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2036  lck = (kmp_user_lock_p)user_lock;
2037  }
2038 #endif
2039  else {
2040  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
2041  }
2042 
2043 #if OMPT_SUPPORT && OMPT_TRACE
2044  if (ompt_enabled &&
2045  ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
2046  ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
2047  }
2048 #endif
2049 
2050 #if USE_ITT_BUILD
2051  __kmp_itt_lock_destroyed( lck );
2052 #endif /* USE_ITT_BUILD */
2053  DESTROY_LOCK( lck );
2054 
2055  if ( ( __kmp_user_lock_kind == lk_tas )
2056  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2057  ;
2058  }
2059 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2060  else if ( ( __kmp_user_lock_kind == lk_futex )
2061  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2062  ;
2063  }
2064 #endif
2065  else {
2066  __kmp_user_lock_free( user_lock, gtid, lck );
2067  }
2068 #endif // KMP_USE_DYNAMIC_LOCK
2069 } // __kmpc_destroy_lock
2070 
2071 /* destroy the nestable lock */
2072 void
2073 __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
2074 #if KMP_USE_DYNAMIC_LOCK
2075 
2076 # if USE_ITT_BUILD
2077  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2078  __kmp_itt_lock_destroyed(ilk->lock);
2079 # endif
2080  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2081 
2082 #else // KMP_USE_DYNAMIC_LOCK
2083 
2084  kmp_user_lock_p lck;
2085 
2086  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2087  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2088  lck = (kmp_user_lock_p)user_lock;
2089  }
2090 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2091  else if ( ( __kmp_user_lock_kind == lk_futex )
2092  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2093  <= OMP_NEST_LOCK_T_SIZE ) ) {
2094  lck = (kmp_user_lock_p)user_lock;
2095  }
2096 #endif
2097  else {
2098  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
2099  }
2100 
2101 #if OMPT_SUPPORT && OMPT_TRACE
2102  if (ompt_enabled &&
2103  ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
2104  ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
2105  }
2106 #endif
2107 
2108 #if USE_ITT_BUILD
2109  __kmp_itt_lock_destroyed( lck );
2110 #endif /* USE_ITT_BUILD */
2111 
2112  DESTROY_NESTED_LOCK( lck );
2113 
2114  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2115  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2116  ;
2117  }
2118 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2119  else if ( ( __kmp_user_lock_kind == lk_futex )
2120  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2121  <= OMP_NEST_LOCK_T_SIZE ) ) {
2122  ;
2123  }
2124 #endif
2125  else {
2126  __kmp_user_lock_free( user_lock, gtid, lck );
2127  }
2128 #endif // KMP_USE_DYNAMIC_LOCK
2129 } // __kmpc_destroy_nest_lock
2130 
2131 void
2132 __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
2133  KMP_COUNT_BLOCK(OMP_set_lock);
2134 #if KMP_USE_DYNAMIC_LOCK
2135  int tag = KMP_EXTRACT_D_TAG(user_lock);
2136 # if USE_ITT_BUILD
2137  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
2138 # endif
2139 # if KMP_USE_INLINED_TAS
2140  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2141  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2142  } else
2143 # elif KMP_USE_INLINED_FUTEX
2144  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2145  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2146  } else
2147 # endif
2148  {
2149  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2150  }
2151 # if USE_ITT_BUILD
2152  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2153 # endif
2154 
2155 #else // KMP_USE_DYNAMIC_LOCK
2156 
2157  kmp_user_lock_p lck;
2158 
2159  if ( ( __kmp_user_lock_kind == lk_tas )
2160  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2161  lck = (kmp_user_lock_p)user_lock;
2162  }
2163 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2164  else if ( ( __kmp_user_lock_kind == lk_futex )
2165  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2166  lck = (kmp_user_lock_p)user_lock;
2167  }
2168 #endif
2169  else {
2170  lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
2171  }
2172 
2173 #if USE_ITT_BUILD
2174  __kmp_itt_lock_acquiring( lck );
2175 #endif /* USE_ITT_BUILD */
2176 
2177  ACQUIRE_LOCK( lck, gtid );
2178 
2179 #if USE_ITT_BUILD
2180  __kmp_itt_lock_acquired( lck );
2181 #endif /* USE_ITT_BUILD */
2182 
2183 #if OMPT_SUPPORT && OMPT_TRACE
2184  if (ompt_enabled &&
2185  ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
2186  ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
2187  }
2188 #endif
2189 
2190 #endif // KMP_USE_DYNAMIC_LOCK
2191 }
2192 
2193 void
2194 __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
2195 #if KMP_USE_DYNAMIC_LOCK
2196 
2197 # if USE_ITT_BUILD
2198  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2199 # endif
2200  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2201 # if USE_ITT_BUILD
2202  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2203 #endif
2204 
2205 #if OMPT_SUPPORT && OMPT_TRACE
2206  if (ompt_enabled) {
2207  // missing support here: need to know whether acquired first or not
2208  }
2209 #endif
2210 
2211 #else // KMP_USE_DYNAMIC_LOCK
2212  int acquire_status;
2213  kmp_user_lock_p lck;
2214 
2215  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2216  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2217  lck = (kmp_user_lock_p)user_lock;
2218  }
2219 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2220  else if ( ( __kmp_user_lock_kind == lk_futex )
2221  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2222  <= OMP_NEST_LOCK_T_SIZE ) ) {
2223  lck = (kmp_user_lock_p)user_lock;
2224  }
2225 #endif
2226  else {
2227  lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
2228  }
2229 
2230 #if USE_ITT_BUILD
2231  __kmp_itt_lock_acquiring( lck );
2232 #endif /* USE_ITT_BUILD */
2233 
2234  ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
2235 
2236 #if USE_ITT_BUILD
2237  __kmp_itt_lock_acquired( lck );
2238 #endif /* USE_ITT_BUILD */
2239 
2240 #if OMPT_SUPPORT && OMPT_TRACE
2241  if (ompt_enabled) {
2242  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2243  if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
2244  ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
2245  } else {
2246  if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
2247  ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
2248  }
2249  }
2250 #endif
2251 
2252 #endif // KMP_USE_DYNAMIC_LOCK
2253 }
2254 
2255 void
2256 __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2257 {
2258 #if KMP_USE_DYNAMIC_LOCK
2259 
2260  int tag = KMP_EXTRACT_D_TAG(user_lock);
2261 # if USE_ITT_BUILD
2262  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2263 # endif
2264 # if KMP_USE_INLINED_TAS
2265  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2266  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2267  } else
2268 # elif KMP_USE_INLINED_FUTEX
2269  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2270  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2271  } else
2272 # endif
2273  {
2274  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2275  }
2276 
2277 #else // KMP_USE_DYNAMIC_LOCK
2278 
2279  kmp_user_lock_p lck;
2280 
2281  /* Can't use serial interval since not block structured */
2282  /* release the lock */
2283 
2284  if ( ( __kmp_user_lock_kind == lk_tas )
2285  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2286 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2287  // "fast" path implemented to fix customer performance issue
2288 #if USE_ITT_BUILD
2289  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2290 #endif /* USE_ITT_BUILD */
2291  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2292  KMP_MB();
2293  return;
2294 #else
2295  lck = (kmp_user_lock_p)user_lock;
2296 #endif
2297  }
2298 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2299  else if ( ( __kmp_user_lock_kind == lk_futex )
2300  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2301  lck = (kmp_user_lock_p)user_lock;
2302  }
2303 #endif
2304  else {
2305  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
2306  }
2307 
2308 #if USE_ITT_BUILD
2309  __kmp_itt_lock_releasing( lck );
2310 #endif /* USE_ITT_BUILD */
2311 
2312  RELEASE_LOCK( lck, gtid );
2313 
2314 #if OMPT_SUPPORT && OMPT_BLAME
2315  if (ompt_enabled &&
2316  ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
2317  ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
2318  }
2319 #endif
2320 
2321 #endif // KMP_USE_DYNAMIC_LOCK
2322 }
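/*
 * Usage sketch (illustrative only, not part of the runtime): compiler-lowered
 * code for the user-level simple-lock API is assumed to reach these entry
 * points roughly as follows. `loc` stands for a compiler-emitted ident_t,
 * `gtid` for the caller's global thread id, and `lk` for the user's
 * omp_lock_t object; none of these names appear in the runtime itself.
 *
 *     kmp_int32 gtid = __kmpc_global_thread_num( &loc );
 *     __kmpc_init_lock( &loc, gtid, (void **)&lk );
 *     __kmpc_set_lock( &loc, gtid, (void **)&lk );    // blocks until acquired
 *     // ... code protected by the lock ...
 *     __kmpc_unset_lock( &loc, gtid, (void **)&lk );  // release
 *     __kmpc_destroy_lock( &loc, gtid, (void **)&lk );
 */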
2323 
2324 /* release the nestable lock */
2325 void
2326 __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2327 {
2328 #if KMP_USE_DYNAMIC_LOCK
2329 
2330 # if USE_ITT_BUILD
2331  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2332 # endif
2333  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2334 
2335 #else // KMP_USE_DYNAMIC_LOCK
2336 
2337  kmp_user_lock_p lck;
2338 
2339  /* Can't use serial interval since not block structured */
2340 
2341  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2342  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2343 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2344  // "fast" path implemented to fix customer performance issue
2345  kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
2346 #if USE_ITT_BUILD
2347  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
2348 #endif /* USE_ITT_BUILD */
2349  if ( --(tl->lk.depth_locked) == 0 ) {
2350  TCW_4(tl->lk.poll, 0);
2351  }
2352  KMP_MB();
2353  return;
2354 #else
2355  lck = (kmp_user_lock_p)user_lock;
2356 #endif
2357  }
2358 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2359  else if ( ( __kmp_user_lock_kind == lk_futex )
2360  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2361  <= OMP_NEST_LOCK_T_SIZE ) ) {
2362  lck = (kmp_user_lock_p)user_lock;
2363  }
2364 #endif
2365  else {
2366  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2367  }
2368 
2369 #if USE_ITT_BUILD
2370  __kmp_itt_lock_releasing( lck );
2371 #endif /* USE_ITT_BUILD */
2372 
2373  int release_status;
2374  release_status = RELEASE_NESTED_LOCK( lck, gtid );
2375 #if OMPT_SUPPORT && OMPT_BLAME
2376  if (ompt_enabled) {
2377  if (release_status == KMP_LOCK_RELEASED) {
2378  if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
2379  ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
2380  (uint64_t) lck);
2381  }
2382  } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
2383  ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
2384  (uint64_t) lck);
2385  }
2386  }
2387 #endif
2388 
2389 #endif // KMP_USE_DYNAMIC_LOCK
2390 }
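/*
 * Usage sketch (illustrative only): a nestable lock keeps a per-owner depth
 * count, so the owning thread may acquire it repeatedly and must release it
 * the same number of times. Assuming `loc`, `gtid` and an omp_nest_lock_t
 * object `nlk` as above, the lowered calls look roughly like:
 *
 *     __kmpc_init_nest_lock( &loc, gtid, (void **)&nlk );
 *     __kmpc_set_nest_lock( &loc, gtid, (void **)&nlk );   // depth becomes 1
 *     __kmpc_set_nest_lock( &loc, gtid, (void **)&nlk );   // same owner, depth 2
 *     __kmpc_unset_nest_lock( &loc, gtid, (void **)&nlk ); // depth back to 1
 *     __kmpc_unset_nest_lock( &loc, gtid, (void **)&nlk ); // fully released
 *     __kmpc_destroy_nest_lock( &loc, gtid, (void **)&nlk );
 */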
2391 
2392 /* try to acquire the lock */
2393 int
2394 __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2395 {
2396  KMP_COUNT_BLOCK(OMP_test_lock);
2397 
2398 #if KMP_USE_DYNAMIC_LOCK
2399  int rc;
2400  int tag = KMP_EXTRACT_D_TAG(user_lock);
2401 # if USE_ITT_BUILD
2402  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2403 # endif
2404 # if KMP_USE_INLINED_TAS
2405  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2406  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2407  } else
2408 # elif KMP_USE_INLINED_FUTEX
2409  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2410  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2411  } else
2412 # endif
2413  {
2414  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2415  }
2416  if (rc) {
2417 # if USE_ITT_BUILD
2418  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2419 # endif
2420  return FTN_TRUE;
2421  } else {
2422 # if USE_ITT_BUILD
2423  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2424 # endif
2425  return FTN_FALSE;
2426  }
2427 
2428 #else // KMP_USE_DYNAMIC_LOCK
2429 
2430  kmp_user_lock_p lck;
2431  int rc;
2432 
2433  if ( ( __kmp_user_lock_kind == lk_tas )
2434  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2435  lck = (kmp_user_lock_p)user_lock;
2436  }
2437 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2438  else if ( ( __kmp_user_lock_kind == lk_futex )
2439  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2440  lck = (kmp_user_lock_p)user_lock;
2441  }
2442 #endif
2443  else {
2444  lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2445  }
2446 
2447 #if USE_ITT_BUILD
2448  __kmp_itt_lock_acquiring( lck );
2449 #endif /* USE_ITT_BUILD */
2450 
2451  rc = TEST_LOCK( lck, gtid );
2452 #if USE_ITT_BUILD
2453  if ( rc ) {
2454  __kmp_itt_lock_acquired( lck );
2455  } else {
2456  __kmp_itt_lock_cancelled( lck );
2457  }
2458 #endif /* USE_ITT_BUILD */
2459  return ( rc ? FTN_TRUE : FTN_FALSE );
2460 
2461  /* Can't use serial interval since not block structured */
2462 
2463 #endif // KMP_USE_DYNAMIC_LOCK
2464 }
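/*
 * Usage sketch (illustrative only): __kmpc_test_lock() is the non-blocking
 * counterpart of __kmpc_set_lock(); it returns FTN_TRUE when the lock was
 * acquired and FTN_FALSE when it is already held, so a caller that must not
 * block can do other work and retry. `do_other_work` is a hypothetical helper.
 *
 *     while ( ! __kmpc_test_lock( &loc, gtid, (void **)&lk ) ) {
 *         do_other_work();
 *     }
 *     // ... lock is now held ...
 *     __kmpc_unset_lock( &loc, gtid, (void **)&lk );
 */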
2465 
2466 /* try to acquire the nestable lock */
2467 int
2468 __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2469 {
2470 #if KMP_USE_DYNAMIC_LOCK
2471  int rc;
2472 # if USE_ITT_BUILD
2473  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2474 # endif
2475  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2476 # if USE_ITT_BUILD
2477  if (rc) {
2478  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2479  } else {
2480  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2481  }
2482 # endif
2483  return rc;
2484 
2485 #else // KMP_USE_DYNAMIC_LOCK
2486 
2487  kmp_user_lock_p lck;
2488  int rc;
2489 
2490  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2491  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2492  lck = (kmp_user_lock_p)user_lock;
2493  }
2494 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2495  else if ( ( __kmp_user_lock_kind == lk_futex )
2496  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2497  <= OMP_NEST_LOCK_T_SIZE ) ) {
2498  lck = (kmp_user_lock_p)user_lock;
2499  }
2500 #endif
2501  else {
2502  lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2503  }
2504 
2505 #if USE_ITT_BUILD
2506  __kmp_itt_lock_acquiring( lck );
2507 #endif /* USE_ITT_BUILD */
2508 
2509  rc = TEST_NESTED_LOCK( lck, gtid );
2510 #if USE_ITT_BUILD
2511  if ( rc ) {
2512  __kmp_itt_lock_acquired( lck );
2513  } else {
2514  __kmp_itt_lock_cancelled( lck );
2515  }
2516 #endif /* USE_ITT_BUILD */
2517  return rc;
2518 
2519  /* Can't use serial interval since not block structured */
2520 
2521 #endif // KMP_USE_DYNAMIC_LOCK
2522 }
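/*
 * Note (sketch, following the omp_test_nest_lock() semantics): unlike the
 * simple variant above, the nestable test returns the new nesting count on
 * success and 0 on failure, so the result can be used directly:
 *
 *     int depth = __kmpc_test_nest_lock( &loc, gtid, (void **)&nlk );
 *     if ( depth > 0 ) {
 *         // acquired; `depth` is the current nesting level for this thread
 *         __kmpc_unset_nest_lock( &loc, gtid, (void **)&nlk );
 *     }
 */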
2523 
2524 
2525 /*--------------------------------------------------------------------------------------------------------------------*/
2526 
2527 /*
2528  * Interface to the fast scalable reduce method routines
2529  */
2530 
2531 // Keep the selected method in a thread-local structure for cross-function use: it will be read back in the __kmpc_end_reduce* functions.
2532 // Another solution would be to re-determine the method in the __kmpc_end_reduce* functions (a new prototype would be required then).
2533 // AT: which solution is better?
2534 #define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2535  ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2536 
2537 #define __KMP_GET_REDUCTION_METHOD(gtid) \
2538  ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2539 
2540 // description of the packed_reduction_method variable: look at the macros in kmp.h
2541 
2542 
2543 // used in a critical section reduce block
2544 static __forceinline void
2545 __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2546 
2547  // This lock was visible to a customer and to the threading profile tool as a serial overhead span
2548  // (although it is used for an internal purpose only).
2549  // Why was it visible in the previous implementation?
2550  // Should we keep it visible in the new reduce block?
2551  kmp_user_lock_p lck;
2552 
2553 #if KMP_USE_DYNAMIC_LOCK
2554 
2555  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
2556  // Check if it is initialized.
2557  if (*lk == 0) {
2558  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2559  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
2560  } else {
2561  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
2562  }
2563  }
2564  // Branch for accessing the actual lock object and set operation. This branching is inevitable since
2565  // this lock initialization does not follow the normal dispatch path (lock table is not used).
2566  if (KMP_EXTRACT_D_TAG(lk) != 0) {
2567  lck = (kmp_user_lock_p)lk;
2568  KMP_DEBUG_ASSERT(lck != NULL);
2569  if (__kmp_env_consistency_check) {
2570  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2571  }
2572  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
2573  } else {
2574  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
2575  lck = ilk->lock;
2576  KMP_DEBUG_ASSERT(lck != NULL);
2577  if (__kmp_env_consistency_check) {
2578  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2579  }
2580  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
2581  }
2582 
2583 #else // KMP_USE_DYNAMIC_LOCK
2584 
2585  // We know that the fast reduction code is only emitted by Intel compilers
2586  // with 32 byte critical sections. If there isn't enough space, then we
2587  // have to use a pointer.
2588  if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2589  lck = (kmp_user_lock_p)crit;
2590  }
2591  else {
2592  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2593  }
2594  KMP_DEBUG_ASSERT( lck != NULL );
2595 
2596  if ( __kmp_env_consistency_check )
2597  __kmp_push_sync( global_tid, ct_critical, loc, lck );
2598 
2599  __kmp_acquire_user_lock_with_checks( lck, global_tid );
2600 
2601 #endif // KMP_USE_DYNAMIC_LOCK
2602 }
2603 
2604 // used in a critical section reduce block
2605 static __forceinline void
2606 __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2607 
2608  kmp_user_lock_p lck;
2609 
2610 #if KMP_USE_DYNAMIC_LOCK
2611 
2612  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
2613  lck = (kmp_user_lock_p)crit;
2614  if (__kmp_env_consistency_check)
2615  __kmp_pop_sync(global_tid, ct_critical, loc);
2616  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
2617  } else {
2618  kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2619  if (__kmp_env_consistency_check)
2620  __kmp_pop_sync(global_tid, ct_critical, loc);
2621  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
2622  }
2623 
2624 #else // KMP_USE_DYNAMIC_LOCK
2625 
2626  // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2627  // sections. If there isn't enough space, then we have to use a pointer.
2628  if ( __kmp_base_user_lock_size > 32 ) {
2629  lck = *( (kmp_user_lock_p *) crit );
2630  KMP_ASSERT( lck != NULL );
2631  } else {
2632  lck = (kmp_user_lock_p) crit;
2633  }
2634 
2635  if ( __kmp_env_consistency_check )
2636  __kmp_pop_sync( global_tid, ct_critical, loc );
2637 
2638  __kmp_release_user_lock_with_checks( lck, global_tid );
2639 
2640 #endif // KMP_USE_DYNAMIC_LOCK
2641 } // __kmp_end_critical_section_reduce_block
2642 
2643 
2644 /* 2.a.i. Reduce Block without a terminating barrier */
2658 kmp_int32
2659 __kmpc_reduce_nowait(
2660  ident_t *loc, kmp_int32 global_tid,
2661  kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2662  kmp_critical_name *lck ) {
2663 
2664  KMP_COUNT_BLOCK(REDUCE_nowait);
2665  int retval = 0;
2666  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2667 #if OMP_40_ENABLED
2668  kmp_team_t *team;
2669  kmp_info_t *th;
2670  int teams_swapped = 0, task_state;
2671 #endif
2672  KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2673 
2674  // why do we need this initialization here at all?
2675  // A reduction clause cannot be used as a stand-alone directive.
2676 
2677  // Do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed.
2678  // Possible detection of a false-positive race by the thread checker?
2679  if( ! TCR_4( __kmp_init_parallel ) )
2680  __kmp_parallel_initialize();
2681 
2682  // check correctness of reduce block nesting
2683 #if KMP_USE_DYNAMIC_LOCK
2684  if ( __kmp_env_consistency_check )
2685  __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2686 #else
2687  if ( __kmp_env_consistency_check )
2688  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2689 #endif
2690 
2691 #if OMP_40_ENABLED
2692  th = __kmp_thread_from_gtid(global_tid);
2693  if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2694  team = th->th.th_team;
2695  if( team->t.t_level == th->th.th_teams_level ) {
2696  // this is reduction at teams construct
2697  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2698  // Let's swap teams temporarily for the reduction barrier
2699  teams_swapped = 1;
2700  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2701  th->th.th_team = team->t.t_parent;
2702  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
2703  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
2704  task_state = th->th.th_task_state;
2705  th->th.th_task_state = 0;
2706  }
2707  }
2708 #endif // OMP_40_ENABLED
2709 
2710  // The packed_reduction_method value will be reused by the __kmpc_end_reduce* functions, so it should be kept in a variable.
2711  // The variable should be either a construct-specific or a thread-specific property, not a team-specific one
2712  // (a thread can reach the next reduce block on the next construct, where the reduce method may differ).
2713  // An ident_t "loc" parameter could be used as a construct-specific property (but what if loc == 0?).
2714  // (If a construct-specific or team-specific variable were shared, unnecessary extra synchronization would be needed.)
2715  // A thread-specific variable is better with respect to both issues above (next construct and extra syncs);
2716  // a thread-specific "th_local.reduction_method" variable is used currently.
2717  // Each thread executes the 'determine' and 'set' lines (there is no need to restrict this to one thread, which avoids unnecessary extra syncs).
2718 
2719  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2720  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2721 
2722  if( packed_reduction_method == critical_reduce_block ) {
2723 
2724  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2725  retval = 1;
2726 
2727  } else if( packed_reduction_method == empty_reduce_block ) {
2728 
2729  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2730  retval = 1;
2731 
2732  } else if( packed_reduction_method == atomic_reduce_block ) {
2733 
2734  retval = 2;
2735 
2736  // All threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen).
2737  // (This is not ideal: the checking block has already been closed by this 'pop',
2738  // but the atomic operation has not been executed yet; it runs slightly later, literally on the next instruction.)
2739  if ( __kmp_env_consistency_check )
2740  __kmp_pop_sync( global_tid, ct_reduce, loc );
2741 
2742  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2743 
2744  //AT: performance issue: a real barrier here.
2745  //AT: (If the master is slow, the other threads are blocked here waiting for the master to arrive and release them;
2746  //AT: this is not what a customer expects when specifying the NOWAIT clause.
2747  //AT: Specifying NOWAIT will not improve performance, which will be confusing to a customer.)
2748  //AT: Another implementation of *barrier_gather*nowait() (or some other design) might go faster
2749  // and be more in line with the sense of NOWAIT.
2750  //AT: TO DO: run the EPCC benchmark and compare times.
2751 
2752  // this barrier should be invisible to a customer and to the threading profile tool
2753  // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
2754 #if USE_ITT_NOTIFY
2755  __kmp_threads[global_tid]->th.th_ident = loc;
2756 #endif
2757  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2758  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2759 
2760  // All workers except the master should do this pop here
2761  // (none of the other workers will get to __kmpc_end_reduce_nowait()).
2762  if ( __kmp_env_consistency_check ) {
2763  if( retval == 0 ) {
2764  __kmp_pop_sync( global_tid, ct_reduce, loc );
2765  }
2766  }
2767 
2768  } else {
2769 
2770  // should never reach this block
2771  KMP_ASSERT( 0 ); // "unexpected method"
2772 
2773  }
2774 #if OMP_40_ENABLED
2775  if( teams_swapped ) {
2776  // Restore thread structure
2777  th->th.th_info.ds.ds_tid = 0;
2778  th->th.th_team = team;
2779  th->th.th_team_nproc = team->t.t_nproc;
2780  th->th.th_task_team = team->t.t_task_team[task_state];
2781  th->th.th_task_state = task_state;
2782  }
2783 #endif
2784  KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2785 
2786  return retval;
2787 }
2788 
2797 void
2798 __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2799 
2800  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2801 
2802  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2803 
2804  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2805 
2806  if( packed_reduction_method == critical_reduce_block ) {
2807 
2808  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2809 
2810  } else if( packed_reduction_method == empty_reduce_block ) {
2811 
2812  // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2813 
2814  } else if( packed_reduction_method == atomic_reduce_block ) {
2815 
2816  // Neither the master nor the other workers should get here
2817  // (the code gen does not generate this call in case 2: atomic reduce block).
2818  // It would actually be better to remove this else-if entirely;
2819  // after removal this value would be caught by the 'else' branch and would assert.
2820 
2821  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2822 
2823  // only master gets here
2824 
2825  } else {
2826 
2827  // should never reach this block
2828  KMP_ASSERT( 0 ); // "unexpected method"
2829 
2830  }
2831 
2832  if ( __kmp_env_consistency_check )
2833  __kmp_pop_sync( global_tid, ct_reduce, loc );
2834 
2835  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2836 
2837  return;
2838 }
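/*
 * Codegen sketch (an assumption about typical compiler output, not a verbatim
 * lowering): for a `reduction(+:sum)` clause combined with `nowait`, the
 * outlined function is expected to dispatch on the value returned by
 * __kmpc_reduce_nowait(): 1 means "combine here and finish with
 * __kmpc_end_reduce_nowait()", 2 means "combine with an atomic update", and 0
 * means "nothing left to do". `sum`, `sum_priv`, `reduce_sum` and `crit_name`
 * are hypothetical names for the shared variable, the thread-private copy,
 * the tree-reduction callback and the kmp_critical_name scratch area.
 *
 *     switch ( __kmpc_reduce_nowait( &loc, gtid, 1, sizeof( sum ),
 *                                    &sum_priv, reduce_sum, &crit_name ) ) {
 *     case 1:
 *         sum += sum_priv;   // combine into the shared variable
 *         __kmpc_end_reduce_nowait( &loc, gtid, &crit_name );
 *         break;
 *     case 2:
 *         // combine atomically instead (no __kmpc_end_reduce_nowait() call)
 *         break;
 *     default:
 *         break;             // worker in the tree method: nothing left to do
 *     }
 */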
2839 
2840 /* 2.a.ii. Reduce Block with a terminating barrier */
2841 
2855 kmp_int32
2856 __kmpc_reduce(
2857  ident_t *loc, kmp_int32 global_tid,
2858  kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2859  void (*reduce_func)(void *lhs_data, void *rhs_data),
2860  kmp_critical_name *lck )
2861 {
2862  KMP_COUNT_BLOCK(REDUCE_wait);
2863  int retval = 0;
2864  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2865 
2866  KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2867 
2868  // why do we need this initialization here at all?
2869  // A reduction clause cannot be a stand-alone directive.
2870 
2871  // Do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed.
2872  // Possible detection of a false-positive race by the thread checker?
2873  if( ! TCR_4( __kmp_init_parallel ) )
2874  __kmp_parallel_initialize();
2875 
2876  // check correctness of reduce block nesting
2877 #if KMP_USE_DYNAMIC_LOCK
2878  if ( __kmp_env_consistency_check )
2879  __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2880 #else
2881  if ( __kmp_env_consistency_check )
2882  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2883 #endif
2884 
2885  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2886  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2887 
2888  if( packed_reduction_method == critical_reduce_block ) {
2889 
2890  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2891  retval = 1;
2892 
2893  } else if( packed_reduction_method == empty_reduce_block ) {
2894 
2895  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2896  retval = 1;
2897 
2898  } else if( packed_reduction_method == atomic_reduce_block ) {
2899 
2900  retval = 2;
2901 
2902  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2903 
2904  //case tree_reduce_block:
2905  // this barrier should be visible to a customer and to the threading profile tool
2906  // (it's a terminating barrier on constructs if NOWAIT not specified)
2907 #if USE_ITT_NOTIFY
2908  __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2909 #endif
2910  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2911  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2912 
2913  // All workers except the master should do this pop here
2914  // (none of the workers other than the master will enter __kmpc_end_reduce()).
2915  if ( __kmp_env_consistency_check ) {
2916  if( retval == 0 ) { // 0: all other workers; 1: master
2917  __kmp_pop_sync( global_tid, ct_reduce, loc );
2918  }
2919  }
2920 
2921  } else {
2922 
2923  // should never reach this block
2924  KMP_ASSERT( 0 ); // "unexpected method"
2925 
2926  }
2927 
2928  KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2929 
2930  return retval;
2931 }
2932 
2942 void
2943 __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2944 
2945  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2946 
2947  KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2948 
2949  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2950 
2951  // this barrier should be visible to a customer and to the threading profile tool
2952  // (it's a terminating barrier on constructs if NOWAIT not specified)
2953 
2954  if( packed_reduction_method == critical_reduce_block ) {
2955 
2956  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2957 
2958  // TODO: implicit barrier: should be exposed
2959 #if USE_ITT_NOTIFY
2960  __kmp_threads[global_tid]->th.th_ident = loc;
2961 #endif
2962  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2963 
2964  } else if( packed_reduction_method == empty_reduce_block ) {
2965 
2966  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2967 
2968  // TODO: implicit barrier: should be exposed
2969 #if USE_ITT_NOTIFY
2970  __kmp_threads[global_tid]->th.th_ident = loc;
2971 #endif
2972  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2973 
2974  } else if( packed_reduction_method == atomic_reduce_block ) {
2975 
2976  // TODO: implicit barrier: should be exposed
2977 #if USE_ITT_NOTIFY
2978  __kmp_threads[global_tid]->th.th_ident = loc;
2979 #endif
2980  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2981 
2982  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2983 
2984  // only master executes here (master releases all other workers)
2985  __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2986 
2987  } else {
2988 
2989  // should never reach this block
2990  KMP_ASSERT( 0 ); // "unexpected method"
2991 
2992  }
2993 
2994  if ( __kmp_env_consistency_check )
2995  __kmp_pop_sync( global_tid, ct_reduce, loc );
2996 
2997  KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2998 
2999  return;
3000 }
3001 
3002 #undef __KMP_GET_REDUCTION_METHOD
3003 #undef __KMP_SET_REDUCTION_METHOD
3004 
3005 /*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
3006 
3007 kmp_uint64
3008 __kmpc_get_taskid() {
3009 
3010  kmp_int32 gtid;
3011  kmp_info_t * thread;
3012 
3013  gtid = __kmp_get_gtid();
3014  if ( gtid < 0 ) {
3015  return 0;
3016  }; // if
3017  thread = __kmp_thread_from_gtid( gtid );
3018  return thread->th.th_current_task->td_task_id;
3019 
3020 } // __kmpc_get_taskid
3021 
3022 
3023 kmp_uint64
3024 __kmpc_get_parent_taskid() {
3025 
3026  kmp_int32 gtid;
3027  kmp_info_t * thread;
3028  kmp_taskdata_t * parent_task;
3029 
3030  gtid = __kmp_get_gtid();
3031  if ( gtid < 0 ) {
3032  return 0;
3033  }; // if
3034  thread = __kmp_thread_from_gtid( gtid );
3035  parent_task = thread->th.th_current_task->td_parent;
3036  return ( parent_task == NULL ? 0 : parent_task->td_task_id );
3037 
3038 } // __kmpc_get_parent_taskid
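/*
 * Usage sketch (illustrative only): a tool or debugging hook can combine these
 * two helpers to report where the calling thread sits in the task hierarchy;
 * both return 0 when the calling thread is not known to the runtime, and
 * __kmpc_get_parent_taskid() also returns 0 when there is no parent task.
 *
 *     kmp_uint64 task_id   = __kmpc_get_taskid();
 *     kmp_uint64 parent_id = __kmpc_get_parent_taskid();
 *     printf( "task %llu (parent %llu)\n",
 *             (unsigned long long)task_id, (unsigned long long)parent_id );
 */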
3039 
3040 void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
3041 {
3042  if ( ! __kmp_init_serial ) {
3043  __kmp_serial_initialize();
3044  }
3045  __kmp_place_num_sockets = nS;
3046  __kmp_place_socket_offset = sO;
3047  __kmp_place_num_cores = nC;
3048  __kmp_place_core_offset = cO;
3049  __kmp_place_num_threads_per_core = nT;
3050 }
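/*
 * Usage sketch (illustrative only; the numbers are arbitrary): judging from
 * the parameter names, a call such as
 *
 *     __kmpc_place_threads( 2, 0, 4, 0, 2 );
 *
 * stores a request for 2 sockets (starting at socket 0), 4 cores per socket
 * (starting at core 0) and 2 threads per core into the __kmp_place_* globals
 * for the affinity code to consume.
 */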
3051 
3052 // end of file //
3053 