LLVM OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_wait_release.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 /* ------------------------------------------------------------------------ */
27 /* ------------------------------------------------------------------------ */
28 
29 
30 /* forward declaration */
31 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
34 
35 #ifdef OMP_45_ENABLED
36 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
37 #endif
38 
39 #ifdef BUILD_TIED_TASK_STACK
40 
41 //---------------------------------------------------------------------------
42 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
43 // from top to bottom
44 //
45 // gtid: global thread identifier for thread containing stack
46 // thread_data: thread data for task team thread containing stack
47 // threshold: value above which the trace statement triggers
48 // location: string identifying call site of this function (for trace)
49 
50 static void
51 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
52 {
53  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
54  kmp_taskdata_t **stack_top = task_stack -> ts_top;
55  kmp_int32 entries = task_stack -> ts_entries;
56  kmp_taskdata_t *tied_task;
57 
58  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
59  "first_block = %p, stack_top = %p \n",
60  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
61 
62  KMP_DEBUG_ASSERT( stack_top != NULL );
63  KMP_DEBUG_ASSERT( entries > 0 );
64 
65  while ( entries != 0 )
66  {
67  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
68  // fix up ts_top if we need to pop from previous block
69  if ( (entries & TASK_STACK_INDEX_MASK) == 0 )
70  {
71  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
72 
73  stack_block = stack_block -> sb_prev;
74  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
75  }
76 
77  // finish bookkeeping
78  stack_top--;
79  entries--;
80 
81  tied_task = * stack_top;
82 
83  KMP_DEBUG_ASSERT( tied_task != NULL );
84  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
85 
86  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
87  "stack_top=%p, tied_task=%p\n",
88  location, gtid, entries, stack_top, tied_task ) );
89  }
90  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
91 
92  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
93  location, gtid ) );
94 }
95 
96 //---------------------------------------------------------------------------
97 // __kmp_init_task_stack: initialize the task stack for the first time
98 // after a thread_data structure is created.
99 // It should not be necessary to do this again (assuming the stack works).
100 //
101 // gtid: global thread identifier of calling thread
102 // thread_data: thread data for task team thread containing stack
103 
104 static void
105 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
106 {
107  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
108  kmp_stack_block_t *first_block;
109 
110  // set up the first block of the stack
111  first_block = & task_stack -> ts_first_block;
112  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
113  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
114 
115  // initialize the stack to be empty
116  task_stack -> ts_entries = TASK_STACK_EMPTY;
117  first_block -> sb_next = NULL;
118  first_block -> sb_prev = NULL;
119 }
120 
121 
122 //---------------------------------------------------------------------------
123 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
124 //
125 // gtid: global thread identifier for calling thread
126 // thread_data: thread info for thread containing stack
127 
128 static void
129 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
130 {
131  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
132  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
133 
134  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
135  // free from the second block of the stack
136  while ( stack_block != NULL ) {
137  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
138 
139  stack_block -> sb_next = NULL;
140  stack_block -> sb_prev = NULL;
141  if (stack_block != & task_stack -> ts_first_block) {
142  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first (owner thread derived from gtid)
143  }
144  stack_block = next_block;
145  }
146  // initialize the stack to be empty
147  task_stack -> ts_entries = 0;
148  task_stack -> ts_top = NULL;
149 }
150 
151 
152 //---------------------------------------------------------------------------
153 // __kmp_push_task_stack: Push the tied task onto the task stack.
154 // Grow the stack if necessary by allocating another block.
155 //
156 // gtid: global thread identifier for calling thread
157 // thread: thread info for thread containing stack
158 // tied_task: the task to push on the stack
159 
160 static void
161 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
162 {
163  // GEH - need to consider what to do if tt_threads_data not allocated yet
164  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
165  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
166  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
167 
168  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
169  return; // Don't push anything on stack if team or team tasks are serialized
170  }
171 
172  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
173  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
174 
175  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
176  gtid, thread, tied_task ) );
177  // Store entry
178  * (task_stack -> ts_top) = tied_task;
179 
180  // Do bookkeeping for next push
181  task_stack -> ts_top++;
182  task_stack -> ts_entries++;
183 
184  if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
185  {
186  // Find beginning of this task block
187  kmp_stack_block_t *stack_block =
188  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
189 
190  // Check if we already have a block
191  if ( stack_block -> sb_next != NULL )
192  { // reset ts_top to beginning of next block
193  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
194  }
195  else
196  { // Alloc new block and link it up
197  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
198  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
199 
200  task_stack -> ts_top = & new_block -> sb_block[0];
201  stack_block -> sb_next = new_block;
202  new_block -> sb_prev = stack_block;
203  new_block -> sb_next = NULL;
204 
205  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
206  gtid, tied_task, new_block ) );
207  }
208  }
209  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
210 }
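// Illustrative sketch (not part of the runtime): the block-boundary test used by the
// tied-task stack above. SKETCH_STACK_BLOCK_SIZE is a hypothetical stand-in for
// TASK_STACK_BLOCK_SIZE. It also shows why the mask test must be parenthesized:
// in C, `entries & MASK == 0` parses as `entries & (MASK == 0)`.
#include <stdio.h>

#define SKETCH_STACK_BLOCK_SIZE 8                              /* stand-in for TASK_STACK_BLOCK_SIZE */
#define SKETCH_STACK_INDEX_MASK (SKETCH_STACK_BLOCK_SIZE - 1)  /* stand-in for TASK_STACK_INDEX_MASK */

int main(void) {
    for (int entries = 1; entries <= 17; ++entries) {
        int at_block_boundary = ((entries & SKETCH_STACK_INDEX_MASK) == 0);
        if (at_block_boundary)
            printf("entry count %d crosses into a new stack block\n", entries);
    }
    return 0;
}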
211 
212 //---------------------------------------------------------------------------
213 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
214 // the task, just check to make sure it matches the ending task passed in.
215 //
216 // gtid: global thread identifier for the calling thread
217 // thread: thread info structure containing stack
218 // tied_task: the task popped off the stack
219 // ending_task: the task that is ending (should match popped task)
220 
221 static void
222 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
223 {
224  // GEH - need to consider what to do if tt_threads_data not allocated yet
225  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
226  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
227  kmp_taskdata_t *tied_task;
228 
229  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
230  return; // Don't pop anything from stack if team or team tasks are serialized
231  }
232 
233  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
234  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
235 
236  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
237 
238  // fix up ts_top if we need to pop from previous block
239  if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
240  {
241  kmp_stack_block_t *stack_block =
242  (kmp_stack_block_t *) (task_stack -> ts_top) ;
243 
244  stack_block = stack_block -> sb_prev;
245  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
246  }
247 
248  // finish bookkeeping
249  task_stack -> ts_top--;
250  task_stack -> ts_entries--;
251 
252  tied_task = * (task_stack -> ts_top );
253 
254  KMP_DEBUG_ASSERT( tied_task != NULL );
255  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
256  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
257 
258  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
259  return;
260 }
261 #endif /* BUILD_TIED_TASK_STACK */
262 
263 //---------------------------------------------------
264 // __kmp_push_task: Add a task to the thread's deque
265 
266 static kmp_int32
267 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
268 {
269  kmp_info_t * thread = __kmp_threads[ gtid ];
270  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
271  kmp_task_team_t * task_team = thread->th.th_task_team;
272  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
273  kmp_thread_data_t * thread_data;
274 
275  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
276 
277  if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
278  // untied task needs to increment counter so that the task structure is not freed prematurely
279  kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
280  KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
281  gtid, counter, taskdata ) );
282  }
283 
284  // The first check avoids building task_team thread data if serialized
285  if ( taskdata->td_flags.task_serial ) {
286  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
287  gtid, taskdata ) );
288  return TASK_NOT_PUSHED;
289  }
290 
291  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
292  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
293  if ( ! KMP_TASKING_ENABLED(task_team) ) {
294  __kmp_enable_tasking( task_team, thread );
295  }
296  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
297  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
298 
299  // Find tasking deque specific to encountering thread
300  thread_data = & task_team -> tt.tt_threads_data[ tid ];
301 
302  // No lock needed since only owner can allocate
303  if (thread_data -> td.td_deque == NULL ) {
304  __kmp_alloc_task_deque( thread, thread_data );
305  }
306 
307  // Check if deque is full
308  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
309  {
310  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
311  gtid, taskdata ) );
312  return TASK_NOT_PUSHED;
313  }
314 
315  // Lock the deque for the task push operation
316  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
317 
318 #if OMP_45_ENABLED
319  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
320  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
321  {
322  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
323  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
324  gtid, taskdata ) );
325  return TASK_NOT_PUSHED;
326  }
327 #else
328  // Must have room since no thread other than the calling thread can add tasks
329  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
330 #endif
331 
332  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
333  // Wrap index.
334  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
335  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
336 
337  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
338 
339  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
340  "task=%p ntasks=%d head=%u tail=%u\n",
341  gtid, taskdata, thread_data->td.td_deque_ntasks,
342  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
343 
344  return TASK_SUCCESSFULLY_PUSHED;
345 }
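// Illustrative sketch (not part of the runtime): the head/tail wrap-around used by the
// per-thread deque above. TASK_DEQUE_SIZE / TASK_DEQUE_MASK are modeled here by a
// hypothetical power-of-two capacity; the real deque stores kmp_taskdata_t pointers and
// is protected by td_deque_lock.
#include <stdio.h>

#define SKETCH_DEQUE_SIZE 8                       /* must be a power of two */
#define SKETCH_DEQUE_MASK (SKETCH_DEQUE_SIZE - 1)

typedef struct {
    void    *slots[SKETCH_DEQUE_SIZE];
    unsigned head;   /* next element to pop/steal from the front */
    unsigned tail;   /* next free slot at the back */
    unsigned ntasks; /* number of queued tasks */
} sketch_deque_t;

static int sketch_push(sketch_deque_t *d, void *task) {
    if (d->ntasks >= SKETCH_DEQUE_SIZE)
        return 0;                                  /* full: caller executes the task immediately */
    d->slots[d->tail] = task;
    d->tail = (d->tail + 1) & SKETCH_DEQUE_MASK;   /* wrap the index, as td_deque_tail is wrapped */
    d->ntasks++;
    return 1;
}

int main(void) {
    sketch_deque_t d = { {0}, 0, 0, 0 };
    int dummy;
    for (int i = 0; i < 10; ++i)
        printf("push %d -> %s (tail=%u)\n", i,
               sketch_push(&d, &dummy) ? "queued" : "deque full", d.tail);
    return 0;
}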
346 
347 
348 //-----------------------------------------------------------------------------------------
349 // __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
350 // this_thr: thread structure to set current_task in.
351 
352 void
353 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
354 {
355  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
356  "curtask_parent=%p\n",
357  0, this_thr, this_thr -> th.th_current_task,
358  this_thr -> th.th_current_task -> td_parent ) );
359 
360  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
361 
362  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
363  "curtask_parent=%p\n",
364  0, this_thr, this_thr -> th.th_current_task,
365  this_thr -> th.th_current_task -> td_parent ) );
366 }
367 
368 
369 //---------------------------------------------------------------------------------------
370 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
371 // this_thr: thread structure to set up
372 // team: team for implicit task data
373 // tid: thread within team to set up
374 
375 void
376 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
377 {
378  // The current task of the thread is the parent of the just-created implicit tasks of the new team
379  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
380  "parent_task=%p\n",
381  tid, this_thr, this_thr->th.th_current_task,
382  team->t.t_implicit_task_taskdata[tid].td_parent ) );
383 
384  KMP_DEBUG_ASSERT (this_thr != NULL);
385 
386  if( tid == 0 ) {
387  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
388  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
389  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
390  }
391  } else {
392  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
393  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
394  }
395 
396  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
397  "parent_task=%p\n",
398  tid, this_thr, this_thr->th.th_current_task,
399  team->t.t_implicit_task_taskdata[tid].td_parent ) );
400 }
401 
402 
403 //----------------------------------------------------------------------
404 // __kmp_task_start: bookkeeping for a task starting execution
405 // GTID: global thread id of calling thread
406 // task: task starting execution
407 // current_task: task suspending
408 
409 static void
410 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
411 {
412  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
413  kmp_info_t * thread = __kmp_threads[ gtid ];
414 
415  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
416  gtid, taskdata, current_task) );
417 
418  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
419 
420  // mark currently executing task as suspended
421  // TODO: GEH - make sure root team implicit task is initialized properly.
422  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
423  current_task -> td_flags.executing = 0;
424 
425  // Add task to stack if tied
426 #ifdef BUILD_TIED_TASK_STACK
427  if ( taskdata -> td_flags.tiedness == TASK_TIED )
428  {
429  __kmp_push_task_stack( gtid, thread, taskdata );
430  }
431 #endif /* BUILD_TIED_TASK_STACK */
432 
433  // mark starting task as executing and as current task
434  thread -> th.th_current_task = taskdata;
435 
436  KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
437  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
438  taskdata -> td_flags.started = 1;
439  taskdata -> td_flags.executing = 1;
440  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
441  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
442 
443  // GEH TODO: shouldn't we pass some sort of location identifier here?
444  // APT: yes, we will pass location here.
445  // need to store current thread state (in a thread or taskdata structure)
446  // before setting work_state, otherwise wrong state is set after end of task
447 
448  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
449  gtid, taskdata ) );
450 
451 #if OMPT_SUPPORT
452  if (ompt_enabled &&
453  ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
454  kmp_taskdata_t *parent = taskdata->td_parent;
455  ompt_callbacks.ompt_callback(ompt_event_task_begin)(
456  parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
457  parent ? &(parent->ompt_task_info.frame) : NULL,
458  taskdata->ompt_task_info.task_id,
459  taskdata->ompt_task_info.function);
460  }
461 #endif
462 #if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
463  /* OMPT emit all dependences if requested by the tool */
464  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
465  ompt_callbacks.ompt_callback(ompt_event_task_dependences))
466  {
467  ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
468  taskdata->ompt_task_info.task_id,
469  taskdata->ompt_task_info.deps,
470  taskdata->ompt_task_info.ndeps
471  );
472  /* We can now free the allocated memory for the dependencies */
473  KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
474  taskdata->ompt_task_info.deps = NULL;
475  taskdata->ompt_task_info.ndeps = 0;
476  }
477 #endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
478 
479  return;
480 }
481 
482 
483 //----------------------------------------------------------------------
484 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
485 // loc_ref: source location information; points to beginning of task block.
486 // gtid: global thread number.
487 // task: task thunk for the started task.
488 
489 void
490 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
491 {
492  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
493  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
494 
495  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
496  gtid, loc_ref, taskdata, current_task ) );
497 
498  if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
499  // untied task needs to increment counter so that the task structure is not freed prematurely
500  kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
501  KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
502  gtid, counter, taskdata ) );
503  }
504 
505  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
506  __kmp_task_start( gtid, task, current_task );
507 
508  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
509  gtid, loc_ref, taskdata ) );
510 
511  return;
512 }
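// Hedged usage sketch (not part of the runtime): a task whose if clause evaluates to
// false is executed immediately by the encountering thread. Compilers typically bracket
// the direct call of the outlined task body with __kmpc_omp_task_begin_if0 /
// __kmpc_omp_task_complete_if0 (the latter is defined further down in this file), but
// the exact lowering is a compiler detail; this is only user-level code that triggers it.
#include <stdio.h>

int main(void) {
    int x = 0;
    #pragma omp parallel num_threads(2)
    #pragma omp single
    {
        #pragma omp task if(0) shared(x)   /* undeferred task: runs immediately */
        {
            x = 42;
        }
        printf("x = %d\n", x);             /* the undeferred task has already completed */
    }
    return 0;
}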
513 
514 #ifdef TASK_UNUSED
515 //----------------------------------------------------------------------
516 // __kmpc_omp_task_begin: report that a given task has started execution
517 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
518 
519 void
520 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
521 {
522  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
523 
524  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
525  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
526 
527  __kmp_task_start( gtid, task, current_task );
528 
529  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
530  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
531 
532  return;
533 }
534 #endif // TASK_UNUSED
535 
536 
537 //-------------------------------------------------------------------------------------
538 // __kmp_free_task: free the current task space and the space for shareds
539 // gtid: Global thread ID of calling thread
540 // taskdata: task to free
541 // thread: thread data structure of caller
542 
543 static void
544 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
545 {
546  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
547  gtid, taskdata) );
548 
549  // Check to make sure all flags and counters have the correct values
550  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
551  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
552  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
553  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
554  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
555  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
556 
557  taskdata->td_flags.freed = 1;
558  // deallocate the taskdata and shared variable blocks associated with this task
559  #if USE_FAST_MEMORY
560  __kmp_fast_free( thread, taskdata );
561  #else /* ! USE_FAST_MEMORY */
562  __kmp_thread_free( thread, taskdata );
563  #endif
564 
565  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
566  gtid, taskdata) );
567 }
568 
569 //-------------------------------------------------------------------------------------
570 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
571 //
572 // gtid: Global thread ID of calling thread
573 // taskdata: task to free
574 // thread: thread data structure of caller
575 
576 static void
577 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
578 {
579  kmp_int32 children = 0;
580  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
581 
582  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
583 
584  if ( !team_or_tasking_serialized ) {
585  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
586  KMP_DEBUG_ASSERT( children >= 0 );
587  }
588 
589  // Now, go up the ancestor tree to see if any ancestors can now be freed.
590  while ( children == 0 )
591  {
592  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
593 
594  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
595  "and freeing itself\n", gtid, taskdata) );
596 
597  // --- Deallocate my ancestor task ---
598  __kmp_free_task( gtid, taskdata, thread );
599 
600  taskdata = parent_taskdata;
601 
602  // Stop checking ancestors at implicit task or if tasking serialized
603  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
604  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
605  return;
606 
607  if ( !team_or_tasking_serialized ) {
608  // Predecrement simulated by "- 1" calculation
609  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
610  KMP_DEBUG_ASSERT( children >= 0 );
611  }
612  }
613 
614  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
615  "not freeing it yet\n", gtid, taskdata, children) );
616 }
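// Illustrative sketch (not part of the runtime): KMP_TEST_THEN_DEC32 is a fetch-and-
// decrement, so "KMP_TEST_THEN_DEC32(&count) - 1" yields the value *after* the decrement
// ("predecrement simulated by - 1" in the comments above). Modeled here with C11 atomics
// and a hypothetical node type; each task's count is 1 (itself) plus its live children.
#include <stdatomic.h>
#include <stdio.h>

typedef struct sketch_task {
    struct sketch_task *parent;
    atomic_int          allocated_children; /* counts this task plus its live children */
} sketch_task_t;

// Drop one reference and report whether the task may now be freed.
static int sketch_release(sketch_task_t *t) {
    int remaining = atomic_fetch_sub(&t->allocated_children, 1) - 1;
    return remaining == 0;
}

int main(void) {
    sketch_task_t parent = { NULL, 2 };     /* itself + one live child */
    sketch_task_t child  = { &parent, 1 };  /* itself only */

    /* child finishes: drop its self-reference, then its parent's reference, as the loop above does */
    if (sketch_release(&child))
        printf("child freeable\n");
    sketch_release(child.parent);           /* parent: 2 -> 1, still held by its own reference */

    /* later, the parent finishes and drops its self-reference */
    if (sketch_release(&parent))
        printf("parent freeable\n");
    return 0;
}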
617 
618 //---------------------------------------------------------------------
619 // __kmp_task_finish: bookkeeping to do when a task finishes execution
620 // gtid: global thread ID for calling thread
621 // task: task to be finished
622 // resumed_task: task to be resumed. (may be NULL if task is serialized)
623 
624 static void
625 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
626 {
627  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
628  kmp_info_t * thread = __kmp_threads[ gtid ];
629  kmp_int32 children = 0;
630 
631 #if OMPT_SUPPORT
632  if (ompt_enabled &&
633  ompt_callbacks.ompt_callback(ompt_event_task_end)) {
634  kmp_taskdata_t *parent = taskdata->td_parent;
635  ompt_callbacks.ompt_callback(ompt_event_task_end)(
636  taskdata->ompt_task_info.task_id);
637  }
638 #endif
639 
640  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
641  gtid, taskdata, resumed_task) );
642 
643  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
644 
645  // Pop task from stack if tied
646 #ifdef BUILD_TIED_TASK_STACK
647  if ( taskdata -> td_flags.tiedness == TASK_TIED )
648  {
649  __kmp_pop_task_stack( gtid, thread, taskdata );
650  }
651 #endif /* BUILD_TIED_TASK_STACK */
652 
653  if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
654  // untied task needs to check the counter so that the task structure is not freed prematurely
655  kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
656  KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
657  gtid, counter, taskdata ) );
658  if ( counter > 0 ) {
659  // untied task is not done, to be continued possibly by other thread, do not free it now
660  if (resumed_task == NULL) {
661  KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
662  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
663  }
664  thread->th.th_current_task = resumed_task; // restore current_task
665  resumed_task->td_flags.executing = 1; // resume previous task
666  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
667  gtid, taskdata, resumed_task) );
668  return;
669  }
670  }
671 
672  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
673  taskdata -> td_flags.complete = 1; // mark the task as completed
674  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
675  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
676 
677  // Only need to keep track of count if team parallel and tasking not serialized
678  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
679  // Predecrement simulated by "- 1" calculation
680  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
681  KMP_DEBUG_ASSERT( children >= 0 );
682 #if OMP_40_ENABLED
683  if ( taskdata->td_taskgroup )
684  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
685  __kmp_release_deps(gtid,taskdata);
686 #endif
687  }
688 
689  // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
690  // Otherwise, if a task is executed immediately from the release_deps code
691  // the flag will be reset to 1 again by this same function
692  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
693  taskdata -> td_flags.executing = 0; // suspend the finishing task
694 
695  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
696  gtid, taskdata, children) );
697 
698 #if OMP_40_ENABLED
699  /* If the tasks' destructor thunk flag has been set, we need to invoke the
700  destructor thunk that has been generated by the compiler.
701  The code is placed here, since at this point other tasks might have been released
702  hence overlapping the destructor invocations with some other work in the
703  released tasks. The OpenMP spec is not specific on when the destructors are
704  invoked, so we should be free to choose.
705  */
706  if (taskdata->td_flags.destructors_thunk) {
707  kmp_routine_entry_t destr_thunk = task->data1.destructors;
708  KMP_ASSERT(destr_thunk);
709  destr_thunk(gtid, task);
710  }
711 #endif // OMP_40_ENABLED
712 
713  // bookkeeping for resuming task:
714  // GEH - note tasking_ser => task_serial
715  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
716  taskdata->td_flags.task_serial);
717  if ( taskdata->td_flags.task_serial )
718  {
719  if (resumed_task == NULL) {
720  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
721  }
722  else {
723  // verify resumed task passed in points to parent
724  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
725  }
726  }
727  else {
728  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
729  }
730 
731  // Free this task and then ancestor tasks if they have no children.
732  // Restore th_current_task first as suggested by John:
733  // johnmc: if an asynchronous inquiry peers into the runtime system
734  // it doesn't see the freed task as the current task.
735  thread->th.th_current_task = resumed_task;
736  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
737 
738  // TODO: GEH - make sure root team implicit task is initialized properly.
739  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
740  resumed_task->td_flags.executing = 1; // resume previous task
741 
742  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
743  gtid, taskdata, resumed_task) );
744 
745  return;
746 }
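// Hedged usage sketch (not part of the runtime): the td_taskgroup->count decrement above
// is what lets a "#pragma omp taskgroup" observe completion of its descendant tasks;
// tasks inherit td_taskgroup from their parent (see __kmp_task_alloc below). Unlike
// taskwait (see __kmpc_omp_taskwait further down), taskgroup also waits for nested tasks.
#include <stdio.h>

int main(void) {
    int done = 0;
    #pragma omp parallel num_threads(2)
    #pragma omp single
    {
        #pragma omp taskgroup
        {
            #pragma omp task shared(done)
            {
                #pragma omp task shared(done)   /* grandchild: still covered by the taskgroup */
                { done = 1; }
            }
        }   /* taskgroup end: all descendants, including the grandchild, have finished */
        printf("done = %d\n", done);            /* prints 1 */
    }
    return 0;
}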
747 
748 //---------------------------------------------------------------------
749 // __kmpc_omp_task_complete_if0: report that a task has completed execution
750 // loc_ref: source location information; points to end of task block.
751 // gtid: global thread number.
752 // task: task thunk for the completed task.
753 
754 void
755 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
756 {
757  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
758  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
759 
760  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
761 
762  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
763  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
764 
765  return;
766 }
767 
768 #ifdef TASK_UNUSED
769 //---------------------------------------------------------------------
770 // __kmpc_omp_task_complete: report that a task has completed execution
771 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
772 
773 void
774 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
775 {
776  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
777  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
778 
779  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
780 
781  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
782  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
783  return;
784 }
785 #endif // TASK_UNUSED
786 
787 
788 #if OMPT_SUPPORT
789 //----------------------------------------------------------------------------------------------------
790 // __kmp_task_init_ompt:
791 // Initialize OMPT fields maintained by a task. This will only be called after
792 // ompt_tool, so we already know whether ompt is enabled or not.
793 
794 static inline void
795 __kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
796 {
797  if (ompt_enabled) {
798  task->ompt_task_info.task_id = __ompt_task_id_new(tid);
799  task->ompt_task_info.function = function;
800  task->ompt_task_info.frame.exit_runtime_frame = NULL;
801  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
802 #if OMP_40_ENABLED
803  task->ompt_task_info.ndeps = 0;
804  task->ompt_task_info.deps = NULL;
805 #endif /* OMP_40_ENABLED */
806  }
807 }
808 #endif
809 
810 
811 //----------------------------------------------------------------------------------------------------
812 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
813 //
814 // loc_ref: reference to source location of parallel region
815 // this_thr: thread data structure corresponding to implicit task
816 // team: team for this_thr
817 // tid: thread id of given thread within team
818 // set_curr_task: TRUE if need to push current task to thread
819 // NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
820 // TODO: Get better loc_ref. Value passed in may be NULL
821 
822 void
823 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
824 {
825  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
826 
827  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
828  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
829 
830  task->td_task_id = KMP_GEN_TASK_ID();
831  task->td_team = team;
832 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
833  task->td_ident = loc_ref;
834  task->td_taskwait_ident = NULL;
835  task->td_taskwait_counter = 0;
836  task->td_taskwait_thread = 0;
837 
838  task->td_flags.tiedness = TASK_TIED;
839  task->td_flags.tasktype = TASK_IMPLICIT;
840 #if OMP_45_ENABLED
841  task->td_flags.proxy = TASK_FULL;
842 #endif
843 
844  // All implicit tasks are executed immediately, not deferred
845  task->td_flags.task_serial = 1;
846  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
847  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
848 
849  task->td_flags.started = 1;
850  task->td_flags.executing = 1;
851  task->td_flags.complete = 0;
852  task->td_flags.freed = 0;
853 
854 #if OMP_40_ENABLED
855  task->td_dephash = NULL;
856  task->td_depnode = NULL;
857 #endif
858 
859  if (set_curr_task) { // only do this initialization the first time a thread is created
860  task->td_incomplete_child_tasks = 0;
861  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
862 #if OMP_40_ENABLED
863  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
864 #endif
865  __kmp_push_current_task_to_thread( this_thr, team, tid );
866  } else {
867  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
868  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
869  }
870 
871 #if OMPT_SUPPORT
872  __kmp_task_init_ompt(task, tid, NULL);
873 #endif
874 
875  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
876  tid, team, task ) );
877 }
878 
879 // Round up a size to a multiple of val, where val must be a power of two.
880 // Used to insert padding between structures co-allocated using a single malloc() call
881 static size_t
882 __kmp_round_up_to_val( size_t size, size_t val ) {
883  if ( size & ( val - 1 ) ) {
884  size &= ~ ( val - 1 );
885  if ( size <= KMP_SIZE_T_MAX - val ) {
886  size += val; // Round up if there is no overflow.
887  }; // if
888  }; // if
889  return size;
890 } // __kmp_round_up_to_val
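// Illustrative sketch (not part of the runtime): __kmp_round_up_to_val() above rounds a
// byte count up to the next multiple of a power-of-two alignment, as used to pad the
// kmp_task_t block so the trailing shareds pointer array stays pointer-aligned. This
// sketch omits the overflow guard that the runtime version performs.
#include <stdio.h>

static size_t sketch_round_up(size_t size, size_t val) { /* val must be a power of two */
    return (size + val - 1) & ~(val - 1);
}

int main(void) {
    printf("%zu -> %zu (align %zu)\n", (size_t)13, sketch_round_up(13, sizeof(void *)), sizeof(void *));
    printf("%zu -> %zu (align %zu)\n", (size_t)16, sketch_round_up(16, sizeof(void *)), sizeof(void *));
    return 0;
}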
891 
892 
893 //---------------------------------------------------------------------------------
894 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
895 //
896 // loc_ref: source location information
897 // gtid: global thread number.
898 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
899 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
900 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
901 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
902 // task_entry: Pointer to task code entry point generated by compiler.
903 // returns: a pointer to the allocated kmp_task_t structure (task).
904 
905 kmp_task_t *
906 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
907  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
908  kmp_routine_entry_t task_entry )
909 {
910  kmp_task_t *task;
911  kmp_taskdata_t *taskdata;
912  kmp_info_t *thread = __kmp_threads[ gtid ];
913  kmp_team_t *team = thread->th.th_team;
914  kmp_taskdata_t *parent_task = thread->th.th_current_task;
915  size_t shareds_offset;
916 
917  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
918  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
919  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
920  sizeof_shareds, task_entry) );
921 
922  if ( parent_task->td_flags.final ) {
923  if (flags->merged_if0) {
924  }
925  flags->final = 1;
926  }
927 
928 #if OMP_45_ENABLED
929  if ( flags->proxy == TASK_PROXY ) {
930  flags->tiedness = TASK_UNTIED;
931  flags->merged_if0 = 1;
932 
933  /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
934  if ( (thread->th.th_task_team) == NULL ) {
935  /* This should only happen if the team is serialized
936  setup a task team and propagate it to the thread
937  */
938  KMP_DEBUG_ASSERT(team->t.t_serialized);
939  KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
940  __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
941  thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
942  }
943  kmp_task_team_t * task_team = thread->th.th_task_team;
944 
945  /* tasking must be enabled now as the task might not be pushed */
946  if ( !KMP_TASKING_ENABLED( task_team ) ) {
947  KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
948  __kmp_enable_tasking( task_team, thread );
949  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
950  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
951  // No lock needed since only owner can allocate
952  if (thread_data -> td.td_deque == NULL ) {
953  __kmp_alloc_task_deque( thread, thread_data );
954  }
955  }
956 
957  if ( task_team->tt.tt_found_proxy_tasks == FALSE )
958  TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
959  }
960 #endif
961 
962  // Calculate shared structure offset including padding after kmp_task_t struct
963  // to align pointers in shared struct
964  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
965  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
966 
967  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
968  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
969  gtid, shareds_offset) );
970  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
971  gtid, sizeof_shareds) );
972 
973  // Avoid double allocation here by combining shareds with taskdata
974  #if USE_FAST_MEMORY
975  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
976  #else /* ! USE_FAST_MEMORY */
977  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
978  #endif /* USE_FAST_MEMORY */
979 
980  task = KMP_TASKDATA_TO_TASK(taskdata);
981 
982  // Make sure task & taskdata are aligned appropriately
983 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
984  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
985  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
986 #else
987  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
988  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
989 #endif
990  if (sizeof_shareds > 0) {
991  // Avoid double allocation here by combining shareds with taskdata
992  task->shareds = & ((char *) taskdata)[ shareds_offset ];
993  // Make sure shareds struct is aligned to pointer size
994  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
995  } else {
996  task->shareds = NULL;
997  }
998  task->routine = task_entry;
999  task->part_id = 0; // AC: Always start with 0 part id
1000 
1001  taskdata->td_task_id = KMP_GEN_TASK_ID();
1002  taskdata->td_team = team;
1003  taskdata->td_alloc_thread = thread;
1004  taskdata->td_parent = parent_task;
1005  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
1006  taskdata->td_untied_count = 0;
1007  taskdata->td_ident = loc_ref;
1008  taskdata->td_taskwait_ident = NULL;
1009  taskdata->td_taskwait_counter = 0;
1010  taskdata->td_taskwait_thread = 0;
1011  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
1012 #if OMP_45_ENABLED
1013  // avoid copying icvs for proxy tasks
1014  if ( flags->proxy == TASK_FULL )
1015 #endif
1016  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
1017 
1018  taskdata->td_flags.tiedness = flags->tiedness;
1019  taskdata->td_flags.final = flags->final;
1020  taskdata->td_flags.merged_if0 = flags->merged_if0;
1021 #if OMP_40_ENABLED
1022  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
1023 #endif // OMP_40_ENABLED
1024 #if OMP_45_ENABLED
1025  taskdata->td_flags.proxy = flags->proxy;
1026  taskdata->td_task_team = thread->th.th_task_team;
1027  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1028 #endif
1029  taskdata->td_flags.tasktype = TASK_EXPLICIT;
1030 
1031  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
1032  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
1033 
1034  // GEH - TODO: fix this to copy parent task's value of team_serial flag
1035  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
1036 
1037  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
1038  // tasks are not left until program termination to execute. Also, it helps locality to execute
1039  // immediately.
1040  taskdata->td_flags.task_serial = ( parent_task->td_flags.final
1041  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
1042 
1043  taskdata->td_flags.started = 0;
1044  taskdata->td_flags.executing = 0;
1045  taskdata->td_flags.complete = 0;
1046  taskdata->td_flags.freed = 0;
1047 
1048  taskdata->td_flags.native = flags->native;
1049 
1050  taskdata->td_incomplete_child_tasks = 0;
1051  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
1052 #if OMP_40_ENABLED
1053  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
1054  taskdata->td_dephash = NULL;
1055  taskdata->td_depnode = NULL;
1056 #endif
1057 
1058  // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
1059 #if OMP_45_ENABLED
1060  if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1061 #else
1062  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1063 #endif
1064  {
1065  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1066 #if OMP_40_ENABLED
1067  if ( parent_task->td_taskgroup )
1068  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1069 #endif
1070  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
1071  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1072  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1073  }
1074  }
1075 
1076  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1077  gtid, taskdata, taskdata->td_parent) );
1078 
1079 #if OMPT_SUPPORT
1080  __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
1081 #endif
1082 
1083  return task;
1084 }
1085 
1086 
1087 kmp_task_t *
1088 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1089  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1090  kmp_routine_entry_t task_entry )
1091 {
1092  kmp_task_t *retval;
1093  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1094 
1095  input_flags->native = FALSE;
1096  // __kmp_task_alloc() sets up all other runtime flags
1097 
1098 #if OMP_45_ENABLED
1099  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1100  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1101  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1102  input_flags->proxy ? "proxy" : "",
1103  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1104 #else
1105  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1106  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1107  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1108  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1109 #endif
1110 
1111  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1112  sizeof_shareds, task_entry );
1113 
1114  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1115 
1116  return retval;
1117 }
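// Hedged usage sketch (not part of the runtime): user-level code whose lowering calls
// __kmpc_omp_task_alloc()/__kmpc_omp_task(). With the clang/KMP lowering, firstprivate
// captures typically live in the private tail of the kmp_task_t block (sizeof_kmp_task_t),
// while shared variables are reached through the pointer array sized by sizeof_shareds;
// the exact layout is a compiler implementation detail, not guaranteed by this file.
#include <stdio.h>

int main(void) {
    int result = 0;           /* shared: accessed through the shareds block */
    int seed   = 7;           /* firstprivate: copied into the task's private data */
    #pragma omp parallel num_threads(2)
    #pragma omp single
    {
        #pragma omp task shared(result) firstprivate(seed)
        {
            result = seed * 6;
        }
        #pragma omp taskwait
        printf("result = %d\n", result);   /* prints 42 */
    }
    return 0;
}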
1118 
1119 //-----------------------------------------------------------
1120 // __kmp_invoke_task: invoke the specified task
1121 //
1122 // gtid: global thread ID of caller
1123 // task: the task to invoke
1124 // current_task: the task to resume after task invocation
1125 
1126 static void
1127 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1128 {
1129  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1130  kmp_uint64 cur_time;
1131 #if OMP_40_ENABLED
1132  int discard = 0 /* false */;
1133 #endif
1134  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1135  gtid, taskdata, current_task) );
1136  KMP_DEBUG_ASSERT(task);
1137 #if OMP_45_ENABLED
1138  if ( taskdata->td_flags.proxy == TASK_PROXY &&
1139  taskdata->td_flags.complete == 1)
1140  {
1141  // This is a proxy task that was already completed but it needs to run
1142  // its bottom-half finish
1143  KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1144  gtid, taskdata) );
1145 
1146  __kmp_bottom_half_finish_proxy(gtid,task);
1147 
1148  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1149 
1150  return;
1151  }
1152 #endif
1153 
1154 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1155  if(__kmp_forkjoin_frames_mode == 3) {
1156  // Get the current time stamp to measure task execution time to correct barrier imbalance time
1157  cur_time = __itt_get_timestamp();
1158  }
1159 #endif
1160 
1161 #if OMP_45_ENABLED
1162  // Proxy tasks are not handled by the runtime
1163  if ( taskdata->td_flags.proxy != TASK_PROXY )
1164 #endif
1165  __kmp_task_start( gtid, task, current_task );
1166 
1167 #if OMPT_SUPPORT
1168  ompt_thread_info_t oldInfo;
1169  kmp_info_t * thread;
1170  if (ompt_enabled) {
1171  // Store the threads states and restore them after the task
1172  thread = __kmp_threads[ gtid ];
1173  oldInfo = thread->th.ompt_thread_info;
1174  thread->th.ompt_thread_info.wait_id = 0;
1175  thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1176  taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1177  }
1178 #endif
1179 
1180 #if OMP_40_ENABLED
1181  // TODO: cancel tasks if the parallel region has also been cancelled
1182  // TODO: check if this sequence can be hoisted above __kmp_task_start
1183  // if cancellation has been enabled for this run ...
1184  if (__kmp_omp_cancellation) {
1185  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1186  kmp_team_t * this_team = this_thr->th.th_team;
1187  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1188  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1189  KMP_COUNT_BLOCK(TASK_cancelled);
1190  // this task belongs to a task group and we need to cancel it
1191  discard = 1 /* true */;
1192  }
1193  }
1194 
1195  //
1196  // Invoke the task routine and pass in relevant data.
1197  // Thunks generated by gcc take a different argument list.
1198  //
1199  if (!discard) {
1200 #if KMP_STATS_ENABLED
1201  KMP_COUNT_BLOCK(TASK_executed);
1202  switch(KMP_GET_THREAD_STATE()) {
1203  case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
1204  case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
1205  case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
1206  case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
1207  case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
1208  default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
1209  }
1210 #endif // KMP_STATS_ENABLED
1211 #endif // OMP_40_ENABLED
1212 
1213 #if OMPT_SUPPORT && OMPT_TRACE
1214  /* let OMPT know that we're about to run this task */
1215  if (ompt_enabled &&
1216  ompt_callbacks.ompt_callback(ompt_event_task_switch))
1217  {
1218  ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1219  current_task->ompt_task_info.task_id,
1220  taskdata->ompt_task_info.task_id);
1221  }
1222 #endif
1223 
1224 #ifdef KMP_GOMP_COMPAT
1225  if (taskdata->td_flags.native) {
1226  ((void (*)(void *))(*(task->routine)))(task->shareds);
1227  }
1228  else
1229 #endif /* KMP_GOMP_COMPAT */
1230  {
1231  (*(task->routine))(gtid, task);
1232  }
1233  KMP_POP_PARTITIONED_TIMER();
1234 
1235 #if OMPT_SUPPORT && OMPT_TRACE
1236  /* let OMPT know that we're returning to the callee task */
1237  if (ompt_enabled &&
1238  ompt_callbacks.ompt_callback(ompt_event_task_switch))
1239  {
1240  ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1241  taskdata->ompt_task_info.task_id,
1242  current_task->ompt_task_info.task_id);
1243  }
1244 #endif
1245 
1246 #if OMP_40_ENABLED
1247  }
1248 #endif // OMP_40_ENABLED
1249 
1250 
1251 #if OMPT_SUPPORT
1252  if (ompt_enabled) {
1253  thread->th.ompt_thread_info = oldInfo;
1254  taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1255  }
1256 #endif
1257 
1258 #if OMP_45_ENABLED
1259  // Proxy tasks are not handled by the runtime
1260  if ( taskdata->td_flags.proxy != TASK_PROXY )
1261 #endif
1262  __kmp_task_finish( gtid, task, current_task );
1263 
1264 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1265  // Barrier imbalance - correct arrive time after the task finished
1266  if(__kmp_forkjoin_frames_mode == 3) {
1267  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1268  if(this_thr->th.th_bar_arrive_time) {
1269  this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1270  }
1271  }
1272 #endif
1273  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1274  gtid, taskdata, current_task) );
1275  return;
1276 }
1277 
1278 //-----------------------------------------------------------------------
1279 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1280 //
1281 // loc_ref: location of original task pragma (ignored)
1282 // gtid: Global Thread ID of encountering thread
1283 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1284 // Returns:
1285 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1286 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1287 
1288 kmp_int32
1289 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1290 {
1291  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1292 
1293  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1294  gtid, loc_ref, new_taskdata ) );
1295 
1296  /* Should we execute the new task or queue it? For now, let's just always try to
1297  queue it. If the queue fills up, then we'll execute it. */
1298 
1299  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1300  { // Execute this task immediately
1301  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1302  new_taskdata->td_flags.task_serial = 1;
1303  __kmp_invoke_task( gtid, new_task, current_task );
1304  }
1305 
1306  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1307  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1308  new_taskdata ) );
1309 
1310  return TASK_CURRENT_NOT_QUEUED;
1311 }
1312 
1313 //---------------------------------------------------------------------
1314 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
1315 // gtid: Global Thread ID of encountering thread
1316 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1317 // serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1318 // returns:
1319 //
1320 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1321 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1322 kmp_int32
1323 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1324 {
1325  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1326 
1327 #if OMPT_SUPPORT
1328  if (ompt_enabled) {
1329  new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1330  __builtin_frame_address(0);
1331  }
1332 #endif
1333 
1334  /* Should we execute the new task or queue it? For now, let's just always try to
1335  queue it. If the queue fills up, then we'll execute it. */
1336 #if OMP_45_ENABLED
1337  if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1338 #else
1339  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1340 #endif
1341  { // Execute this task immediately
1342  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1343  if ( serialize_immediate )
1344  new_taskdata -> td_flags.task_serial = 1;
1345  __kmp_invoke_task( gtid, new_task, current_task );
1346  }
1347 
1348 #if OMPT_SUPPORT
1349  if (ompt_enabled) {
1350  new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1351  }
1352 #endif
1353 
1354  return TASK_CURRENT_NOT_QUEUED;
1355 }
1356 
1357 //---------------------------------------------------------------------
1358 // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1359 // the parent thread only!
1360 // loc_ref: location of original task pragma (ignored)
1361 // gtid: Global Thread ID of encountering thread
1362 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1363 // returns:
1364 //
1365 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1366 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1367 
1368 kmp_int32
1369 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1370 {
1371  kmp_int32 res;
1372  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1373 
1374 #if KMP_DEBUG
1375  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1376 #endif
1377  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1378  gtid, loc_ref, new_taskdata ) );
1379 
1380  res = __kmp_omp_task(gtid,new_task,true);
1381 
1382  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1383  gtid, loc_ref, new_taskdata ) );
1384  return res;
1385 }
1386 
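// Illustrative note (not part of the runtime source): a compiler typically
// lowers
//     #pragma omp task
//         foo();
// into a pair of calls against these entry points, roughly:
//
//     // flag and size values below are placeholders, not what a real
//     // compiler would necessarily emit
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /* flags */ 1,
//                                            sizeof(kmp_task_t), /* shareds */ 0,
//                                            &outlined_task_entry );
//     __kmpc_omp_task( &loc, gtid, t );
//
// The runtime then either defers the task via __kmp_push_task() or, when the
// deque cannot accept it, invokes it immediately as shown above.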
1387 //-------------------------------------------------------------------------------------
1388 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1389 
1390 kmp_int32
1391 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1392 {
1393  kmp_taskdata_t * taskdata;
1394  kmp_info_t * thread;
1395  int thread_finished = FALSE;
1396  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
1397 
1398  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
1399 
1400  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1401  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1402 
1403  thread = __kmp_threads[ gtid ];
1404  taskdata = thread -> th.th_current_task;
1405 
1406 #if OMPT_SUPPORT && OMPT_TRACE
1407  ompt_task_id_t my_task_id;
1408  ompt_parallel_id_t my_parallel_id;
1409 
1410  if (ompt_enabled) {
1411  kmp_team_t *team = thread->th.th_team;
1412  my_task_id = taskdata->ompt_task_info.task_id;
1413  my_parallel_id = team->t.ompt_team_info.parallel_id;
1414 
1415  taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
1416  if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1417  ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
1418  my_parallel_id, my_task_id);
1419  }
1420  }
1421 #endif
1422 
1423  // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
1424 #if USE_ITT_BUILD
1425  // Note: These values are used by ITT events as well.
1426 #endif /* USE_ITT_BUILD */
1427  taskdata->td_taskwait_counter += 1;
1428  taskdata->td_taskwait_ident = loc_ref;
1429  taskdata->td_taskwait_thread = gtid + 1;
1430 
1431 #if USE_ITT_BUILD
1432  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1433  if ( itt_sync_obj != NULL )
1434  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1435 #endif /* USE_ITT_BUILD */
1436 
1437 #if OMP_45_ENABLED
1438  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1439 #else
1440  if ( ! taskdata->td_flags.team_serial )
1441 #endif
1442  {
1443  // GEH: if team serialized, avoid reading the volatile variable below.
1444  kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1445  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1446  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1447  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1448  }
1449  }
1450 #if USE_ITT_BUILD
1451  if ( itt_sync_obj != NULL )
1452  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1453 #endif /* USE_ITT_BUILD */
1454 
1455  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1456  // Debugger: The taskwait is completed. The location remains, but the thread id is negated.
1457  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1458 
1459 #if OMPT_SUPPORT && OMPT_TRACE
1460  if (ompt_enabled) {
1461  if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1462  ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
1463  my_parallel_id, my_task_id);
1464  }
1465  taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1466  }
1467 #endif
1468  }
1469 
1470  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1471  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1472 
1473  return TASK_CURRENT_NOT_QUEUED;
1474 }
1475 
1476 
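// Illustrative note (not part of the runtime source): `#pragma omp taskwait`
// is normally lowered to a single call of the form
//     __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
// so the wait loop above executes in the context of the encountering task and
// may itself pick up and run queued child tasks.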
1477 //-------------------------------------------------
1478 // __kmpc_omp_taskyield: switch to a different task
1479 
1480 kmp_int32
1481 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1482 {
1483  kmp_taskdata_t * taskdata;
1484  kmp_info_t * thread;
1485  int thread_finished = FALSE;
1486 
1487  KMP_COUNT_BLOCK(OMP_TASKYIELD);
1488  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
1489 
1490  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1491  gtid, loc_ref, end_part) );
1492 
1493  if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1494  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1495 
1496  thread = __kmp_threads[ gtid ];
1497  taskdata = thread -> th.th_current_task;
1498  // Should we model this as a task wait or not?
1499  // Debugger: The taskwait is active. Store the location and the thread that encountered the taskwait.
1500 #if USE_ITT_BUILD
1501  // Note: These values are used by ITT events as well.
1502 #endif /* USE_ITT_BUILD */
1503  taskdata->td_taskwait_counter += 1;
1504  taskdata->td_taskwait_ident = loc_ref;
1505  taskdata->td_taskwait_thread = gtid + 1;
1506 
1507 #if USE_ITT_BUILD
1508  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1509  if ( itt_sync_obj != NULL )
1510  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1511 #endif /* USE_ITT_BUILD */
1512  if ( ! taskdata->td_flags.team_serial ) {
1513  kmp_task_team_t * task_team = thread->th.th_task_team;
1514  if (task_team != NULL) {
1515  if (KMP_TASKING_ENABLED(task_team)) {
1516  __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1517  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1518  }
1519  }
1520  }
1521 #if USE_ITT_BUILD
1522  if ( itt_sync_obj != NULL )
1523  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1524 #endif /* USE_ITT_BUILD */
1525 
1526  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1527  // Debugger: The taskwait is completed. The location remains, but the thread id is negated.
1528  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1529  }
1530 
1531  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1532  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1533 
1534  return TASK_CURRENT_NOT_QUEUED;
1535 }
1536 
1537 
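// Illustrative note (not part of the runtime source): `#pragma omp taskyield`
// maps to a call such as
//     __kmpc_omp_taskyield( &loc, gtid, /* end_part */ 0 );
// (0 is an assumed value for end_part; within this routine the argument is
// only reported in the trace output).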
1538 #if OMP_40_ENABLED
1539 //-------------------------------------------------------------------------------------
1540 // __kmpc_taskgroup: Start a new taskgroup
1541 
1542 void
1543 __kmpc_taskgroup( ident_t* loc, int gtid )
1544 {
1545  kmp_info_t * thread = __kmp_threads[ gtid ];
1546  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1547  kmp_taskgroup_t * tg_new =
1548  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1549  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1550  tg_new->count = 0;
1551  tg_new->cancel_request = cancel_noreq;
1552  tg_new->parent = taskdata->td_taskgroup;
1553  taskdata->td_taskgroup = tg_new;
1554 }
1555 
1556 
1557 //-------------------------------------------------------------------------------------
1558 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1559 // and its descendants are complete
1560 
1561 void
1562 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1563 {
1564  kmp_info_t * thread = __kmp_threads[ gtid ];
1565  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1566  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1567  int thread_finished = FALSE;
1568 
1569  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1570  KMP_DEBUG_ASSERT( taskgroup != NULL );
1571  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
1572 
1573  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1574 #if USE_ITT_BUILD
1575  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1576  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1577  if ( itt_sync_obj != NULL )
1578  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1579 #endif /* USE_ITT_BUILD */
1580 
1581 #if OMP_45_ENABLED
1582  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1583 #else
1584  if ( ! taskdata->td_flags.team_serial )
1585 #endif
1586  {
1587  kmp_flag_32 flag(&(taskgroup->count), 0U);
1588  while ( TCR_4(taskgroup->count) != 0 ) {
1589  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1590  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1591  }
1592  }
1593 
1594 #if USE_ITT_BUILD
1595  if ( itt_sync_obj != NULL )
1596  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1597 #endif /* USE_ITT_BUILD */
1598  }
1599  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1600 
1601  // Restore parent taskgroup for the current task
1602  taskdata->td_taskgroup = taskgroup->parent;
1603  __kmp_thread_free( thread, taskgroup );
1604 
1605  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1606 }
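// Illustrative note (not part of the runtime source): a taskgroup region is
// bracketed by the two entry points above, e.g.
//     __kmpc_taskgroup( &loc, gtid );
//     // ... tasks created here are counted in the new taskgroup's count ...
//     __kmpc_end_taskgroup( &loc, gtid );   // spins until count reaches 0
// Nested taskgroups work through the parent link saved in __kmpc_taskgroup.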
1607 #endif
1608 
1609 
1610 //------------------------------------------------------
1611 // __kmp_remove_my_task: remove a task from my own deque
1612 
1613 static kmp_task_t *
1614 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1615  kmp_int32 is_constrained )
1616 {
1617  kmp_task_t * task;
1618  kmp_taskdata_t * taskdata;
1619  kmp_thread_data_t *thread_data;
1620  kmp_uint32 tail;
1621 
1622  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1623  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1624 
1625  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1626 
1627  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1628  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1629  thread_data->td.td_deque_tail) );
1630 
1631  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1632  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1633  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1634  thread_data->td.td_deque_tail) );
1635  return NULL;
1636  }
1637 
1638  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1639 
1640  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1641  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1642  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1643  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1644  thread_data->td.td_deque_tail) );
1645  return NULL;
1646  }
1647 
1648  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
1649  taskdata = thread_data -> td.td_deque[ tail ];
1650 
1651  if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
1652  // we need to check if the candidate obeys task scheduling constraint:
1653  // only a child of the current task can be scheduled
1654  kmp_taskdata_t * current = thread->th.th_current_task;
1655  kmp_int32 level = current->td_level;
1656  kmp_taskdata_t * parent = taskdata->td_parent;
1657  while ( parent != current && parent->td_level > level ) {
1658  parent = parent->td_parent; // check generation up to the level of the current task
1659  KMP_DEBUG_ASSERT(parent != NULL);
1660  }
1661  if ( parent != current ) {
1662  // If the tail task is not a child, then no other child can appear in the deque.
1663  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1664  KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1665  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1666  thread_data->td.td_deque_tail) );
1667  return NULL;
1668  }
1669  }
1670 
1671  thread_data -> td.td_deque_tail = tail;
1672  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1673 
1674  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1675 
1676  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1677  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1678  thread_data->td.td_deque_tail) );
1679 
1680  task = KMP_TASKDATA_TO_TASK( taskdata );
1681  return task;
1682 }
1683 
1684 
1685 //-----------------------------------------------------------
1686 // __kmp_steal_task: remove a task from another thread's deque
1687 // Assumes that the calling thread has already checked the existence of the
1688 // task_team thread_data before calling this routine.
1689 
1690 static kmp_task_t *
1691 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1692  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1693  kmp_int32 is_constrained )
1694 {
1695  kmp_task_t * task;
1696  kmp_taskdata_t * taskdata;
1697  kmp_thread_data_t *victim_td, *threads_data;
1698  kmp_int32 victim_tid;
1699 
1700  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1701 
1702  threads_data = task_team -> tt.tt_threads_data;
1703  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1704 
1705  victim_tid = victim->th.th_info.ds.ds_tid;
1706  victim_td = & threads_data[ victim_tid ];
1707 
1708  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1709  "head=%u tail=%u\n",
1710  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1711  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1712 
1713  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1714  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1715  {
1716  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1717  "ntasks=%d head=%u tail=%u\n",
1718  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1719  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1720  return NULL;
1721  }
1722 
1723  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1724 
1725  // Check again after we acquire the lock
1726  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1727  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1728  {
1729  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1730  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1731  "ntasks=%d head=%u tail=%u\n",
1732  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1733  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1734  return NULL;
1735  }
1736 
1737  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1738 
1739  if ( !is_constrained ) {
1740  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1741  KMP_ASSERT(taskdata);
1742  // Bump head pointer and Wrap.
1743  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK(victim_td->td);
1744  } else {
1745  // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1746  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(victim_td->td); // Wrap index.
1747  taskdata = victim_td -> td.td_deque[ tail ];
1748  KMP_ASSERT(taskdata);
1749  // we need to check if the candidate obeys task scheduling constraint:
1750  // only a child of the current task can be scheduled
1751  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1752  kmp_int32 level = current->td_level;
1753  kmp_taskdata_t * parent = taskdata->td_parent;
1754  while ( parent != current && parent->td_level > level ) {
1755  parent = parent->td_parent; // check generation up to the level of the current task
1756  KMP_DEBUG_ASSERT(parent != NULL);
1757  }
1758  if ( parent != current && (taskdata->td_flags.tiedness == TASK_TIED) ) { // untied is always allowed to be stolen
1759  // If the tail task is not a child, then no other children can appear in the deque (?).
1760  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1761  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from T#%d: task_team=%p "
1762  "ntasks=%d head=%u tail=%u\n",
1763  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1764  task_team, victim_td->td.td_deque_ntasks,
1765  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1766  return NULL;
1767  }
1768  victim_td -> td.td_deque_tail = tail;
1769  }
1770  if (*thread_finished) {
1771  // We need to un-mark this victim as a finished victim. This must be done before
1772  // releasing the lock, or else other threads (starting with the master victim)
1773  // might be prematurely released from the barrier!!!
1774  kmp_uint32 count;
1775 
1776  count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1777 
1778  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1779  gtid, count + 1, task_team) );
1780 
1781  *thread_finished = FALSE;
1782  }
1783  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1784 
1785  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1786 
1787  KMP_COUNT_BLOCK(TASK_stolen);
1788  KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
1789  "ntasks=%d head=%u tail=%u\n",
1790  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1791  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1792  victim_td->td.td_deque_tail) );
1793 
1794  task = KMP_TASKDATA_TO_TASK( taskdata );
1795  return task;
1796 }
1797 
1798 
1799 //-----------------------------------------------------------------------------
1800 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition
1801 // is satisfied (return true) or there are none left (return false).
1802 // final_spin is TRUE if this is the spin at the release barrier.
1803 // thread_finished indicates whether the thread is finished executing all
1804 // the tasks it has on its deque, and is at the release barrier.
1805 // spinner is the location on which to spin.
1806 // spinner == NULL means only execute a single task and return.
1807 // checker is the value to check to terminate the spin.
1808 template <class C>
1809 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1810  int *thread_finished
1811  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1812 {
1813  kmp_task_team_t * task_team = thread->th.th_task_team;
1814  kmp_thread_data_t * threads_data;
1815  kmp_task_t * task;
1816  kmp_info_t * other_thread;
1817  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1818  volatile kmp_uint32 * unfinished_threads;
1819  kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
1820 
1821  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1822  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1823 
1824  if (task_team == NULL) return FALSE;
1825 
1826  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1827  gtid, final_spin, *thread_finished) );
1828 
1829  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1830  KMP_DEBUG_ASSERT( threads_data != NULL );
1831 
1832  nthreads = task_team -> tt.tt_nproc;
1833  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1834 #if OMP_45_ENABLED
1835  KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1836 #else
1837  KMP_DEBUG_ASSERT( nthreads > 1 );
1838 #endif
1839  KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
1840 
1841  while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
1842  while (1) { // Inner loop to find a task and execute it
1843  task = NULL;
1844  if (use_own_tasks) { // check on own queue first
1845  task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
1846  }
1847  if ((task == NULL) && (nthreads > 1)) { // Steal a task
1848  int asleep = 1;
1849  use_own_tasks = 0;
1850  // Try to steal from the last place I stole from successfully.
1851  if (victim == -2) { // haven't stolen anything yet
1852  victim = threads_data[tid].td.td_deque_last_stolen;
1853  if (victim != -1) // if we have a last stolen from victim, get the thread
1854  other_thread = threads_data[victim].td.td_thr;
1855  }
1856  if (victim != -1) { // found last victim
1857  asleep = 0;
1858  }
1859  else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
1860  do { // Find a different thread to steal work from.
1861  // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
1862  // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
1863  victim = __kmp_get_random(thread) % (nthreads - 1);
1864  if (victim >= tid) {
1865  ++victim; // Adjusts random distribution to exclude self
1866  }
1867  // Found a potential victim
1868  other_thread = threads_data[victim].td.td_thr;
1869  // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
1870  // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
1871  // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
1872  // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
1873  // do not sleep at the separate tasking barrier, so this isn't a problem.
1874  asleep = 0;
1875  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1876  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1877  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
1878  asleep = 1;
1879  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1880  // A sleeping thread should not have any tasks on its queue. There is a slight
1881  // possibility that it resumes, steals a task from another thread, which spawns more
1882  // tasks, all in the time that it takes this thread to check => don't write an assertion
1883  // that the victim's queue is empty. Try stealing from a different thread.
1884  }
1885  } while (asleep);
1886  }
1887 
1888  if (!asleep) {
1889  // We have a victim to try to steal from
1890  task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
1891  }
1892  if (task != NULL) { // set last stolen to victim
1893  if (threads_data[tid].td.td_deque_last_stolen != victim) {
1894  threads_data[tid].td.td_deque_last_stolen = victim;
1895  // The pre-refactored code did not try more than 1 successful new victim,
1896  // unless the last one generated more local tasks; new_victim keeps track of this
1897  new_victim = 1;
1898  }
1899  }
1900  else { // No tasks found; unset last_stolen
1901  KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
1902  victim = -2; // no successful victim found
1903  }
1904  }
1905 
1906  if (task == NULL) // break out of tasking loop
1907  break;
1908 
1909  // Found a task; execute it
1910 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1911  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1912  if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
1913  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1914  }
1915  __kmp_itt_task_starting( itt_sync_obj );
1916  }
1917 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1918  __kmp_invoke_task( gtid, task, current_task );
1919 #if USE_ITT_BUILD
1920  if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
1921 #endif /* USE_ITT_BUILD */
1922  // If this thread is only partway through the barrier and the condition is met, then return now,
1923  // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
1924  // in the barrier, waiting to be released, we know that the termination condition will not be
1925  // satisfied, so don't waste any cycles checking it.
1926  if (flag == NULL || (!final_spin && flag->done_check())) {
1927  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
1928  return TRUE;
1929  }
1930  if (thread->th.th_task_team == NULL) {
1931  break;
1932  }
1933  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1934  // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
1935  if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
1936  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
1937  use_own_tasks = 1;
1938  new_victim = 0;
1939  }
1940  }
1941 
1942  // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
1943 #if OMP_45_ENABLED
1944  // The work queue may be empty but there might be proxy tasks still executing
1945  if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
1946 #else
1947  if (final_spin)
1948 #endif
1949  {
1950  // First, decrement the #unfinished threads, if that has not already been done. This decrement
1951  // might be to the spin location, and result in the termination condition being satisfied.
1952  if (! *thread_finished) {
1953  kmp_uint32 count;
1954 
1955  count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1956  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
1957  gtid, count, task_team) );
1958  *thread_finished = TRUE;
1959  }
1960 
1961  // It is now unsafe to reference thread->th.th_team !!!
1962  // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
1963  // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
1964  // If we can steal more work, we know that this has not happened yet.
1965  if (flag != NULL && flag->done_check()) {
1966  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
1967  return TRUE;
1968  }
1969  }
1970 
1971  // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
1972  if (thread->th.th_task_team == NULL) {
1973  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
1974  return FALSE;
1975  }
1976 
1977 #if OMP_45_ENABLED
1978  // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
1979  // tasks from own queue
1980  if (nthreads == 1)
1981  use_own_tasks = 1;
1982  else
1983 #endif
1984  {
1985  KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
1986  return FALSE;
1987  }
1988  }
1989 }
1990 
1991 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1992  int *thread_finished
1993  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1994 {
1995  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1996  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1997 }
1998 
1999 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2000  int *thread_finished
2001  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2002 {
2003  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2004  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2005 }
2006 
2007 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2008  int *thread_finished
2009  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2010 {
2011  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2012  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2013 }
2014 
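// Note: the three wrappers above differ only in the flag type they hand to
// __kmp_execute_tasks_template. For example, the taskwait path earlier in this
// file builds a 32-bit flag on the child-task counter and drives the same loop:
//     kmp_flag_32 flag( &(taskdata->td_incomplete_child_tasks), 0U );
//     flag.execute_tasks( thread, gtid, FALSE, &thread_finished
//                         USE_ITT_BUILD_ARG(itt_sync_obj),
//                         __kmp_task_stealing_constraint );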
2015 
2016 
2017 //-----------------------------------------------------------------------------
2018 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2019 // next barrier so they can assist in executing enqueued tasks.
2020 // The first thread to arrive allocates the task team atomically.
2021 
2022 static void
2023 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2024 {
2025  kmp_thread_data_t *threads_data;
2026  int nthreads, i, is_init_thread;
2027 
2028  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2029  __kmp_gtid_from_thread( this_thr ) ) );
2030 
2031  KMP_DEBUG_ASSERT(task_team != NULL);
2032  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
2033 
2034  nthreads = task_team->tt.tt_nproc;
2035  KMP_DEBUG_ASSERT(nthreads > 0);
2036  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
2037 
2038  // Allocate or increase the size of threads_data if necessary
2039  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2040 
2041  if (!is_init_thread) {
2042  // Some other thread already set up the array.
2043  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2044  __kmp_gtid_from_thread( this_thr ) ) );
2045  return;
2046  }
2047  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2048  KMP_DEBUG_ASSERT( threads_data != NULL );
2049 
2050  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2051  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2052  {
2053  // Release any threads sleeping at the barrier, so that they can steal
2054  // tasks and execute them. In extra barrier mode, tasks do not sleep
2055  // at the separate tasking barrier, so this isn't a problem.
2056  for (i = 0; i < nthreads; i++) {
2057  volatile void *sleep_loc;
2058  kmp_info_t *thread = threads_data[i].td.td_thr;
2059 
2060  if (i == this_thr->th.th_info.ds.ds_tid) {
2061  continue;
2062  }
2063  // Since we haven't locked the thread's suspend mutex lock at this
2064  // point, there is a small window where a thread might be putting
2065  // itself to sleep, but hasn't set the th_sleep_loc field yet.
2066  // To work around this, __kmp_execute_tasks_template() periodically checks to
2067  // see if other threads are sleeping (using the same random
2068  // mechanism that is used for task stealing) and awakens them if
2069  // they are.
2070  if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
2071  {
2072  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2073  __kmp_gtid_from_thread( this_thr ),
2074  __kmp_gtid_from_thread( thread ) ) );
2075  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2076  }
2077  else {
2078  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2079  __kmp_gtid_from_thread( this_thr ),
2080  __kmp_gtid_from_thread( thread ) ) );
2081  }
2082  }
2083  }
2084 
2085  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2086  __kmp_gtid_from_thread( this_thr ) ) );
2087 }
2088 
2089 
2090 /* ------------------------------------------------------------------------ */
2091 /* // TODO: Check the comment consistency
2092  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2093  * like a shadow of the kmp_team_t data struct, with a different lifetime.
2094  * After a child thread checks into a barrier and calls __kmp_release() from
2095  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2096  * longer assume that the kmp_team_t structure is intact (at any moment, the
2097  * master thread may exit the barrier code and free the team data structure,
2098  * and return the threads to the thread pool).
2099  *
2100  * This does not work with the tasking code, as the thread is still
2101  * expected to participate in the execution of any tasks that may have been
2102  * spawned by a member of the team, and the thread still needs access
2103  * to each thread in the team, so that it can steal work from it.
2104  *
2105  * Enter the existence of the kmp_task_team_t struct. It employs a reference
2106  * counting mechanism, and is allocated by the master thread before calling
2107  * __kmp_<barrier_kind>_release, and is then released by the last thread to
2108  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2109  * of the kmp_task_team_t structs for consecutive barriers can overlap
2110  * (and will, unless the master thread is the last thread to exit the barrier
2111  * release phase, which is not typical).
2112  *
2113  * The existence of such a struct is useful outside the context of tasking,
2114  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2115  * so that any performance differences show up when comparing the 2.5 vs. 3.0
2116  * libraries.
2117  *
2118  * We currently use the existence of the threads array as an indicator that
2119  * tasks were spawned since the last barrier. If the structure is to be
2120  * useful outside the context of tasking, then this will have to change, but
2121  * not setting the field minimizes the performance impact of tasking on
2122  * barriers, when no explicit tasks were spawned (pushed, actually).
2123  */
2124 
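// Orientation sketch (not part of the runtime source): each kmp_team_t keeps
// two task team slots, and each thread selects its current slot through
// th_task_state, e.g.
//     int parity = this_thr->th.th_task_state;               // 0 or 1
//     kmp_task_team_t *tt = team->t.t_task_team[ parity ];   // current task team
// __kmp_task_team_sync() below flips the parity after each barrier release,
// which is what lets the lifetimes of consecutive task teams overlap as
// described in the comment above.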
2125 
2126 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2127 // Lock for task team data structures
2128 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2129 
2130 
2131 //------------------------------------------------------------------------------
2132 // __kmp_alloc_task_deque:
2133 // Allocates a task deque for a particular thread, and initializes the necessary
2134 // data structures relating to the deque. This only happens once per thread
2135 // per task team since task teams are recycled.
2136 // No lock is needed during allocation since each thread allocates its own
2137 // deque.
2138 
2139 static void
2140 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2141 {
2142  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2143  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2144 
2145  // Initialize last stolen task field to "none"
2146  thread_data -> td.td_deque_last_stolen = -1;
2147 
2148  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2149  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2150  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2151 
2152  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2153  __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
2154  // Allocate space for task deque, and zero the deque
2155  // Cannot use __kmp_thread_calloc() because threads not around for
2156  // kmp_reap_task_team( ).
2157  thread_data -> td.td_deque = (kmp_taskdata_t **)
2158  __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2159  thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
2160 }
2161 
2162 //------------------------------------------------------------------------------
2163 // __kmp_realloc_task_deque:
2164 // Re-allocates a task deque for a particular thread, copies the content from the old deque
2165 // and adjusts the necessary data structures relating to the deque.
2166 // This operation must be done with the deque_lock held.
2167 
2168 static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2169 {
2170  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2171  kmp_int32 new_size = 2 * size;
2172 
2173  KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
2174  __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
2175 
2176  kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
2177 
2178  int i,j;
2179  for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
2180  new_deque[j] = thread_data->td.td_deque[i];
2181 
2182  __kmp_free(thread_data->td.td_deque);
2183 
2184  thread_data -> td.td_deque_head = 0;
2185  thread_data -> td.td_deque_tail = size;
2186  thread_data -> td.td_deque = new_deque;
2187  thread_data -> td.td_deque_size = new_size;
2188 }
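// Note: deque sizes remain a power of two (INITIAL_TASK_DEQUE_SIZE, doubled on
// each reallocation), so indices wrap with a mask instead of a modulo, as in
//     tail = ( tail + 1 ) & TASK_DEQUE_MASK( thread_data->td );
// which is the pattern the push/pop/steal routines in this file rely on.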
2189 
2190 //------------------------------------------------------------------------------
2191 // __kmp_free_task_deque:
2192 // Deallocates a task deque for a particular thread.
2193 // Happens at library deallocation, so we don't need to reset all thread data fields.
2194 
2195 static void
2196 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2197 {
2198  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2199 
2200  if ( thread_data -> td.td_deque != NULL ) {
2201  TCW_4(thread_data -> td.td_deque_ntasks, 0);
2202  __kmp_free( thread_data -> td.td_deque );
2203  thread_data -> td.td_deque = NULL;
2204  }
2205  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2206 
2207 #ifdef BUILD_TIED_TASK_STACK
2208  // GEH: Figure out what to do here for td_susp_tied_tasks
2209  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2210  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2211  }
2212 #endif // BUILD_TIED_TASK_STACK
2213 }
2214 
2215 
2216 //------------------------------------------------------------------------------
2217 // __kmp_realloc_task_threads_data:
2218 // Allocates a threads_data array for a task team, either by allocating an initial
2219 // array or enlarging an existing array. Only the first thread to get the lock
2220 // allocates or enlarges the array and re-initializes the array elements.
2221 // That thread returns "TRUE", the rest return "FALSE".
2222 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
2223 // The current size is given by task_team -> tt.tt_max_threads.
2224 
2225 static int
2226 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2227 {
2228  kmp_thread_data_t ** threads_data_p;
2229  kmp_int32 nthreads, maxthreads;
2230  int is_init_thread = FALSE;
2231 
2232  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2233  // Already reallocated and initialized.
2234  return FALSE;
2235  }
2236 
2237  threads_data_p = & task_team -> tt.tt_threads_data;
2238  nthreads = task_team -> tt.tt_nproc;
2239  maxthreads = task_team -> tt.tt_max_threads;
2240 
2241  // All threads must lock when they encounter the first task of the implicit task
2242  // region to make sure threads_data fields are (re)initialized before used.
2243  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2244 
2245  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2246  // first thread to enable tasking
2247  kmp_team_t *team = thread -> th.th_team;
2248  int i;
2249 
2250  is_init_thread = TRUE;
2251  if ( maxthreads < nthreads ) {
2252 
2253  if ( *threads_data_p != NULL ) {
2254  kmp_thread_data_t *old_data = *threads_data_p;
2255  kmp_thread_data_t *new_data = NULL;
2256 
2257  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2258  "threads data for task_team %p, new_size = %d, old_size = %d\n",
2259  __kmp_gtid_from_thread( thread ), task_team,
2260  nthreads, maxthreads ) );
2261  // Reallocate threads_data to have more elements than current array
2262  // Cannot use __kmp_thread_realloc() because threads not around for
2263  // kmp_reap_task_team( ). Note all new array entries are initialized
2264  // to zero by __kmp_allocate().
2265  new_data = (kmp_thread_data_t *)
2266  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2267  // copy old data to new data
2268  KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2269  (void *) old_data,
2270  maxthreads * sizeof(kmp_thread_data_t) );
2271 
2272 #ifdef BUILD_TIED_TASK_STACK
2273  // GEH: Figure out if this is the right thing to do
2274  for (i = maxthreads; i < nthreads; i++) {
2275  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2276  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2277  }
2278 #endif // BUILD_TIED_TASK_STACK
2279  // Install the new data and free the old data
2280  (*threads_data_p) = new_data;
2281  __kmp_free( old_data );
2282  }
2283  else {
2284  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2285  "threads data for task_team %p, size = %d\n",
2286  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2287  // Make the initial allocate for threads_data array, and zero entries
2288  // Cannot use __kmp_thread_calloc() because threads not around for
2289  // kmp_reap_task_team( ).
2290  *threads_data_p = (kmp_thread_data_t *)
2291  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2292 #ifdef BUILD_TIED_TASK_STACK
2293  // GEH: Figure out if this is the right thing to do
2294  for (i = 0; i < nthreads; i++) {
2295  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2296  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2297  }
2298 #endif // BUILD_TIED_TASK_STACK
2299  }
2300  task_team -> tt.tt_max_threads = nthreads;
2301  }
2302  else {
2303  // If array has (more than) enough elements, go ahead and use it
2304  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2305  }
2306 
2307  // initialize threads_data pointers back to thread_info structures
2308  for (i = 0; i < nthreads; i++) {
2309  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2310  thread_data -> td.td_thr = team -> t.t_threads[i];
2311 
2312  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2313  // The last stolen field survives across teams / barrier, and the number
2314  // of threads may have changed. It's possible (likely?) that a new
2315  // parallel region will exhibit the same behavior as the previous region.
2316  thread_data -> td.td_deque_last_stolen = -1;
2317  }
2318  }
2319 
2320  KMP_MB();
2321  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2322  }
2323 
2324  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2325  return is_init_thread;
2326 }
2327 
2328 
2329 //------------------------------------------------------------------------------
2330 // __kmp_free_task_threads_data:
2331 // Deallocates a threads_data array for a task team, including any attached
2332 // tasking deques. Only occurs at library shutdown.
2333 
2334 static void
2335 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2336 {
2337  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2338  if ( task_team -> tt.tt_threads_data != NULL ) {
2339  int i;
2340  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2341  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2342  }
2343  __kmp_free( task_team -> tt.tt_threads_data );
2344  task_team -> tt.tt_threads_data = NULL;
2345  }
2346  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2347 }
2348 
2349 
2350 //------------------------------------------------------------------------------
2351 // __kmp_allocate_task_team:
2352 // Allocates a task team associated with a specific team, taking it from
2353 // the global task team free list if possible. Also initializes data structures.
2354 
2355 static kmp_task_team_t *
2356 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2357 {
2358  kmp_task_team_t *task_team = NULL;
2359  int nthreads;
2360 
2361  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2362  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2363 
2364  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2365  // Take a task team from the task team pool
2366  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2367  if (__kmp_free_task_teams != NULL) {
2368  task_team = __kmp_free_task_teams;
2369  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2370  task_team -> tt.tt_next = NULL;
2371  }
2372  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2373  }
2374 
2375  if (task_team == NULL) {
2376  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2377  "task team for team %p\n",
2378  __kmp_gtid_from_thread( thread ), team ) );
2379  // Allocate a new task team if one is not available.
2380  // Cannot use __kmp_thread_malloc() because threads not around for
2381  // kmp_reap_task_team( ).
2382  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2383  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2384  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2385  //task_team -> tt.tt_max_threads = 0;
2386  //task_team -> tt.tt_next = NULL;
2387  }
2388 
2389  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2390 #if OMP_45_ENABLED
2391  TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2392 #endif
2393  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2394 
2395  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2396  TCW_4( task_team -> tt.tt_active, TRUE );
2397 
2398  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
2399  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
2400  return task_team;
2401 }
2402 
2403 
2404 //------------------------------------------------------------------------------
2405 // __kmp_free_task_team:
2406 // Frees the task team associated with a specific thread, and adds it
2407 // to the global task team free list.
2408 
2409 void
2410 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2411 {
2412  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2413  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2414 
2415  // Put task team back on free list
2416  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2417 
2418  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2419  task_team -> tt.tt_next = __kmp_free_task_teams;
2420  TCW_PTR(__kmp_free_task_teams, task_team);
2421 
2422  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2423 }
2424 
2425 
2426 //------------------------------------------------------------------------------
2427 // __kmp_reap_task_teams:
2428 // Free all the task teams on the task team free list.
2429 // Should only be done during library shutdown.
2430 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2431 
2432 void
2433 __kmp_reap_task_teams( void )
2434 {
2435  kmp_task_team_t *task_team;
2436 
2437  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2438  // Free all task_teams on the free list
2439  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2440  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2441  __kmp_free_task_teams = task_team -> tt.tt_next;
2442  task_team -> tt.tt_next = NULL;
2443 
2444  // Free threads_data if necessary
2445  if ( task_team -> tt.tt_threads_data != NULL ) {
2446  __kmp_free_task_threads_data( task_team );
2447  }
2448  __kmp_free( task_team );
2449  }
2450  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2451  }
2452 }
2453 
2454 //------------------------------------------------------------------------------
2455 // __kmp_wait_to_unref_task_teams:
2456 // Some threads could still be in the fork barrier release code, possibly
2457 // trying to steal tasks. Wait for each thread to unreference its task team.
2458 //
2459 void
2460 __kmp_wait_to_unref_task_teams(void)
2461 {
2462  kmp_info_t *thread;
2463  kmp_uint32 spins;
2464  int done;
2465 
2466  KMP_INIT_YIELD( spins );
2467 
2468  for (;;) {
2469  done = TRUE;
2470 
2471  // TODO: GEH - this may be wrong because some sync would be necessary
2472  // in case threads are added to the pool during the traversal.
2473  // Need to verify that lock for thread pool is held when calling
2474  // this routine.
2475  for (thread = (kmp_info_t *)__kmp_thread_pool;
2476  thread != NULL;
2477  thread = thread->th.th_next_pool)
2478  {
2479 #if KMP_OS_WINDOWS
2480  DWORD exit_val;
2481 #endif
2482  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2483  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2484  __kmp_gtid_from_thread( thread ) ) );
2485  continue;
2486  }
2487 #if KMP_OS_WINDOWS
2488  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2489  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2490  thread->th.th_task_team = NULL;
2491  continue;
2492  }
2493 #endif
2494 
2495  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2496 
2497  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2498  __kmp_gtid_from_thread( thread ) ) );
2499 
2500  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2501  volatile void *sleep_loc;
2502  // If the thread is sleeping, awaken it.
2503  if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2504  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2505  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2506  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2507  }
2508  }
2509  }
2510  if (done) {
2511  break;
2512  }
2513 
2514  // If we are oversubscribed,
2515  // or have waited a bit (and library mode is throughput), yield.
2516  // Pause is in the following code.
2517  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2518  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2519  }
2520 }
2521 
2522 
2523 //------------------------------------------------------------------------------
2524 // __kmp_task_team_setup: Create a task_team for the current team, but use
2525 // an already created, unused one if it already exists.
2526 void
2527 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
2528 {
2529  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2530 
2531  // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
2532  // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
2533  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
2534  team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2535  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
2536  __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
2537  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
2538  }
2539 
2540  // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
2541  // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
2542  // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
2543  // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
2544  // serialized teams.
2545  if (team->t.t_nproc > 1) {
2546  int other_team = 1 - this_thr->th.th_task_state;
2547  if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
2548  team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2549  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2550  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2551  ((team != NULL) ? team->t.t_id : -1), other_team ));
2552  }
2553  else { // Leave the old task team struct in place for the upcoming region; adjust as needed
2554  kmp_task_team_t *task_team = team->t.t_task_team[other_team];
2555  if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
2556  TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
2557  TCW_4(task_team->tt.tt_found_tasks, FALSE);
2558 #if OMP_45_ENABLED
2559  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2560 #endif
2561  TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
2562  TCW_4(task_team->tt.tt_active, TRUE );
2563  }
2564  // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
2565  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
2566  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2567  ((team != NULL) ? team->t.t_id : -1), other_team ));
2568  }
2569  }
2570 }
2571 
2572 
2573 //------------------------------------------------------------------------------
2574 // __kmp_task_team_sync: Propagation of task team data from team to threads
2575 // which happens just after the release phase of a team barrier. This may be
2576 // called by any thread, but only for teams with # threads > 1.
2577 
2578 void
2579 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2580 {
2581  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2582 
2583  // Toggle the th_task_state field, to switch which task_team this thread refers to
2584  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2585  // It is now safe to propagate the task team pointer from the team struct to the current thread.
2586  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
2587  KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
2588  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2589  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
2590 }
2591 
2592 
2593 //--------------------------------------------------------------------------------------------
2594 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
2595 // phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
2596 // wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
2597 // optionally as the last argument. When wait is zero, master thread does not wait for
2598 // unfinished_threads to reach 0.
2599 void
2600 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
2601  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2602  , int wait)
2603 {
2604  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
2605 
2606  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2607  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2608 
2609  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
2610  if (wait) {
2611  KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2612  __kmp_gtid_from_thread(this_thr), task_team));
2613  // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2614  // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2615  kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2616  flag.wait(this_thr, TRUE
2617  USE_ITT_BUILD_ARG(itt_sync_obj));
2618  }
2619  // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
2620  KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
2621  "setting active to false, setting local and team's pointer to NULL\n",
2622  __kmp_gtid_from_thread(this_thr), task_team));
2623 #if OMP_45_ENABLED
2624  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2625  TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2626 #else
2627  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2628 #endif
2629  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2630  KMP_MB();
2631 
2632  TCW_PTR(this_thr->th.th_task_team, NULL);
2633  }
2634 }
2635 
2636 
2637 //------------------------------------------------------------------------------
2638 // __kmp_tasking_barrier:
2639 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2640 // Internal function to execute all tasks prior to a regular barrier or a
2641 // join barrier. It is a full barrier itself, which unfortunately turns
2642 // regular barriers into double barriers and join barriers into 1 1/2
2643 // barriers.
2644 void
2645 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2646 {
2647  volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
2648  int flag = FALSE;
2649  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2650 
2651 #if USE_ITT_BUILD
2652  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2653 #endif /* USE_ITT_BUILD */
2654  kmp_flag_32 spin_flag(spin, 0U);
2655  while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2656  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2657 #if USE_ITT_BUILD
2658  // TODO: What about itt_sync_obj??
2659  KMP_FSYNC_SPIN_PREPARE( spin );
2660 #endif /* USE_ITT_BUILD */
2661 
2662  if( TCR_4(__kmp_global.g.g_done) ) {
2663  if( __kmp_global.g.g_abort )
2664  __kmp_abort_thread( );
2665  break;
2666  }
2667  KMP_YIELD( TRUE ); // GH: We always yield here
2668  }
2669 #if USE_ITT_BUILD
2670  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2671 #endif /* USE_ITT_BUILD */
2672 }
2673 
2674 
2675 #if OMP_45_ENABLED
2676 
2677 /* __kmp_give_task puts a task into a given thread queue if:
2678  - the queue for that thread was created
2679  - there's space in that queue
2680 
2681  Because of this, __kmp_push_task needs to check if there's space after getting the lock
2682  */
2683 static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
2684 {
2685  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2686  kmp_task_team_t * task_team = taskdata->td_task_team;
2687 
2688  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2689 
2690  // If task_team is NULL something went really bad...
2691  KMP_DEBUG_ASSERT( task_team != NULL );
2692 
2693  bool result = false;
2694  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2695 
2696  if (thread_data -> td.td_deque == NULL ) {
2697  // There's no queue in this thread, go find another one
2698  // We're guaranteed that at least one thread has a queue
2699  KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2700  return result;
2701  }
2702 
2703  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2704  {
2705  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2706 
2707  // if this deque has already grown to at least 'pass' times the initial size, give another thread a chance
2708  if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2709 
2710  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2711  __kmp_realloc_task_deque(thread,thread_data);
2712 
2713  } else {
2714 
2715  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2716 
2717  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2718  {
2719  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2720 
2721  // if this deque has already grown to at least 'pass' times the initial size, give another thread a chance
2722  if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2723  goto release_and_exit;
2724 
2725  __kmp_realloc_task_deque(thread,thread_data);
2726  }
2727  }
2728 
2729  // lock is held here, and there is space in the deque
2730 
2731  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2732  // Wrap index.
2733  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
2734  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2735 
2736  result = true;
2737  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
2738 
2739 release_and_exit:
2740  __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2741 
2742  return result;
2743 }
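/* Illustrative only, not part of the runtime: the deque-growth gate used above, written as a
   standalone helper. The helper name and the `deque_size`/`initial_size` parameters are
   hypothetical; the real check compares TASK_DEQUE_SIZE(td) against INITIAL_TASK_DEQUE_SIZE
   and the doubling `pass` counter driven by __kmpc_proxy_task_completed_ooo below. */
static int __example_deque_may_grow( int deque_size, int initial_size, int pass )
{
    /* mirrors: TASK_DEQUE_SIZE(td)/INITIAL_TASK_DEQUE_SIZE >= pass  ==>  skip this thread */
    return ( deque_size / initial_size ) < pass;  /* non-zero: this full deque may be reallocated */
}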
2744 
2745 
2746 /* The finish of a proxy task is divided into two pieces:
2747     - the top half is the one that can be done from a thread outside the team
2748     - the bottom half must be run from a thread within the team
2749 
2750     In order to run the bottom half the task gets queued back into one of the threads of the team.
2751     Once the td_incomplete_child_tasks counter of the parent is decremented the threads can leave the barriers.
2752     So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2753     - things that can be run before queuing the bottom half
2754     - things that must be run after queuing the bottom half
2755 
2756     This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
2757     we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom half.
2758 */
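/* A sketch of the resulting ordering (illustrative, derived from the three helpers below):

     completing thread                                team thread
     -----------------                                -----------
     __kmp_first_top_half_finish_proxy()
       marks the proxy complete, decrements its
       taskgroup count, and bumps the proxy's own
       td_incomplete_child_tasks as an extra "child"
     enqueue the bottom half  ---------------------->  __kmp_bottom_half_finish_proxy()
     __kmp_second_top_half_finish_proxy()                spins until the proxy's
       decrements the parent's                           td_incomplete_child_tasks drops
       td_incomplete_child_tasks, then drops             back to 0, then releases the
       the proxy's extra count                           dependencies and frees the task
*/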
2759 
2760 static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2761 {
2762  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2763  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2764  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2765  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2766 
2767  taskdata -> td_flags.complete = 1; // mark the task as completed
2768 
2769  if ( taskdata->td_taskgroup )
2770  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2771 
2772  // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2773  TCI_4(taskdata->td_incomplete_child_tasks);
2774 }
2775 
2776 static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2777 {
2778  kmp_int32 children = 0;
2779 
2780  // Predecrement simulated by "- 1" calculation
2781  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2782  KMP_DEBUG_ASSERT( children >= 0 );
2783 
2784  // Remove the imaginary children
2785  TCD_4(taskdata->td_incomplete_child_tasks);
2786 }
2787 
2788 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2789 {
2790  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2791  kmp_info_t * thread = __kmp_threads[ gtid ];
2792 
2793  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2794  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2795 
2796  // We need to wait to make sure the top half is finished
2797  // Spinning here should be ok as this should happen quickly
2798  while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2799 
2800  __kmp_release_deps(gtid,taskdata);
2801  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2802 }
2803 
2811 void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2812 {
2813  KMP_DEBUG_ASSERT( ptask != NULL );
2814  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2815  KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2816 
2817  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2818 
2819  __kmp_first_top_half_finish_proxy(taskdata);
2820  __kmp_second_top_half_finish_proxy(taskdata);
2821  __kmp_bottom_half_finish_proxy(gtid,ptask);
2822 
2823  KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2824 }
2825 
2832 void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2833 {
2834  KMP_DEBUG_ASSERT( ptask != NULL );
2835  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2836 
2837  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2838 
2839  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2840 
2841  __kmp_first_top_half_finish_proxy(taskdata);
2842 
2843  // Enqueue task to complete bottom half completion from a thread within the corresponding team
2844  kmp_team_t * team = taskdata->td_team;
2845  kmp_int32 nthreads = team->t.t_nproc;
2846  kmp_info_t *thread;
2847 
2848  //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2849  kmp_int32 start_k = 0;
2850  kmp_int32 pass = 1;
2851  kmp_int32 k = start_k;
2852 
2853  do {
2854  //For now we're just linearly trying to find a thread
2855  thread = team->t.t_threads[k];
2856  k = (k+1) % nthreads;
2857 
2858  // we did a full pass through all the threads
2859  if ( k == start_k ) pass = pass << 1;
2860 
2861  } while ( !__kmp_give_task( thread, k, ptask, pass ) );
2862 
2863  __kmp_second_top_half_finish_proxy(taskdata);
2864 
2865  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2866 }
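/* A minimal standalone sketch (illustrative, not runtime code) of the distribution loop above:
   round-robin over the team, doubling the `pass` threshold after each full sweep so that
   eventually some deque is allowed to grow and take the task. `try_give` stands in for
   __kmp_give_task and is hypothetical. */
static void __example_distribute_bottom_half( int nthreads, int (*try_give)( int tid, int pass ) )
{
    int start_k = 0;  /* the runtime would prefer a pseudo-random start, see the comment above */
    int pass = 1;
    int k = start_k;
    int tid;
    do {
        tid = k;
        k = ( k + 1 ) % nthreads;
        if ( k == start_k )   /* wrapped around: one full sweep has been attempted */
            pass <<= 1;       /* relax the deque-growth gate in __kmp_give_task */
    } while ( !try_give( tid, pass ) );
}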
2867 
2868 //---------------------------------------------------------------------------------
2869 // __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
2870 //
2871 // thread: allocating thread
2872 // task_src: pointer to source task to be duplicated
2873 // returns: a pointer to the allocated kmp_task_t structure (task).
2874 kmp_task_t *
2875 __kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2876 {
2877  kmp_task_t *task;
2878  kmp_taskdata_t *taskdata;
2879  kmp_taskdata_t *taskdata_src;
2880  kmp_taskdata_t *parent_task = thread->th.th_current_task;
2881  size_t shareds_offset;
2882  size_t task_size;
2883 
2884  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2885  taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2886  KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be a proxy task
2887  KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2888  task_size = taskdata_src->td_size_alloc;
2889 
2890  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
2891  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2892  #if USE_FAST_MEMORY
2893  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2894  #else
2895  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2896  #endif /* USE_FAST_MEMORY */
2897  KMP_MEMCPY(taskdata, taskdata_src, task_size);
2898 
2899  task = KMP_TASKDATA_TO_TASK(taskdata);
2900 
2901  // Initialize new task (only specific fields not affected by memcpy)
2902  taskdata->td_task_id = KMP_GEN_TASK_ID();
2903  if( task->shareds != NULL ) { // need to set up the shareds pointer
2904  shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2905  task->shareds = &((char*)taskdata)[shareds_offset];
2906  KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2907  }
2908  taskdata->td_alloc_thread = thread;
2909  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2910 
2911  // Only need to keep track of child task counts if team parallel and tasking not serialized
2912  if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2913  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2914  if ( parent_task->td_taskgroup )
2915  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2916  // Only need to keep track of allocated child tasks for explicit tasks since implicit tasks are not deallocated
2917  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2918  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2919  }
2920 
2921  KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2922  thread, taskdata, taskdata->td_parent) );
2923 #if OMPT_SUPPORT
2924  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2925 #endif
2926  return task;
2927 }
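/* Illustrative only: the shareds fix-up above rebases an interior pointer after the whole
   block has been memcpy()ed, by re-applying its byte offset from the old base to the new
   base. The helper name and parameters are hypothetical. */
static void * __example_rebase_pointer( void *dst_base, const void *src_base, const void *src_field )
{
    size_t offset = (size_t)( (const char *)src_field - (const char *)src_base );
    return (char *)dst_base + offset;   /* same offset, new allocation */
}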
2928 
2929 // Routine optionally generated by the compiler for setting the lastprivate flag
2930 // and calling needed constructors for private/firstprivate objects
2931 // (used to form taskloop tasks from pattern task)
2932 typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
2933 
2934 //---------------------------------------------------------------------------------
2935 // __kmp_taskloop_linear: Start tasks of the taskloop linearly
2936 //
2937 // loc Source location information
2938 // gtid Global thread ID
2939 // task Task with whole loop iteration range
2940 // lb Pointer to loop lower bound
2941 // ub Pointer to loop upper bound
2942 // st Loop stride
2943 // sched Schedule specified 0/1/2 for none/grainsize/num_tasks
2944 // grainsize Schedule value if specified
2945 // task_dup Tasks duplication routine
2946 void
2947 __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
2948  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
2949  int sched, kmp_uint64 grainsize, void *task_dup )
2950 {
2951  KMP_COUNT_BLOCK(OMP_TASKLOOP);
2952  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
2953  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
2954  kmp_uint64 tc;
2955  kmp_uint64 lower = *lb; // compiler provides global bounds here
2956  kmp_uint64 upper = *ub;
2957  kmp_uint64 i, num_tasks = 0, extras = 0;
2958  kmp_info_t *thread = __kmp_threads[gtid];
2959  kmp_taskdata_t *current_task = thread->th.th_current_task;
2960  kmp_task_t *next_task;
2961  kmp_int32 lastpriv = 0;
2962  size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
2963  size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
2964 
2965  // compute trip count
2966  if ( st == 1 ) { // most common case
2967  tc = upper - lower + 1;
2968  } else if ( st < 0 ) {
2969  tc = (lower - upper) / (-st) + 1;
2970  } else { // st > 0
2971  tc = (upper - lower) / st + 1;
2972  }
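    // Illustrative values (not from the source): lower=0, upper=9, st=1  -> tc = 10;
    // lower=0, upper=9, st=4  -> tc = (9-0)/4 + 1 = 3 (iterations 0, 4, 8);
    // lower=9, upper=0, st=-3 -> tc = (9-0)/3 + 1 = 4 (iterations 9, 6, 3, 0).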
2973  if(tc == 0) {
2974  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
2975  // free the pattern task and exit
2976  __kmp_task_start( gtid, task, current_task );
2977  // do not execute anything for zero-trip loop
2978  __kmp_task_finish( gtid, task, current_task );
2979  return;
2980  }
2981 
2982  // compute num_tasks/grainsize based on the input provided
2983  switch( sched ) {
2984  case 0: // no schedule clause specified, we can choose the default
2985  // let's try to schedule (team_size*10) tasks
2986  grainsize = thread->th.th_team_nproc * 10;
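    // no break: fall through to case 2 and treat the computed value as the requested number of tasks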
2987  case 2: // num_tasks provided
2988  if( grainsize > tc ) {
2989  num_tasks = tc; // too big num_tasks requested, adjust values
2990  grainsize = 1;
2991  extras = 0;
2992  } else {
2993  num_tasks = grainsize;
2994  grainsize = tc / num_tasks;
2995  extras = tc % num_tasks;
2996  }
2997  break;
2998  case 1: // grainsize provided
2999  if( grainsize > tc ) {
3000  num_tasks = 1; // too big grainsize requested, adjust values
3001  grainsize = tc;
3002  extras = 0;
3003  } else {
3004  num_tasks = tc / grainsize;
3005  grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3006  extras = tc % num_tasks;
3007  }
3008  break;
3009  default:
3010  KMP_ASSERT2(0, "unknown scheduling of taskloop");
3011  }
3012  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3013  KMP_DEBUG_ASSERT(num_tasks > extras);
3014  KMP_DEBUG_ASSERT(num_tasks > 0);
3015  KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
3016  gtid, num_tasks, grainsize, extras));
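    // Worked example (illustrative): tc = 10 with a grainsize clause of 3 (sched == 1) gives
    // num_tasks = 10/3 = 3, adjusted grainsize = 10/3 = 3, extras = 10%3 = 1, i.e. chunks of
    // 4, 3 and 3 iterations, and 10 == 3*3 + 1 as asserted above.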
3017 
3018  // Main loop, launch num_tasks tasks, assign grainsize iterations each task
3019  for( i = 0; i < num_tasks; ++i ) {
3020  kmp_uint64 chunk_minus_1;
3021  if( extras == 0 ) {
3022  chunk_minus_1 = grainsize - 1;
3023  } else {
3024  chunk_minus_1 = grainsize;
3025  --extras; // the first 'extras' tasks get a bigger chunk (grainsize+1 iterations)
3026  }
3027  upper = lower + st * chunk_minus_1;
3028  if( i == num_tasks - 1 ) {
3029  // schedule the last task, set lastprivate flag
3030  lastpriv = 1;
3031 #if KMP_DEBUG
3032  if( st == 1 )
3033  KMP_DEBUG_ASSERT(upper == *ub);
3034  else if( st > 0 )
3035  KMP_DEBUG_ASSERT(upper+st > *ub);
3036  else
3037  KMP_DEBUG_ASSERT(upper+st < *ub);
3038 #endif
3039  }
3040  next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3041  *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3042  *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3043  if( ptask_dup != NULL )
3044  ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3045  KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
3046  gtid, next_task, lower, upper, lower_offset, upper_offset));
3047  __kmp_omp_task(gtid, next_task, true); // schedule new task
3048  lower = upper + st; // adjust lower bound for the next iteration
3049  }
3050  // free the pattern task and exit
3051  __kmp_task_start( gtid, task, current_task );
3052  // do not execute the pattern task, just do bookkeeping
3053  __kmp_task_finish( gtid, task, current_task );
3054 }
3055 
3072 void
3073 __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
3074  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3075  int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
3076 {
3077  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
3078  KMP_DEBUG_ASSERT( task != NULL );
3079 
3080  KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
3081  gtid, taskdata, *lb, *ub, st, grainsize, sched));
3082 
3083  // check the if-clause value first
3084  if( if_val == 0 ) { // if(0) specified, mark task as serial
3085  taskdata->td_flags.task_serial = 1;
3086  taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
3087  }
3088  if( nogroup == 0 ) {
3089  __kmpc_taskgroup( loc, gtid );
3090  }
3091 
3092  if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
3093  __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
3094  }
3095 
3096  if( nogroup == 0 ) {
3097  __kmpc_end_taskgroup( loc, gtid );
3098  }
3099  KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
3100 }
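/* Illustrative only (a sketch, not generated by any particular compiler): source code such as

       #pragma omp taskloop grainsize(4)
       for ( i = 0; i < 100; ++i )
           body( i );

   is lowered to a pattern task whose loop bounds live inside the task structure, followed by
   a call of roughly this shape (the `pattern->lb` / `pattern->ub` field names are hypothetical):

       __kmpc_taskloop( loc, gtid, pattern_task,
                        1,              // if_val: no if clause, so "true"
                        &pattern->lb,   // pointer to the lower bound stored inside the task
                        &pattern->ub,   // pointer to the upper bound stored inside the task
                        1,              // st: loop stride
                        0,              // nogroup == 0: wrap the tasks in an implicit taskgroup
                        1,              // sched == 1: grainsize clause was specified
                        4,              // grainsize value from the clause
                        task_dup );     // duplication routine for firstprivates/lastprivate
*/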
3101 
3102 #endif