LLVM OpenMP* Runtime Library
kmp_itt.h
#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
#define __kmp_inline // Turn off inlining in debug mode.
#else
#define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
extern kmp_int32 __kmp_itt_prepare_delay;
#ifdef __cplusplus
extern "C" void __kmp_itt_fini_ittlib(void);
#else
extern void __kmp_itt_fini_ittlib(void);
#endif
#endif

// Simplify the handling of an argument that is only required when
// USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) , x
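// For example (an illustrative sketch, not a declaration from this header), a
// runtime entry point can carry an extra ITT argument only in ITT builds:
//   extern void __kmp_hypothetical_wait(int gtid
//                                       USE_ITT_BUILD_ARG(void *itt_sync_obj));
// In non-ITT builds USE_ITT_BUILD_ARG(x) expands to nothing (see the #else
// branch at the end of this file), so the extra parameter disappears without
// any ifdef at the call sites.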

void __kmp_itt_initialize();
void __kmp_itt_destroy();
void __kmp_itt_reset();

// -----------------------------------------------------------------------------
// New stuff for reporting high-level constructs.

// Note the naming convention: an __kmp_itt_xxxing() function should be called
// before the action, while an __kmp_itt_xxxed() function should be called
// after it.

// --- Parallel region reporting ---
__kmp_inline void
__kmp_itt_region_forking(int gtid, int team_size,
                         int barriers); // Master only, before forking threads.
__kmp_inline void
__kmp_itt_region_joined(int gtid); // Master only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.
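// Illustrative call ordering (a sketch; the real call sites are in the
// runtime's fork/join path):
//   __kmp_itt_region_forking(gtid, team_size, /* barriers = */ 1);
//   ... fork the team and execute the parallel region ...
//   __kmp_itt_region_joined(gtid);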

// --- Frame reporting ---
// region=0: no regions, region=1: parallel, region=2: serialized parallel
__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                         __itt_timestamp end, int imbalance,
                                         ident_t *loc, int team_size,
                                         int region = 0);
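// A minimal usage sketch (the timestamps come from the ITT clock via
// __itt_get_timestamp(); the local variable names are illustrative):
//   __itt_timestamp t_begin = __itt_get_timestamp();
//   ... execute the region ...
//   __itt_timestamp t_end = __itt_get_timestamp();
//   __kmp_itt_frame_submit(gtid, t_begin, t_end, /* imbalance = */ 0, loc,
//                          team_size, /* region = */ 1); // 1 == parallel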

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                               kmp_uint64 end,
                                               kmp_uint64 imbalance,
                                               kmp_uint64 reduction);
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                          kmp_uint64 iterations,
                                          kmp_uint64 chunk);
__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
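// For instance (values are illustrative), a dynamically scheduled loop with a
// trip count of n and a chunk size of 4 would be reported as:
//   __kmp_itt_metadata_loop(loc, /* sched_type = */ 1, /* iterations = */ n,
//                           /* chunk = */ 4);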

// --- Barrier reporting ---
__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
                                            int delta = 0);
__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
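// Expected per-thread sequence at a barrier (a sketch; bs_plain_barrier is the
// barrier-type enumerator used elsewhere in the runtime):
//   void *obj = __kmp_itt_barrier_object(gtid, bs_plain_barrier);
//   __kmp_itt_barrier_starting(gtid, obj); // before waiting (gather phase)
//   __kmp_itt_barrier_middle(gtid, obj);   // between gather and release
//   __kmp_itt_barrier_finished(gtid, obj); // after the release phase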

// --- Taskwait reporting ---
__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting(void *object);
__kmp_inline void __kmp_itt_task_finished(void *object);

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
                                          const ident_t *);
#else
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
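// Typical acquire path (a sketch; try_acquire() is a hypothetical stand-in
// for the runtime's actual lock-acquire routine):
//   __kmp_itt_lock_acquiring(lck);
//   if (try_acquire(lck))
//     __kmp_itt_lock_acquired(lck);  // got the lock
//   else
//     __kmp_itt_lock_cancelled(lck); // gave up, e.g. a failed test-lock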

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
                                              const ident_t *);
#else
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start(int gtid);
__kmp_inline void __kmp_itt_single_end(int gtid);

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init(int gtid);
__kmp_inline void __kmp_itt_ordered_prep(int gtid);
__kmp_inline void __kmp_itt_ordered_start(int gtid);
__kmp_inline void __kmp_itt_ordered_end(int gtid);

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name(int gtid);

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created(void *object,
                                                  char const *name);

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
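// Stitching sketch: the master creates a caller id before forking, each worker
// brackets its outlined body with enter/leave so the profiler can splice the
// worker's stack onto the master's, and the master destroys the id at join:
//   __itt_caller id = __kmp_itt_stack_caller_create(); // master, at fork
//   __kmp_itt_stack_callee_enter(id);                  // worker, before body
//   __kmp_itt_stack_callee_leave(id);                  // worker, after body
//   __kmp_itt_stack_caller_destroy(id);                // master, at join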

// -----------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.

#if USE_ITT_NOTIFY

/* Support for SSC marks, which are used by SDE
   http://software.intel.com/en-us/articles/intel-software-development-emulator
   to mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
 */
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary
// instructions to set %ebx and execute the unlikely no-op.
#if defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
#define INSERT_SSC_MARK(tag)                                                   \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
                       : "%ebx")
#endif
#else
#define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags, but there
   is no central issuing authority; rather, randomness is expected to work. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
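// Usage sketch (illustrative; flag_is_set() is a hypothetical poll predicate):
//   SSC_MARK_SPIN_START();
//   while (!flag_is_set(&flag))
//     KMP_YIELD(TRUE);
//   SSC_MARK_SPIN_END(); // SDE can now elide the bracketed spin from traces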

// Markers for architecture simulation.
// FORKING      : Before the master thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of a dynamically scheduled loop.
// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled
//                loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that identifies a specific set of prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   happen just before the release event. */

#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))
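// Release/acquire pairing on a hypothetical flag (a sketch; TCR_4/TCW_4 are
// the runtime's volatile read/write helpers):
//   // Waiting thread:
//   KMP_FSYNC_PREPARE(&flag);   // about to start checking for the release
//   while (TCR_4(flag) == 0) {} // poll
//   KMP_FSYNC_ACQUIRED(&flag);  // release event received, waiting is over
//   // Releasing thread:
//   KMP_FSYNC_RELEASING(&flag); // just before sending the release event
//   TCW_4(flag, 1);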

/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be
   called with a delay (and not called at all if the waiting time is small).
   So, in spin loops, do not use KMP_FSYNC_PREPARE(); instead use
   KMP_FSYNC_SPIN_INIT() (before the spin loop), KMP_FSYNC_SPIN_PREPARE()
   (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED(). See
   KMP_WAIT_YIELD() for an example. */

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr) {                                               \
    if (obj == NULL) {                                                         \
      obj = spin;                                                              \
    } /* if */                                                                 \
  } /* if */                                                                   \
  SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)obj);                                        \
      } /* if */                                                               \
    } /* if */                                                                 \
  } while (0)
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)obj);                                         \
    } /* if */                                                                 \
  } while (0)
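// Instrumented spin-wait sketch (modeled on KMP_WAIT_YIELD; the flag and its
// expected value are illustrative). Note that KMP_FSYNC_SPIN_INIT() declares
// the sync_iters counter used by the other two macros:
//   void *itt_sync_obj = NULL;
//   KMP_FSYNC_SPIN_INIT(itt_sync_obj, (void *)&flag);
//   while (TCR_4(flag) != checker) {
//     KMP_FSYNC_SPIN_PREPARE(itt_sync_obj); // fires once the delay elapses
//     KMP_YIELD(TRUE);
//   }
//   KMP_FSYNC_SPIN_ACQUIRED(itt_sync_obj);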

/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
     KMP_ITT_IGNORE(
         ptr = malloc(size);
     );
*/
#define KMP_ITT_IGNORE(statement)                                              \
  do {                                                                         \
    __itt_state_t __itt_state_;                                                \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_ = __itt_state_get();                                        \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
    } /* if */                                                                 \
    { statement }                                                              \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_set(__itt_state_);                                           \
    } /* if */                                                                 \
  } while (0)


// Maximum number of frame domains to use (maps to different OpenMP regions in
// the user source code).
const int KMP_MAX_FRAME_DOMAINS = 512;
extern kmp_int32 __kmp_barrier_domain_count;
extern kmp_int32 __kmp_region_domain_count;
extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
extern __itt_string_handle *string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#endif // USE_ITT_NOTIFY

#if !KMP_DEBUG
// In release mode include definitions of inline functions.
#include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */