LLVM OpenMP* Runtime Library
kmp_stats.h
1 #ifndef KMP_STATS_H
2 #define KMP_STATS_H
3 
9 //===----------------------------------------------------------------------===//
10 //
11 // The LLVM Compiler Infrastructure
12 //
13 // This file is dual licensed under the MIT and the University of Illinois Open
14 // Source Licenses. See LICENSE.txt for details.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "kmp_config.h"
19 
20 #if KMP_STATS_ENABLED
21 /*
22  * Statistics accumulator.
23  * Accumulates number of samples and computes min, max, mean, standard deviation on the fly.
24  *
25  * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
26  */
27 
28 #include <limits>
29 #include <math.h>
30 #include <vector>
31 #include <string>
32 #include <stdint.h>
33 #include <new> // placement new
34 #include "kmp_stats_timing.h"
35 
36 /*
37  * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
38  * are intended for the runtime library developer.
39  */
40 // #define KMP_DEVELOPER_STATS 1
41 
48  noTotal = 1<<0,
49  onlyInMaster = 1<<1,
50  noUnits = 1<<2,
51  notInMaster = 1<<3,
52  logEvent = 1<<4
53 };
54 
61  IDLE,
62  SERIAL_REGION,
63  FORK_JOIN_BARRIER,
64  PLAIN_BARRIER,
65  TASKWAIT,
66  TASKYIELD,
67  TASKGROUP,
68  IMPLICIT_TASK,
69  EXPLICIT_TASK
70 };
71 
/*
 * X-macro listing every statistics counter.
 *
 * Expands to one invocation macro(name, flags, arg) per counter, where flags is
 * 0 or a bitwise OR of stats_flags_e values and arg is handed through untouched.
 * The order of the entries determines the values of the counter_e enumerators,
 * and COUNTER_LAST sizes the per-thread counter array, so new counters belong
 * above the final LAST entry.
 */
#define KMP_FOREACH_COUNTER(macro, arg)                                                     \
    macro (OMP_PARALLEL,        stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg)  \
    macro (OMP_NESTED_PARALLEL, 0, arg)                                                     \
    macro (OMP_FOR_static,      0, arg)                                                     \
    macro (OMP_FOR_dynamic,     0, arg)                                                     \
    macro (OMP_DISTRIBUTE,      0, arg)                                                     \
    macro (OMP_BARRIER,         0, arg)                                                     \
    macro (OMP_CRITICAL,        0, arg)                                                     \
    macro (OMP_SINGLE,          0, arg)                                                     \
    macro (OMP_MASTER,          0, arg)                                                     \
    macro (OMP_TEAMS,           0, arg)                                                     \
    macro (OMP_set_lock,        0, arg)                                                     \
    macro (OMP_test_lock,       0, arg)                                                     \
    macro (REDUCE_wait,         0, arg)                                                     \
    macro (REDUCE_nowait,       0, arg)                                                     \
    macro (OMP_TASKYIELD,       0, arg)                                                     \
    macro (OMP_TASKLOOP,        0, arg)                                                     \
    macro (TASK_executed,       0, arg)                                                     \
    macro (TASK_cancelled,      0, arg)                                                     \
    macro (TASK_stolen,         0, arg)                                                     \
    macro (LAST,                0, arg)
109 
/*
 * X-macro listing every timer statistic.
 *
 * Expands to one invocation macro(name, flags, arg) per timer, where flags is
 * 0 or a bitwise OR of stats_flags_e values and arg is handed through untouched.
 * The entries of KMP_FOREACH_DEVELOPER_TIMER are spliced in just before LAST.
 * The order of the entries determines the values of the timer_e enumerators, so
 * new timers belong above the final LAST entry.
 */
#define KMP_FOREACH_TIMER(macro, arg)                                                       \
    macro (OMP_worker_thread_life,  0, arg)                                                 \
    macro (FOR_static_scheduling,   0, arg)                                                 \
    macro (FOR_dynamic_scheduling,  0, arg)                                                 \
    macro (OMP_critical,            0, arg)                                                 \
    macro (OMP_critical_wait,       0, arg)                                                 \
    macro (OMP_single,              0, arg)                                                 \
    macro (OMP_master,              0, arg)                                                 \
    macro (OMP_idle,                0, arg)                                                 \
    macro (OMP_plain_barrier,       0, arg)                                                 \
    macro (OMP_fork_join_barrier,   0, arg)                                                 \
    macro (OMP_parallel,            0, arg)                                                 \
    macro (OMP_task_immediate,      0, arg)                                                 \
    macro (OMP_task_taskwait,       0, arg)                                                 \
    macro (OMP_task_taskyield,      0, arg)                                                 \
    macro (OMP_task_taskgroup,      0, arg)                                                 \
    macro (OMP_task_join_bar,       0, arg)                                                 \
    macro (OMP_task_plain_bar,      0, arg)                                                 \
    macro (OMP_serial,              0, arg)                                                 \
    macro (OMP_taskloop_scheduling, 0, arg)                                                 \
    macro (OMP_set_numthreads,      stats_flags_e::noUnits | stats_flags_e::noTotal, arg)   \
    macro (OMP_PARALLEL_args,       stats_flags_e::noUnits | stats_flags_e::noTotal, arg)   \
    macro (FOR_static_iterations,   stats_flags_e::noUnits | stats_flags_e::noTotal, arg)   \
    macro (FOR_dynamic_iterations,  stats_flags_e::noUnits | stats_flags_e::noTotal, arg)   \
    KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                                 \
    macro (LAST,                    0, arg)
151 
152 
153 // OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit
154 // OMP_serial -- Thread zero time executing serial code
155 // OMP_work -- Elapsed time in code dispatched by a fork (measured in the thread)
156 // OMP_barrier -- Time at "real" barriers (includes task time)
157 // FOR_static_scheduling -- Time spent doing scheduling for a static "for"
158 // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
159 // OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region
160 // OMP_plain_barrier -- Time spent in a barrier construct
161 // OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a parallel region
162 // OMP_parallel -- Time spent inside a parallel construct
163 // OMP_task_immediate -- Time spent executing non-deferred tasks
164 // OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct
165 // OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct
166 // OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup construct
167 // OMP_task_join_bar -- Time spent executing tasks inside a join barrier
168 // OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct
169 // OMP_single -- Time spent executing a "single" region
170 // OMP_master -- Time spent executing a "master" region
171 // OMP_set_numthreads -- Values passed to omp_set_num_threads
172 // OMP_PARALLEL_args -- Number of arguments passed to a parallel region
173 // FOR_static_iterations -- Number of available parallel chunks of work in a static for
174 // FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
175 // Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
176 
#if (KMP_DEVELOPER_STATS)
/*
 * Timers which are of interest to runtime library developers, not end users.
 * These have to be explicitly enabled in addition to the other stats.
 *
 *   KMP_fork_barrier       -- time in __kmp_fork_barrier
 *   KMP_join_barrier       -- time in __kmp_join_barrier
 *   KMP_barrier            -- time in __kmp_barrier
 *   KMP_end_split_barrier  -- time in __kmp_end_split_barrier
 *   KMP_setup_icv_copy     -- time in __kmp_setup_icv_copy
 *   KMP_icv_copy           -- start/stop timer for any ICV copying
 *   KMP_linear_gather      -- time in __kmp_linear_barrier_gather
 *   KMP_linear_release     -- time in __kmp_linear_barrier_release
 *   KMP_tree_gather        -- time in __kmp_tree_barrier_gather
 *   KMP_tree_release       -- time in __kmp_tree_barrier_release
 *   KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
 *   KMP_hyper_release      -- time in __kmp_hyper_barrier_release
 *
 * The macro expands to one macro(name, flags, arg) invocation per developer
 * timer; it is spliced into KMP_FOREACH_TIMER.
 */
# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                      \
    macro (KMP_fork_call,           0, arg)                           \
    macro (KMP_join_call,           0, arg)                           \
    macro (KMP_fork_barrier,        stats_flags_e::logEvent, arg)     \
    macro (KMP_join_barrier,        stats_flags_e::logEvent, arg)     \
    macro (KMP_barrier,             0, arg)                           \
    macro (KMP_end_split_barrier,   0, arg)                           \
    macro (KMP_hier_gather,         0, arg)                           \
    macro (KMP_hier_release,        0, arg)                           \
    macro (KMP_hyper_gather,        stats_flags_e::logEvent, arg)     \
    macro (KMP_hyper_release,       stats_flags_e::logEvent, arg)     \
    macro (KMP_linear_gather,       0, arg)                           \
    macro (KMP_linear_release,      0, arg)                           \
    macro (KMP_tree_gather,         0, arg)                           \
    macro (KMP_tree_release,        0, arg)                           \
    macro (USER_master_invoke,      stats_flags_e::logEvent, arg)     \
    macro (USER_worker_invoke,      stats_flags_e::logEvent, arg)     \
    macro (USER_resume,             stats_flags_e::logEvent, arg)     \
    macro (USER_suspend,            stats_flags_e::logEvent, arg)     \
    macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg)     \
    macro (KMP_allocate_team,       0, arg)                           \
    macro (KMP_setup_icv_copy,      0, arg)                           \
    macro (USER_icv_copy,           0, arg)
#else
// Developer statistics disabled: the list contributes no timers.
# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
219 
/*
 * X-macro listing every explicit timer.
 *
 * Expands to one invocation macro(name, flags, arg) per explicit timer; the
 * entries of KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER are spliced in just before
 * LAST.  Add new explicit timers under this macro, above LAST.  The order of
 * the entries determines the values of the explicit_timer_e enumerators.
 */
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)           \
    macro (OMP_worker_thread_life,  0, arg)              \
    macro (FOR_static_scheduling,   0, arg)              \
    macro (FOR_dynamic_scheduling,  0, arg)              \
    macro (OMP_critical,            0, arg)              \
    macro (OMP_critical_wait,       0, arg)              \
    macro (OMP_single,              0, arg)              \
    macro (OMP_master,              0, arg)              \
    macro (OMP_idle,                0, arg)              \
    macro (OMP_plain_barrier,       0, arg)              \
    macro (OMP_fork_join_barrier,   0, arg)              \
    macro (OMP_parallel,            0, arg)              \
    macro (OMP_task_immediate,      0, arg)              \
    macro (OMP_task_taskwait,       0, arg)              \
    macro (OMP_task_taskyield,      0, arg)              \
    macro (OMP_task_taskgroup,      0, arg)              \
    macro (OMP_task_join_bar,       0, arg)              \
    macro (OMP_task_plain_bar,      0, arg)              \
    macro (OMP_serial,              0, arg)              \
    macro (OMP_taskloop_scheduling, 0, arg)              \
    KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)     \
    macro (LAST,                    0, arg)
258 
// Explicit timers that exist only when developer statistics are enabled;
// spliced into KMP_FOREACH_EXPLICIT_TIMER.
#if (KMP_DEVELOPER_STATS)
# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)             \
    macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg)
#else
// Developer statistics disabled: no extra explicit timers.
# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg)
#endif
265 
266 #define ENUMERATE(name,ignore,prefix) prefix##name,
267 enum timer_e {
268  KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
269 };
270 
271 enum explicit_timer_e {
272  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
273 };
274 
275 enum counter_e {
276  KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
277 };
278 #undef ENUMERATE
279 
280 class timerPair {
281  explicit_timer_e timer_index;
282  timer_e timer;
283  public:
284  timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
285  inline explicit_timer_e get_index() const { return timer_index; }
286  inline timer_e get_timer() const { return timer; }
287  bool operator==(const timerPair & rhs) {
288  return this->get_index() == rhs.get_index();
289  }
290  bool operator!=(const timerPair & rhs) {
291  return !(*this == rhs);
292  }
293 };
294 
/*
 * Running summary of a stream of samples: count, minimum, maximum, mean and
 * the accumulator (m2) from which the standard deviation is derived.
 *
 * A freshly constructed or reset() statistic holds no samples: its minimum is
 * the largest finite double and its maximum the most negative finite double.
 */
class statistic
{
    double   minVal;
    double   maxVal;
    double   meanVal;
    double   m2;          // getSD() reports sqrt(m2/sampleCount)
    uint64_t sampleCount;

 public:
    statistic() { reset(); }
    statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}

    double   getMin()   const { return minVal; }
    double   getMean()  const { return meanVal; }
    double   getMax()   const { return maxVal; }
    uint64_t getCount() const { return sampleCount; }
    // Standard deviation of the samples seen so far.  With no samples the
    // quotient m2/sampleCount would be 0/0 (NaN), so report 0 instead.
    double   getSD()    const { return sampleCount ? sqrt(m2/sampleCount) : 0.0; }
    // Sum of all samples, reconstructed as count * mean.
    double   getTotal() const { return sampleCount*meanVal; }

    // Return to the empty state (no samples).
    void reset()
    {
        minVal = std::numeric_limits<double>::max();
        maxVal = -std::numeric_limits<double>::max();
        meanVal= 0.0;
        m2     = 0.0;
        sampleCount = 0;
    }
    void addSample(double sample);
    void scale    (double factor);
    // Divide by f; implemented as scale() by the reciprocal.
    void scaleDown(double f)  { scale (1./f); }
    statistic & operator+= (statistic const & other);

    std::string format(char unit, bool total=false) const;
};
329 
// Static description of one statistic: its printable name and its flag bits
// (tested against stats_flags_e values by timeStat and counter).
struct statInfo
{
    const char *name;
    uint32_t    flags;
};
335 
336 class timeStat : public statistic
337 {
338  static statInfo timerInfo[];
339 
340  public:
341  timeStat() : statistic() {}
342  static const char * name(timer_e e) { return timerInfo[e].name; }
343  static bool noTotal (timer_e e) { return timerInfo[e].flags & stats_flags_e::noTotal; }
344  static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; }
345  static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; }
346  static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; }
347  static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; }
348  static void clearEventFlags() {
349  for(int i=0;i<TIMER_LAST;i++) {
350  timerInfo[i].flags &= (~(stats_flags_e::logEvent));
351  }
352  }
353 };
354 
355 // Where we need explicitly to start and end the timer, this version can be used
356 // Since these timers normally aren't nicely scoped, so don't have a good place to live
357 // on the stack of the thread, they're more work to use.
358 class explicitTimer
359 {
360  timeStat * stat;
361  tsc_tick_count startTime;
362  tsc_tick_count pauseStartTime;
363  tsc_tick_count::tsc_interval_t totalPauseTime;
364 
365  public:
366  explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
367  explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
368 
369  void setStat (timeStat *s) { stat = s; }
370  void start(timer_e timerEnumValue);
371  void pause() { pauseStartTime = tsc_tick_count::now(); }
372  void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
373  void stop(timer_e timerEnumValue);
374  void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
375 };
376 
377 // Where all you need is to time a block, this is enough.
378 // (It avoids the need to have an explicit end, leaving the scope suffices.)
379 class blockTimer : public explicitTimer
380 {
381  timer_e timerEnumValue;
382  public:
383  blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
384  ~blockTimer() { stop(timerEnumValue); }
385 };
386 
387 // Where you need to partition a threads clock ticks into separate states
388 // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
389 // DOING_NOTHING would render these conditions:
390 // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
391 // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
392 class partitionedTimers
393 {
394  private:
395  explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
396  std::vector<timerPair> timer_stack;
397  public:
398  partitionedTimers();
399  void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
400  void init(timerPair timer_index);
401  void push(timerPair timer_index);
402  void pop();
403  void windup();
404 };
405 
406 // Special wrapper around the partioned timers to aid timing code blocks
407 // It avoids the need to have an explicit end, leaving the scope suffices.
408 class blockPartitionedTimer
409 {
410  partitionedTimers* part_timers;
411  timerPair timer_pair;
412  public:
413  blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
414  ~blockPartitionedTimer() { part_timers->pop(); }
415 };
416 
417 // Special wrapper around the thread state to aid in keeping state in code blocks
418 // It avoids the need to have an explicit end, leaving the scope suffices.
419 class blockThreadState
420 {
421  stats_state_e* state_pointer;
422  stats_state_e old_state;
423  public:
424  blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
425  *state_pointer = new_state;
426  }
427  ~blockThreadState() { *state_pointer = old_state; }
428 };
429 
430 // If all you want is a count, then you can use this...
431 // The individual per-thread counts will be aggregated into a statistic at program exit.
432 class counter
433 {
434  uint64_t value;
435  static const statInfo counterInfo[];
436 
437  public:
438  counter() : value(0) {}
439  void increment() { value++; }
440  uint64_t getValue() const { return value; }
441  void reset() { value = 0; }
442  static const char * name(counter_e e) { return counterInfo[e].name; }
443  static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; }
444 };
445 
446 /* ****************************************************************
447  Class to implement an event
448 
449  There are four components to an event: start time, stop time
450  nest_level, and timer_name.
451  The start and stop time should be obvious (recorded in clock ticks).
452  The nest_level relates to the bar width in the timeline graph.
453  The timer_name is used to determine which timer event triggered this event.
454 
455  the interface to this class is through four read-only operations:
456  1) getStart() -- returns the start time as 64 bit integer
457  2) getStop() -- returns the stop time as 64 bit integer
458  3) getNestLevel() -- returns the nest level of the event
459  4) getTimerName() -- returns the timer name that triggered event
460 
461  *MORE ON NEST_LEVEL*
462  The nest level is used in the bar graph that represents the timeline.
463  Its main purpose is for showing how events are nested inside eachother.
464  For example, say events, A, B, and C are recorded. If the timeline
465  looks like this:
466 
467 Begin -------------------------------------------------------------> Time
468  | | | | | |
469  A B C C B A
470  start start start end end end
471 
472  Then A, B, C will have a nest level of 1, 2, 3 respectively.
473  These values are then used to calculate the barwidth so you can
474  see that inside A, B has occurred, and inside B, C has occurred.
475  Currently, this is shown with A's bar width being larger than B's
476  bar width, and B's bar width being larger than C's bar width.
477 
478 **************************************************************** */
479 class kmp_stats_event {
480  uint64_t start;
481  uint64_t stop;
482  int nest_level;
483  timer_e timer_name;
484  public:
485  kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
486  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
487  inline uint64_t getStart() const { return start; }
488  inline uint64_t getStop() const { return stop; }
489  inline int getNestLevel() const { return nest_level; }
490  inline timer_e getTimerName() const { return timer_name; }
491 };
492 
493 /* ****************************************************************
494  Class to implement a dynamically expandable array of events
495 
496  ---------------------------------------------------------
497  | event 1 | event 2 | event 3 | event 4 | ... | event N |
498  ---------------------------------------------------------
499 
500  An event is pushed onto the back of this array at every
501  explicitTimer->stop() call. The event records the thread #,
502  start time, stop time, and nest level related to the bar width.
503 
504  The event vector starts at size INIT_SIZE and grows (doubles in size)
505  if needed. An implication of this behavior is that log(N)
506  reallocations are needed (where N is number of events). If you want
507  to avoid reallocations, then set INIT_SIZE to a large value.
508 
509  the interface to this class is through six operations:
510  1) reset() -- sets the internal_size back to 0 but does not deallocate any memory
511  2) size() -- returns the number of valid elements in the vector
512  3) push_back(start, stop, nest, timer_name) -- pushes an event onto
513  the back of the array
514  4) deallocate() -- frees all memory associated with the vector
515  5) sort() -- sorts the vector by start time
516  6) operator[index] or at(index) -- returns event reference at that index
517 
518 **************************************************************** */
519 class kmp_stats_event_vector {
520  kmp_stats_event* events;
521  int internal_size;
522  int allocated_size;
523  static const int INIT_SIZE = 1024;
524  public:
525  kmp_stats_event_vector() {
526  events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE);
527  internal_size = 0;
528  allocated_size = INIT_SIZE;
529  }
530  ~kmp_stats_event_vector() {}
531  inline void reset() { internal_size = 0; }
532  inline int size() const { return internal_size; }
533  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
534  int i;
535  if(internal_size == allocated_size) {
536  kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2);
537  for(i=0;i<internal_size;i++) tmp[i] = events[i];
538  __kmp_free(events);
539  events = tmp;
540  allocated_size*=2;
541  }
542  events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
543  internal_size++;
544  return;
545  }
546  void deallocate();
547  void sort();
548  const kmp_stats_event & operator[](int index) const { return events[index]; }
549  kmp_stats_event & operator[](int index) { return events[index]; }
550  const kmp_stats_event & at(int index) const { return events[index]; }
551  kmp_stats_event & at(int index) { return events[index]; }
552 };
553 
554 /* ****************************************************************
555  Class to implement a doubly-linked, circular, statistics list
556 
557  |---| ---> |---| ---> |---| ---> |---| ---> ... next
558  | | | | | | | |
559  |---| <--- |---| <--- |---| <--- |---| <--- ... prev
560  Sentinel first second third
561  Node node node node
562 
563  The Sentinel Node is the user handle on the list.
564  The first node corresponds to thread 0's statistics.
565  The second node corresponds to thread 1's statistics and so on...
566 
567  Each node has a _timers, _counters, and _explicitTimers array to
568  hold that thread's statistics. The _explicitTimers
569  point to the correct _timer and update its statistics at every stop() call.
570  The explicitTimers' pointers are set up in the constructor.
571  Each node also has an event vector to hold that thread's timing events.
572  The event vector expands as necessary and records the start-stop times
573  for each timer.
574 
575  The nestLevel variable is for plotting events and is related
576  to the bar width in the timeline graph.
577 
578  Every thread will have a __thread local pointer to its node in
579  the list. The sentinel node is used by the master thread to
580  store "dummy" statistics before __kmp_create_worker() is called.
581 
582 **************************************************************** */
583 class kmp_stats_list {
584  int gtid;
585  timeStat _timers[TIMER_LAST+1];
586  counter _counters[COUNTER_LAST+1];
587  explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
588  partitionedTimers _partitionedTimers;
589  int _nestLevel; // one per thread
590  kmp_stats_event_vector _event_vector;
591  kmp_stats_list* next;
592  kmp_stats_list* prev;
593  stats_state_e state;
594  int thread_is_idle_flag;
595  public:
596  kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
597  state(IDLE), thread_is_idle_flag(0) {
598 #define doInit(name,ignore1,ignore2) \
599  getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
600  _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
601  KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
602 #undef doInit
603  }
604  ~kmp_stats_list() { }
605  inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
606  inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
607  inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
608  inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; }
609  inline timeStat * getTimers() { return _timers; }
610  inline counter * getCounters() { return _counters; }
611  inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
612  inline kmp_stats_event_vector & getEventVector() { return _event_vector; }
613  inline void resetEventVector() { _event_vector.reset(); }
614  inline void incrementNestValue() { _nestLevel++; }
615  inline int getNestValue() { return _nestLevel; }
616  inline void decrementNestValue() { _nestLevel--; }
617  inline int getGtid() const { return gtid; }
618  inline void setGtid(int newgtid) { gtid = newgtid; }
619  inline void setState(stats_state_e newstate) { state = newstate; }
620  inline stats_state_e getState() const { return state; }
621  inline stats_state_e * getStatePointer() { return &state; }
622  inline bool isIdle() { return thread_is_idle_flag==1; }
623  inline void setIdleFlag() { thread_is_idle_flag = 1; }
624  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
625  kmp_stats_list* push_back(int gtid); // returns newly created list node
626  inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
627  _event_vector.push_back(start_time, stop_time, nest_level, name);
628  }
629  void deallocate();
630  class iterator;
631  kmp_stats_list::iterator begin();
632  kmp_stats_list::iterator end();
633  int size();
634  class iterator {
635  kmp_stats_list* ptr;
636  friend kmp_stats_list::iterator kmp_stats_list::begin();
637  friend kmp_stats_list::iterator kmp_stats_list::end();
638  public:
639  iterator();
640  ~iterator();
641  iterator operator++();
642  iterator operator++(int dummy);
643  iterator operator--();
644  iterator operator--(int dummy);
645  bool operator!=(const iterator & rhs);
646  bool operator==(const iterator & rhs);
647  kmp_stats_list* operator*() const; // dereference operator
648  };
649 };
650 
651 /* ****************************************************************
652  Class to encapsulate all output functions and the environment variables
653 
654  This module holds filenames for various outputs (normal stats, events, plot file),
655  as well as coloring information for the plot file.
656 
657  The filenames and flags variables are read from environment variables.
658  These are read once by the constructor of the global variable __kmp_stats_output
659  which calls init().
660 
661  During this init() call, event flags for the timeStat::timerInfo[] global array
662  are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
663 
664  The only interface function that is public is outputStats(heading). This function
665  should print out everything it needs to, either to files or stderr,
666  depending on the environment variables described below
667 
668  ENVIRONMENT VARIABLES:
669  KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file,
670  otherwise, print to stderr
671  KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either
672  KMP_STATS_FILE or stderr
673  KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
674  otherwise, the plot file is sent to "events.plt"
675  KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events
676  KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
677  otherwise, output is sent to "events.dat"
678 
679 **************************************************************** */
680 class kmp_stats_output_module {
681 
682  public:
683  struct rgb_color {
684  float r;
685  float g;
686  float b;
687  };
688 
689  private:
690  std::string outputFileName;
691  static const char* eventsFileName;
692  static const char* plotFileName;
693  static int printPerThreadFlag;
694  static int printPerThreadEventsFlag;
695  static const rgb_color globalColorArray[];
696  static rgb_color timerColorInfo[];
697 
698  void init();
699  static void setupEventColors();
700  static void printPloticusFile();
701  static void printHeaderInfo(FILE *statsOut);
702  static void printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats);
703  static void printCounterStats(FILE *statsOut, statistic const * theStats);
704  static void printCounters(FILE * statsOut, counter const * theCounters);
705  static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid);
706  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
707  static void windupExplicitTimers();
708  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
709 
710  public:
711  kmp_stats_output_module() { init(); }
712  void outputStats(const char* heading);
713 };
714 
715 #ifdef __cplusplus
716 extern "C" {
717 #endif
718 void __kmp_stats_init();
719 void __kmp_reset_stats();
720 void __kmp_output_stats(const char *);
721 void __kmp_accumulate_stats_at_exit(void);
722 // thread local pointer to stats node within list
723 extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
724 // head to stats list.
725 extern kmp_stats_list __kmp_stats_list;
726 // lock for __kmp_stats_list
727 extern kmp_tas_lock_t __kmp_stats_lock;
728 // reference start time
729 extern tsc_tick_count __kmp_stats_start_time;
730 // interface to output
731 extern kmp_stats_output_module __kmp_stats_output;
732 
733 #ifdef __cplusplus
734 }
735 #endif
736 
737 // Simple, standard interfaces that drop out completely if stats aren't enabled
738 
739 
// Time the rest of the enclosing scope: declares a blockTimer named
// __BLOCKTIME__ on the calling thread's TIMER_<name> statistic.
#define KMP_TIME_BLOCK(name)                                                                 \
    blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)

// Add value as a sample to the calling thread's TIMER_<name> statistic.
#define KMP_COUNT_VALUE(name, value)                                                         \
    __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)

// Increment the calling thread's COUNTER_<name> counter.
#define KMP_COUNT_BLOCK(name)                                                                \
    __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()

// Start the calling thread's EXPLICIT_TIMER_<name> explicit timer.
#define KMP_START_EXPLICIT_TIMER(name)                                                       \
    __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)

// Stop the calling thread's EXPLICIT_TIMER_<name> explicit timer.
#define KMP_STOP_EXPLICIT_TIMER(name)                                                        \
    __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)

// Forward heading_string to __kmp_output_stats().
#define KMP_OUTPUT_STATS(heading_string)                                                     \
    __kmp_output_stats(heading_string)

// Call init() on the calling thread's partitioned timers with the <name> timer pair.
#define KMP_INIT_PARTITIONED_TIMERS(name)                                                    \
    __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Push the <name> timer pair for the rest of the enclosing scope: declares a
// blockPartitionedTimer named __PBLOCKTIME__.
#define KMP_TIME_PARTITIONED_BLOCK(name)                                                     \
    blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(),     \
                                         timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Push the <name> timer pair onto the calling thread's partitioned timers.
#define KMP_PUSH_PARTITIONED_TIMER(name)                                                     \
    __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pop the calling thread's partitioned timers.
#define KMP_POP_PARTITIONED_TIMER()                                                          \
    __kmp_stats_thread_ptr->getPartitionedTimers()->pop()

// Set the calling thread's statistics state.
#define KMP_SET_THREAD_STATE(state_name)                                                     \
    __kmp_stats_thread_ptr->setState(state_name)

// Read the calling thread's statistics state.
#define KMP_GET_THREAD_STATE()                                                               \
    __kmp_stats_thread_ptr->getState()

// Set the calling thread's statistics state for the rest of the enclosing
// scope: declares a blockThreadState named __BTHREADSTATE__.
#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                               \
    blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)

// Forward to __kmp_reset_stats().
#define KMP_RESET_STATS()  __kmp_reset_stats()
862 
// Developer variants of the statistics macros: aliases of the ordinary macros
// when KMP_DEVELOPER_STATS is on, no-ops otherwise.
#if (KMP_DEVELOPER_STATS)
# define KMP_TIME_DEVELOPER_BLOCK(n)             KMP_TIME_BLOCK(n)
# define KMP_COUNT_DEVELOPER_VALUE(n,v)          KMP_COUNT_VALUE(n,v)
# define KMP_COUNT_DEVELOPER_BLOCK(n)            KMP_COUNT_BLOCK(n)
# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   KMP_START_EXPLICIT_TIMER(n)
# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    KMP_STOP_EXPLICIT_TIMER(n)
#else
// Null definitions
# define KMP_TIME_DEVELOPER_BLOCK(n)             ((void)0)
# define KMP_COUNT_DEVELOPER_VALUE(n,v)          ((void)0)
# define KMP_COUNT_DEVELOPER_BLOCK(n)            ((void)0)
# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   ((void)0)
# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
#endif
877 
878 #else // KMP_STATS_ENABLED
879 
880 // Null definitions
// With statistics disabled every statistics macro expands to a no-op
// expression, so call sites compile unchanged.
#define KMP_TIME_BLOCK(n)                        ((void)0)
#define KMP_COUNT_VALUE(n,v)                     ((void)0)
#define KMP_COUNT_BLOCK(n)                       ((void)0)
#define KMP_START_EXPLICIT_TIMER(n)              ((void)0)
#define KMP_STOP_EXPLICIT_TIMER(n)               ((void)0)

#define KMP_OUTPUT_STATS(heading_string)         ((void)0)
#define KMP_RESET_STATS()                        ((void)0)

#define KMP_TIME_DEVELOPER_BLOCK(n)              ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n,v)           ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n)             ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)     ((void)0)
#define KMP_INIT_PARTITIONED_TIMERS(name)        ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name)         ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name)         ((void)0)
#define KMP_POP_PARTITIONED_TIMER()              ((void)0)
#define KMP_SET_THREAD_STATE(state_name)         ((void)0)
#define KMP_GET_THREAD_STATE()                   ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name)   ((void)0)
902 #endif // KMP_STATS_ENABLED
903 
904 #endif // KMP_STATS_H
statistic is valid only for master
Definition: kmp_stats.h:49
statistic is valid only for non-master threads
Definition: kmp_stats.h:51
do not show a TOTAL_aggregation for this statistic
Definition: kmp_stats.h:48
statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers) ...
Definition: kmp_stats.h:52
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
Definition: kmp_stats.h:236
statistic doesn't need units printed next to it in output
Definition: kmp_stats.h:50
stats_flags_e
flags to describe the statistic (timer or counter)
Definition: kmp_stats.h:47
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
Definition: kmp_stats.h:88
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:60