18 #include "kmp_config.h" 34 #include "kmp_stats_timing.h" 88 #define KMP_FOREACH_COUNTER(macro, arg) \ 89 macro (OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ 90 macro (OMP_NESTED_PARALLEL, 0, arg) \ 91 macro (OMP_FOR_static, 0, arg) \ 92 macro (OMP_FOR_dynamic, 0, arg) \ 93 macro (OMP_DISTRIBUTE, 0, arg) \ 94 macro (OMP_BARRIER, 0, arg) \ 95 macro (OMP_CRITICAL,0, arg) \ 96 macro (OMP_SINGLE, 0, arg) \ 97 macro (OMP_MASTER, 0, arg) \ 98 macro (OMP_TEAMS, 0, arg) \ 99 macro (OMP_set_lock, 0, arg) \ 100 macro (OMP_test_lock, 0, arg) \ 101 macro (REDUCE_wait, 0, arg) \ 102 macro (REDUCE_nowait, 0, arg) \ 103 macro (OMP_TASKYIELD, 0, arg) \ 104 macro (OMP_TASKLOOP, 0, arg) \ 105 macro (TASK_executed, 0, arg) \ 106 macro (TASK_cancelled, 0, arg) \ 107 macro (TASK_stolen, 0, arg) 124 #define KMP_FOREACH_TIMER(macro, arg) \ 125 macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ 126 macro (FOR_static_scheduling, 0, arg) \ 127 macro (FOR_dynamic_scheduling, 0, arg) \ 128 macro (OMP_critical, 0, arg) \ 129 macro (OMP_critical_wait, 0, arg) \ 130 macro (OMP_single, 0, arg) \ 131 macro (OMP_master, 0, arg) \ 132 macro (OMP_idle, stats_flags_e::logEvent, arg) \ 133 macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ 134 macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ 135 macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ 136 macro (OMP_parallel, stats_flags_e::logEvent, arg) \ 137 macro (OMP_task_immediate, 0, arg) \ 138 macro (OMP_task_taskwait, 0, arg) \ 139 macro (OMP_task_taskyield, 0, arg) \ 140 macro (OMP_task_taskgroup, 0, arg) \ 141 macro (OMP_task_join_bar, 0, arg) \ 142 macro (OMP_task_plain_bar, 0, arg) \ 143 macro (OMP_serial, stats_flags_e::logEvent, arg) \ 144 macro (OMP_taskloop_scheduling, 0, arg) \ 145 macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 146 macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 147 macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 148 macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 149 KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 176 #if (KMP_DEVELOPER_STATS) 192 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ 193 macro (KMP_fork_call, 0, arg) \ 194 macro (KMP_join_call, 0, arg) \ 195 macro (KMP_end_split_barrier, 0, arg) \ 196 macro (KMP_hier_gather, 0, arg) \ 197 macro (KMP_hier_release, 0, arg) \ 198 macro (KMP_hyper_gather, 0, arg) \ 199 macro (KMP_hyper_release, 0, arg) \ 200 macro (KMP_linear_gather, 0, arg) \ 201 macro (KMP_linear_release, 0, arg) \ 202 macro (KMP_tree_gather, 0, arg) \ 203 macro (KMP_tree_release, 0, arg) \ 204 macro (USER_resume, 0, arg) \ 205 macro (USER_suspend, 0, arg) \ 206 macro (KMP_allocate_team, 0, arg) \ 207 macro (KMP_setup_icv_copy, 0, arg) \ 208 macro (USER_icv_copy, 0, arg) 210 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 229 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ 230 KMP_FOREACH_TIMER(macro, arg) 232 #define ENUMERATE(name,ignore,prefix) prefix##name, 234 KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
238 enum explicit_timer_e {
250 explicit_timer_e timer_index;
253 timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
254 inline explicit_timer_e get_index()
const {
return timer_index; }
255 inline timer_e get_timer()
const {
return timer; }
256 bool operator==(
const timerPair & rhs) {
257 return this->get_index() == rhs.get_index();
259 bool operator!=(
const timerPair & rhs) {
260 return !(*
this == rhs);
270 uint64_t sampleCount;
273 statistic() { reset(); }
274 statistic (statistic
const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}
276 double getMin()
const {
return minVal; }
277 double getMean()
const {
return meanVal; }
278 double getMax()
const {
return maxVal; }
279 uint64_t getCount()
const {
return sampleCount; }
280 double getSD()
const {
return sqrt(m2/sampleCount); }
281 double getTotal()
const {
return sampleCount*meanVal; }
285 minVal = std::numeric_limits<double>::max();
286 maxVal = -std::numeric_limits<double>::max();
291 void addSample(
double sample);
292 void scale (
double factor);
293 void scaleDown(
double f) { scale (1./f); }
294 statistic & operator+= (statistic
const & other);
296 std::string format(
char unit,
bool total=
false)
const;
305 class timeStat :
public statistic
307 static statInfo timerInfo[];
310 timeStat() : statistic() {}
311 static const char * name(timer_e e) {
return timerInfo[e].name; }
317 static void clearEventFlags() {
318 for(
int i=0;i<TIMER_LAST;i++) {
330 tsc_tick_count startTime;
331 tsc_tick_count pauseStartTime;
332 tsc_tick_count::tsc_interval_t totalPauseTime;
335 explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
336 explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
338 void setStat (timeStat *s) { stat = s; }
339 void start(timer_e timerEnumValue);
340 void pause() { pauseStartTime = tsc_tick_count::now(); }
341 void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
342 void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr =
nullptr);
343 void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
348 class blockTimer :
public explicitTimer
350 timer_e timerEnumValue;
352 blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
353 ~blockTimer() { stop(timerEnumValue); }
361 class partitionedTimers
364 explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
365 std::vector<timerPair> timer_stack;
368 void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
369 void init(timerPair timer_index);
370 void push(timerPair timer_index);
377 class blockPartitionedTimer
379 partitionedTimers* part_timers;
380 timerPair timer_pair;
382 blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
383 ~blockPartitionedTimer() { part_timers->pop(); }
388 class blockThreadState
393 blockThreadState(
stats_state_e* thread_state_pointer,
stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
394 *state_pointer = new_state;
396 ~blockThreadState() { *state_pointer = old_state; }
404 static const statInfo counterInfo[];
407 counter() : value(0) {}
408 void increment() { value++; }
409 uint64_t getValue()
const {
return value; }
410 void reset() { value = 0; }
411 static const char * name(counter_e e) {
return counterInfo[e].name; }
448 class kmp_stats_event {
454 kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
455 kmp_stats_event(uint64_t strt, uint64_t stp,
int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
456 inline uint64_t getStart()
const {
return start; }
457 inline uint64_t getStop()
const {
return stop; }
458 inline int getNestLevel()
const {
return nest_level; }
459 inline timer_e getTimerName()
const {
return timer_name; }
488 class kmp_stats_event_vector {
489 kmp_stats_event* events;
492 static const int INIT_SIZE = 1024;
494 kmp_stats_event_vector() {
495 events = (kmp_stats_event*)__kmp_allocate(
sizeof(kmp_stats_event)*INIT_SIZE);
497 allocated_size = INIT_SIZE;
499 ~kmp_stats_event_vector() {}
500 inline void reset() { internal_size = 0; }
501 inline int size()
const {
return internal_size; }
502 void push_back(uint64_t start_time, uint64_t stop_time,
int nest_level, timer_e name) {
504 if(internal_size == allocated_size) {
505 kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(
sizeof(kmp_stats_event)*allocated_size*2);
506 for(i=0;i<internal_size;i++) tmp[i] = events[i];
511 events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
517 const kmp_stats_event & operator[](
int index)
const {
return events[index]; }
518 kmp_stats_event & operator[](
int index) {
return events[index]; }
519 const kmp_stats_event & at(
int index)
const {
return events[index]; }
520 kmp_stats_event & at(
int index) {
return events[index]; }
552 class kmp_stats_list {
554 timeStat _timers[TIMER_LAST+1];
555 counter _counters[COUNTER_LAST+1];
556 explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
557 partitionedTimers _partitionedTimers;
559 kmp_stats_event_vector _event_vector;
560 kmp_stats_list* next;
561 kmp_stats_list* prev;
563 int thread_is_idle_flag;
565 kmp_stats_list() : _nestLevel(0), _event_vector(), next(
this), prev(
this),
566 state(IDLE), thread_is_idle_flag(0) {
567 #define doInit(name,ignore1,ignore2) \ 568 getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ 569 _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name)); 573 ~kmp_stats_list() { }
574 inline timeStat * getTimer(timer_e idx) {
return &_timers[idx]; }
575 inline counter * getCounter(counter_e idx) {
return &_counters[idx]; }
576 inline explicitTimer * getExplicitTimer(explicit_timer_e idx) {
return &_explicitTimers[idx]; }
577 inline partitionedTimers * getPartitionedTimers() {
return &_partitionedTimers; }
578 inline timeStat * getTimers() {
return _timers; }
579 inline counter * getCounters() {
return _counters; }
580 inline explicitTimer * getExplicitTimers() {
return _explicitTimers; }
581 inline kmp_stats_event_vector & getEventVector() {
return _event_vector; }
582 inline void resetEventVector() { _event_vector.reset(); }
583 inline void incrementNestValue() { _nestLevel++; }
584 inline int getNestValue() {
return _nestLevel; }
585 inline void decrementNestValue() { _nestLevel--; }
586 inline int getGtid()
const {
return gtid; }
587 inline void setGtid(
int newgtid) { gtid = newgtid; }
588 inline void setState(
stats_state_e newstate) { state = newstate; }
591 inline bool isIdle() {
return thread_is_idle_flag==1; }
592 inline void setIdleFlag() { thread_is_idle_flag = 1; }
593 inline void resetIdleFlag() { thread_is_idle_flag = 0; }
594 kmp_stats_list* push_back(
int gtid);
595 inline void push_event(uint64_t start_time, uint64_t stop_time,
int nest_level, timer_e name) {
596 _event_vector.push_back(start_time, stop_time, nest_level, name);
600 kmp_stats_list::iterator begin();
601 kmp_stats_list::iterator end();
605 friend kmp_stats_list::iterator kmp_stats_list::begin();
606 friend kmp_stats_list::iterator kmp_stats_list::end();
610 iterator operator++();
611 iterator operator++(
int dummy);
612 iterator operator--();
613 iterator operator--(
int dummy);
614 bool operator!=(
const iterator & rhs);
615 bool operator==(
const iterator & rhs);
616 kmp_stats_list* operator*()
const;
649 class kmp_stats_output_module {
659 std::string outputFileName;
660 static const char* eventsFileName;
661 static const char* plotFileName;
662 static int printPerThreadFlag;
663 static int printPerThreadEventsFlag;
664 static const rgb_color globalColorArray[];
665 static rgb_color timerColorInfo[];
668 static void setupEventColors();
669 static void printPloticusFile();
670 static void printHeaderInfo(FILE *statsOut);
671 static void printTimerStats(FILE *statsOut, statistic
const * theStats, statistic
const * totalStats);
672 static void printCounterStats(FILE *statsOut, statistic
const * theStats);
673 static void printCounters(FILE * statsOut, counter
const * theCounters);
674 static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents,
int gtid);
675 static rgb_color getEventColor(timer_e e) {
return timerColorInfo[e]; }
676 static void windupExplicitTimers();
677 bool eventPrintingEnabled()
const {
return printPerThreadEventsFlag; }
680 kmp_stats_output_module() { init(); }
681 void outputStats(
const char* heading);
687 void __kmp_stats_init();
688 void __kmp_stats_fini();
689 void __kmp_reset_stats();
690 void __kmp_output_stats(
const char *);
691 void __kmp_accumulate_stats_at_exit(
void);
693 extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
695 extern kmp_stats_list* __kmp_stats_list;
697 extern kmp_tas_lock_t __kmp_stats_lock;
699 extern tsc_tick_count __kmp_stats_start_time;
701 extern kmp_stats_output_module __kmp_stats_output;
722 #define KMP_TIME_BLOCK(name) \ 723 blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) 735 #define KMP_COUNT_VALUE(name, value) \ 736 __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) 747 #define KMP_COUNT_BLOCK(name) \ 748 __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() 761 #define KMP_START_EXPLICIT_TIMER(name) \ 762 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) 775 #define KMP_STOP_EXPLICIT_TIMER(name) \ 776 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) 792 #define KMP_OUTPUT_STATS(heading_string) \ 793 __kmp_output_stats(heading_string) 802 #define KMP_INIT_PARTITIONED_TIMERS(name) \ 803 __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 805 #define KMP_TIME_PARTITIONED_BLOCK(name) \ 806 blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \ 807 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 809 #define KMP_PUSH_PARTITIONED_TIMER(name) \ 810 __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 812 #define KMP_POP_PARTITIONED_TIMER() \ 813 __kmp_stats_thread_ptr->getPartitionedTimers()->pop() 815 #define KMP_SET_THREAD_STATE(state_name) \ 816 __kmp_stats_thread_ptr->setState(state_name) 818 #define KMP_GET_THREAD_STATE() \ 819 __kmp_stats_thread_ptr->getState() 821 #define KMP_SET_THREAD_STATE_BLOCK(state_name) \ 822 blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name) 831 #define KMP_RESET_STATS() __kmp_reset_stats() 833 #if (KMP_DEVELOPER_STATS) 834 # define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) 835 # define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) 836 # define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) 837 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) 838 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) 839 # define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) 842 # define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 843 # define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) 844 # define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 845 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 846 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 847 # define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 850 #else // KMP_STATS_ENABLED 853 #define KMP_TIME_BLOCK(n) ((void)0) 854 #define KMP_COUNT_VALUE(n,v) ((void)0) 855 #define KMP_COUNT_BLOCK(n) ((void)0) 856 #define KMP_START_EXPLICIT_TIMER(n) ((void)0) 857 #define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) 859 #define KMP_OUTPUT_STATS(heading_string) ((void)0) 860 #define KMP_RESET_STATS() ((void)0) 862 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 863 #define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) 864 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 865 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 866 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 867 #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) 868 #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) 869 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 870 #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) 871 #define KMP_POP_PARTITIONED_TIMER() ((void)0) 872 #define KMP_SET_THREAD_STATE(state_name) ((void)0) 873 #define KMP_GET_THREAD_STATE() ((void)0) 874 #define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) 875 #endif // KMP_STATS_ENABLED 877 #endif // KMP_STATS_H statistic is valid only for master
statistic is valid only for non-master threads
do not show a TOTAL_aggregation for this statistic
statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers) ...
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
statistic doesn't need units printed next to it in output
stats_flags_e
flags to describe the statistic (timer or counter)
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
stats_state_e
the states which a thread can be in