17 #include "kmp_config.h" 18 #include "kmp_debug.h" 29 #include "kmp_stats_timing.h" 40 #define KMP_DEVELOPER_STATS 0 43 #define KMP_STATS_HIST 0 95 #define KMP_FOREACH_COUNTER(macro, arg) \ 96 macro(OMP_PARALLEL,stats_flags_e::onlyInMaster|stats_flags_e::noTotal,arg) \ 97 macro(OMP_NESTED_PARALLEL, 0, arg) \ 98 macro(OMP_LOOP_STATIC, 0, arg) \ 99 macro(OMP_LOOP_STATIC_STEAL, 0, arg) \ 100 macro(OMP_LOOP_DYNAMIC, 0, arg) \ 101 macro(OMP_DISTRIBUTE, 0, arg) \ 102 macro(OMP_BARRIER, 0, arg) \ 103 macro(OMP_CRITICAL, 0, arg) \ 104 macro(OMP_SINGLE, 0, arg) \ 105 macro(OMP_MASTER, 0, arg) \ 106 macro(OMP_TEAMS, 0, arg) \ 107 macro(OMP_set_lock, 0, arg) \ 108 macro(OMP_test_lock, 0, arg) \ 109 macro(REDUCE_wait, 0, arg) \ 110 macro(REDUCE_nowait, 0, arg) \ 111 macro(OMP_TASKYIELD, 0, arg) \ 112 macro(OMP_TASKLOOP, 0, arg) \ 113 macro(TASK_executed, 0, arg) \ 114 macro(TASK_cancelled, 0, arg) \ 115 macro(TASK_stolen, 0, arg) 137 #define KMP_FOREACH_TIMER(macro, arg) \ 138 macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ 139 macro (OMP_parallel, stats_flags_e::logEvent, arg) \ 140 macro (OMP_parallel_overhead, stats_flags_e::logEvent, arg) \ 141 macro (OMP_loop_static, 0, arg) \ 142 macro (OMP_loop_static_scheduling, 0, arg) \ 143 macro (OMP_loop_dynamic, 0, arg) \ 144 macro (OMP_loop_dynamic_scheduling, 0, arg) \ 145 macro (OMP_critical, 0, arg) \ 146 macro (OMP_critical_wait, 0, arg) \ 147 macro (OMP_single, 0, arg) \ 148 macro (OMP_master, 0, arg) \ 149 macro (OMP_task_immediate, 0, arg) \ 150 macro (OMP_task_taskwait, 0, arg) \ 151 macro (OMP_task_taskyield, 0, arg) \ 152 macro (OMP_task_taskgroup, 0, arg) \ 153 macro (OMP_task_join_bar, 0, arg) \ 154 macro (OMP_task_plain_bar, 0, arg) \ 155 macro (OMP_taskloop_scheduling, 0, arg) \ 156 macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ 157 macro (OMP_idle, stats_flags_e::logEvent, arg) \ 158 macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ 159 macro (OMP_join_barrier, 
stats_flags_e::logEvent, arg) \ 160 macro (OMP_serial, stats_flags_e::logEvent, arg) \ 161 macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, \ 163 macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \ 165 macro (OMP_loop_static_iterations, \ 166 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 167 macro (OMP_loop_dynamic_iterations, \ 168 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 169 KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 220 #if (KMP_DEVELOPER_STATS) 237 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ 238 macro(KMP_fork_call, 0, arg) \ 239 macro(KMP_join_call, 0, arg) \ 240 macro(KMP_end_split_barrier, 0, arg) \ 241 macro(KMP_hier_gather, 0, arg) \ 242 macro(KMP_hier_release, 0, arg) \ 243 macro(KMP_hyper_gather, 0, arg) \ 244 macro(KMP_hyper_release, 0, arg) \ 245 macro(KMP_linear_gather, 0, arg) \ 246 macro(KMP_linear_release, 0, arg) \ 247 macro(KMP_tree_gather, 0, arg) \ 248 macro(KMP_tree_release, 0, arg) \ 249 macro(USER_resume, 0, arg) \ 250 macro(USER_suspend, 0, arg) \ 251 macro(KMP_allocate_team, 0, arg) \ 252 macro(KMP_setup_icv_copy, 0, arg) \ 253 macro(USER_icv_copy, 0, arg) \ 254 macro (FOR_static_steal_stolen, \ 255 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 256 macro (FOR_static_steal_chunks, \ 257 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) 259 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 282 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg) 284 #define ENUMERATE(name, ignore, prefix) prefix##name, 285 enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
287 enum explicit_timer_e {
314 uint32_t KMP_ALIGN_CACHE zeroCount;
320 static double binMax[numBins];
326 uint64_t t = zeroCount;
327 for (
int i = 0; i < numBins; i++)
329 KMP_DEBUG_ASSERT(t == _total);
// check(): this variant has an empty body, so calling it validates nothing.
332 void check()
const {}
// Default constructor: the only work done is a call to reset().
336 logHistogram() { reset(); }
338 logHistogram(logHistogram
const &o) {
339 for (
int i = 0; i < numBins; i++)
348 for (
int i = 0; i < numBins; i++) {
// Returns the `count` field of the bin selected by b. The caller's index is
// shifted by logOffset before indexing `bins`; no range check is done here.
357 uint32_t count(
int b)
const {
return bins[b + logOffset].count; }
// Returns the `total` field of the bin selected by b, using the same
// logOffset-shifted indexing as count().
358 double total(
int b)
const {
return bins[b + logOffset].total; }
359 static uint32_t findBin(
double sample);
361 logHistogram &operator+=(logHistogram
const &o) {
362 zeroCount += o.zeroCount;
363 for (
int i = 0; i < numBins; i++) {
364 bins[i].count += o.bins[i].count;
365 bins[i].total += o.bins[i].total;
375 void addSample(
double sample);
379 std::string format(
char)
const;
383 double KMP_ALIGN_CACHE minVal;
387 uint64_t sampleCount;
393 statistic(
bool doHist =
bool(KMP_STATS_HIST)) {
395 collectingHist = doHist;
// Copy constructor: member-wise copy of minVal, maxVal, meanVal, m2,
// sampleCount, offset, collectingHist and hist from o.
397 statistic(statistic
const &o)
398 : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
399 sampleCount(o.sampleCount), offset(o.offset),
400 collectingHist(o.collectingHist), hist(o.hist) {}
// Builds a statistic from already-computed summary values.
//   minv, maxv, meanv - stored directly into minVal, maxVal and meanVal
//   sc                - stored as sampleCount
//   sd                - stored indirectly: m2 is set to sd * sd * sc, which is
//                       the quantity getSD() turns back into sd via
//                       sqrt(m2 / sampleCount)
// offset is initialised to 0.0 and collectingHist to false.
401 statistic(
double minv,
double maxv,
double meanv, uint64_t sc,
double sd)
402 : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc),
403 sampleCount(sc), offset(0.0), collectingHist(
false) {}
// Returns the collectingHist flag.
404 bool haveHist()
const {
return collectingHist; }
// Returns the stored minimum (minVal).
405 double getMin()
const {
return minVal; }
// Returns the stored mean (meanVal).
406 double getMean()
const {
return meanVal; }
// Returns the stored maximum (maxVal).
407 double getMax()
const {
return maxVal; }
// Returns the stored sample count (sampleCount).
408 uint64_t getCount()
const {
return sampleCount; }
409 double getSD()
const {
return sqrt(m2 / sampleCount); }
// Returns sampleCount * meanVal, i.e. the total implied by the stored count
// and mean (no separate running sum is read here).
410 double getTotal()
const {
return sampleCount * meanVal; }
// Returns a pointer to the `hist` member of this object.
411 logHistogram
const *getHist()
const {
return &hist; }
// Stores d in the `offset` member.
412 void setOffset(
double d) { offset = d; }
415 minVal = std::numeric_limits<double>::max();
423 void addSample(
double sample);
424 void scale(
double factor);
// Calls scale() with the reciprocal of f.
// NOTE(review): f == 0 is not rejected here, so scale() would receive an
// infinite factor - confirm callers never pass zero.
425 void scaleDown(
double f) { scale(1. / f); }
// Overwrites sampleCount with count; no other member is touched.
426 void forceCount(uint64_t count) { sampleCount = count; }
427 statistic &operator+=(statistic
const &other);
429 std::string format(
char unit,
bool total =
false)
const;
// Returns the histogram rendered as a string by forwarding `unit` to
// hist.format().
430 std::string formatHist(
char unit)
const {
return hist.format(unit); }
438 class timeStat :
public statistic {
439 static statInfo timerInfo[];
442 timeStat() : statistic() {}
443 static const char *name(timer_e e) {
return timerInfo[e].name; }
444 static bool noTotal(timer_e e) {
447 static bool masterOnly(timer_e e) {
450 static bool workerOnly(timer_e e) {
453 static bool noUnits(timer_e e) {
459 static void clearEventFlags() {
460 for (
int i = 0; i < TIMER_LAST; i++) {
469 class explicitTimer {
471 timer_e timerEnumValue;
472 tsc_tick_count startTime;
473 tsc_tick_count pauseStartTime;
474 tsc_tick_count::tsc_interval_t totalPauseTime;
477 explicitTimer(timeStat *s, timer_e te)
478 : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
482 void start(tsc_tick_count tick);
// Records `tick` as pauseStartTime; resume() later subtracts this value from
// its own tick when accumulating totalPauseTime.
483 void pause(tsc_tick_count tick) { pauseStartTime = tick; }
484 void resume(tsc_tick_count tick) {
485 totalPauseTime += (tick - pauseStartTime);
487 void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr =
nullptr);
493 timer_e get_type()
const {
return timerEnumValue; }
502 class partitionedTimers {
504 std::vector<explicitTimer> timer_stack;
508 void init(explicitTimer timer);
509 void exchange(explicitTimer timer);
510 void push(explicitTimer timer);
517 class blockPartitionedTimer {
518 partitionedTimers *part_timers;
521 blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer)
523 part_timers->push(timer);
// Destructor: calls pop() on part_timers, pairing with the push() issued in
// the constructor.
525 ~blockPartitionedTimer() { part_timers->pop(); }
531 class blockThreadState {
537 : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
538 *state_pointer = new_state;
// Destructor: writes old_state (captured in the constructor) back through
// state_pointer.
540 ~blockThreadState() { *state_pointer = old_state; }
548 static const statInfo counterInfo[];
// Constructs a counter whose value starts at 0.
551 counter() : value(0) {}
// Adds one to the counter.
552 void increment() { value++; }
// Returns the current counter value.
553 uint64_t getValue()
const {
return value; }
// Sets the counter back to 0.
554 void reset() { value = 0; }
// Returns the name recorded for counter e in the static counterInfo table;
// e is used as the index without a range check.
555 static const char *name(counter_e e) {
return counterInfo[e].name; }
556 static bool masterOnly(counter_e e) {
594 class kmp_stats_event {
602 : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
// Constructs an event from explicit values.
//   strt - stored as start
//   stp  - stored as stop
//   nst  - stored as nest_level
//   nme  - stored as timer_name
603 kmp_stats_event(uint64_t strt, uint64_t stp,
int nst, timer_e nme)
604 : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
// Returns the stored start value.
605 inline uint64_t getStart()
const {
return start; }
// Returns the stored stop value.
606 inline uint64_t getStop()
const {
return stop; }
// Returns the stored nesting level.
607 inline int getNestLevel()
const {
return nest_level; }
// Returns the timer_e value this event was recorded for.
608 inline timer_e getTimerName()
const {
return timer_name; }
637 class kmp_stats_event_vector {
638 kmp_stats_event *events;
641 static const int INIT_SIZE = 1024;
644 kmp_stats_event_vector() {
646 (kmp_stats_event *)__kmp_allocate(
sizeof(kmp_stats_event) * INIT_SIZE);
648 allocated_size = INIT_SIZE;
650 ~kmp_stats_event_vector() {}
// Sets internal_size back to 0. Only the size field is changed here; the
// events storage and allocated_size are left as they are.
651 inline void reset() { internal_size = 0; }
// Returns internal_size.
652 inline int size()
const {
return internal_size; }
653 void push_back(uint64_t start_time, uint64_t stop_time,
int nest_level,
656 if (internal_size == allocated_size) {
657 kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
658 sizeof(kmp_stats_event) * allocated_size * 2);
659 for (i = 0; i < internal_size; i++)
665 events[internal_size] =
666 kmp_stats_event(start_time, stop_time, nest_level, name);
// Unchecked element access (const): returns events[index].
672 const kmp_stats_event &operator[](
int index)
const {
return events[index]; }
// Unchecked element access (mutable): returns events[index].
673 kmp_stats_event &operator[](
int index) {
return events[index]; }
// at() (const): same body as operator[] - returns events[index].
// NOTE(review): unlike std::vector::at, no bounds check is performed; an
// index outside the valid range is not detected here.
674 const kmp_stats_event &at(
int index)
const {
return events[index]; }
// at() (mutable): same body as operator[], also without a bounds check.
675 kmp_stats_event &at(
int index) {
return events[index]; }
705 class kmp_stats_list {
707 timeStat _timers[TIMER_LAST + 1];
708 counter _counters[COUNTER_LAST + 1];
709 explicitTimer thread_life_timer;
710 partitionedTimers _partitionedTimers;
712 kmp_stats_event_vector _event_vector;
713 kmp_stats_list *next;
714 kmp_stats_list *prev;
716 int thread_is_idle_flag;
720 : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life],
721 TIMER_OMP_worker_thread_life),
722 _nestLevel(0), _event_vector(), next(
this), prev(
this), state(IDLE),
723 thread_is_idle_flag(0) {}
// Returns a pointer to the _timers entry at idx (idx is not range-checked).
725 inline timeStat *getTimer(timer_e idx) {
return &_timers[idx]; }
// Returns a pointer to the _counters entry at idx (idx is not range-checked).
726 inline counter *getCounter(counter_e idx) {
return &_counters[idx]; }
727 inline partitionedTimers *getPartitionedTimers() {
728 return &_partitionedTimers;
// Returns the _timers array (pointer to its first element).
730 inline timeStat *getTimers() {
return _timers; }
// Returns the _counters array (pointer to its first element).
731 inline counter *getCounters() {
return _counters; }
// Returns a reference to this node's event vector.
732 inline kmp_stats_event_vector &getEventVector() {
return _event_vector; }
// Starts thread_life_timer using the current tsc_tick_count::now() value.
733 inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); }
// Stops thread_life_timer at the current tick, passing this node as the
// stats_ptr argument of explicitTimer::stop().
734 inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(),
this); }
// Calls reset() on the event vector.
735 inline void resetEventVector() { _event_vector.reset(); }
// Increments _nestLevel by one.
736 inline void incrementNestValue() { _nestLevel++; }
// Returns the current _nestLevel.
737 inline int getNestValue() {
return _nestLevel; }
// Decrements _nestLevel by one; nothing here stops it from going negative.
738 inline void decrementNestValue() { _nestLevel--; }
// Returns the gtid stored in this node.
739 inline int getGtid()
const {
return gtid; }
// Stores newgtid as this node's gtid.
740 inline void setGtid(
int newgtid) { gtid = newgtid; }
// Stores newstate in the `state` member.
741 inline void setState(
stats_state_e newstate) { state = newstate; }
// True only when thread_is_idle_flag is exactly 1.
744 inline bool isIdle() {
return thread_is_idle_flag == 1; }
// Sets thread_is_idle_flag to 1.
745 inline void setIdleFlag() { thread_is_idle_flag = 1; }
// Sets thread_is_idle_flag to 0.
746 inline void resetIdleFlag() { thread_is_idle_flag = 0; }
747 kmp_stats_list *push_back(
int gtid);
748 inline void push_event(uint64_t start_time, uint64_t stop_time,
749 int nest_level, timer_e name) {
750 _event_vector.push_back(start_time, stop_time, nest_level, name);
754 kmp_stats_list::iterator begin();
755 kmp_stats_list::iterator end();
759 friend kmp_stats_list::iterator kmp_stats_list::begin();
760 friend kmp_stats_list::iterator kmp_stats_list::end();
765 iterator operator++();
766 iterator operator++(
int dummy);
767 iterator operator--();
768 iterator operator--(
int dummy);
769 bool operator!=(
const iterator &rhs);
770 bool operator==(
const iterator &rhs);
771 kmp_stats_list *operator*()
const;
804 class kmp_stats_output_module {
814 std::string outputFileName;
815 static const char *eventsFileName;
816 static const char *plotFileName;
817 static int printPerThreadFlag;
818 static int printPerThreadEventsFlag;
819 static const rgb_color globalColorArray[];
820 static rgb_color timerColorInfo[];
823 static void setupEventColors();
824 static void printPloticusFile();
825 static void printHeaderInfo(FILE *statsOut);
826 static void printTimerStats(FILE *statsOut, statistic
const *theStats,
827 statistic
const *totalStats);
828 static void printCounterStats(FILE *statsOut, statistic
const *theStats);
829 static void printCounters(FILE *statsOut, counter
const *theCounters);
830 static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
// Returns the timerColorInfo entry for timer e (e is not range-checked).
832 static rgb_color getEventColor(timer_e e) {
return timerColorInfo[e]; }
// Declared here; the definition is not part of this listing.
833 static void windupExplicitTimers();
// Returns printPerThreadEventsFlag converted to bool (any non-zero value is
// true).
834 bool eventPrintingEnabled()
const {
return printPerThreadEventsFlag; }
// Constructor: the only work done is a call to init().
837 kmp_stats_output_module() { init(); }
838 void outputStats(
const char *heading);
844 void __kmp_stats_init();
845 void __kmp_stats_fini();
846 void __kmp_reset_stats();
847 void __kmp_output_stats(
const char *);
848 void __kmp_accumulate_stats_at_exit(
void);
850 extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
852 extern kmp_stats_list *__kmp_stats_list;
854 extern kmp_tas_lock_t __kmp_stats_lock;
856 extern tsc_tick_count __kmp_stats_start_time;
858 extern kmp_stats_output_module __kmp_stats_output;
877 #define KMP_COUNT_VALUE(name, value) \ 878 __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) 890 #define KMP_COUNT_BLOCK(name) \ 891 __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() 910 #define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string) 919 #define KMP_INIT_PARTITIONED_TIMERS(name) \ 920 __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer( \ 921 __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)) 923 #define KMP_TIME_PARTITIONED_BLOCK(name) \ 924 blockPartitionedTimer __PBLOCKTIME__( \ 925 __kmp_stats_thread_ptr->getPartitionedTimers(), \ 926 explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \ 929 #define KMP_PUSH_PARTITIONED_TIMER(name) \ 930 __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer( \ 931 __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)) 933 #define KMP_POP_PARTITIONED_TIMER() \ 934 __kmp_stats_thread_ptr->getPartitionedTimers()->pop() 936 #define KMP_EXCHANGE_PARTITIONED_TIMER(name) \ 937 __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer( \ 938 __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)) 940 #define KMP_SET_THREAD_STATE(state_name) \ 941 __kmp_stats_thread_ptr->setState(state_name) 943 #define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState() 945 #define KMP_SET_THREAD_STATE_BLOCK(state_name) \ 946 blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \ 956 #define KMP_RESET_STATS() __kmp_reset_stats() 958 #if (KMP_DEVELOPER_STATS) 959 #define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) 960 #define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v) 961 #define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) 962 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) 963 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) 964 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) 967 #define 
KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 968 #define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 969 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 970 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 971 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 972 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 975 #else // KMP_STATS_ENABLED 978 #define KMP_TIME_BLOCK(n) ((void)0) 979 #define KMP_COUNT_VALUE(n, v) ((void)0) 980 #define KMP_COUNT_BLOCK(n) ((void)0) 981 #define KMP_START_EXPLICIT_TIMER(n) ((void)0) 982 #define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) 984 #define KMP_OUTPUT_STATS(heading_string) ((void)0) 985 #define KMP_RESET_STATS() ((void)0) 987 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 988 #define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 989 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 990 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 991 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 992 #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) 993 #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) 994 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 995 #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) 996 #define KMP_POP_PARTITIONED_TIMER() ((void)0) 997 #define KMP_SET_THREAD_STATE(state_name) ((void)0) 998 #define KMP_GET_THREAD_STATE() ((void)0) 999 #define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) 1000 #endif // KMP_STATS_ENABLED 1002 #endif // KMP_STATS_H statistic is valid only for master
statistic is valid only for non-master threads
do not show a TOTAL_aggregation for this statistic
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
statistic doesn't need units printed next to it
stats_flags_e
flags to describe the statistic (timer or counter)
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
stats_state_e
the states which a thread can be in