LLVM OpenMP* Runtime Library
z_Windows_NT_util.c
/*
 * z_Windows_NT_util.c -- platform specific routines.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"



/* ----------------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------------------- */

/* This code is related to the NtQuerySystemInformation() function. This function
   is used in the load balance algorithm for OMP_DYNAMIC=true to find the
   number of running threads in the system. */

#include <ntstatus.h>
#include <ntsecapi.h>   // UNICODE_STRING

enum SYSTEM_INFORMATION_CLASS {
    SystemProcessInformation = 5
}; // SYSTEM_INFORMATION_CLASS

struct CLIENT_ID {
    HANDLE UniqueProcess;
    HANDLE UniqueThread;
}; // struct CLIENT_ID

enum THREAD_STATE {
    StateInitialized,
    StateReady,
    StateRunning,
    StateStandby,
    StateTerminated,
    StateWait,
    StateTransition,
    StateUnknown
}; // enum THREAD_STATE

struct VM_COUNTERS {
    SIZE_T PeakVirtualSize;
    SIZE_T VirtualSize;
    ULONG  PageFaultCount;
    SIZE_T PeakWorkingSetSize;
    SIZE_T WorkingSetSize;
    SIZE_T QuotaPeakPagedPoolUsage;
    SIZE_T QuotaPagedPoolUsage;
    SIZE_T QuotaPeakNonPagedPoolUsage;
    SIZE_T QuotaNonPagedPoolUsage;
    SIZE_T PagefileUsage;
    SIZE_T PeakPagefileUsage;
    SIZE_T PrivatePageCount;
}; // struct VM_COUNTERS

struct SYSTEM_THREAD {
    LARGE_INTEGER KernelTime;
    LARGE_INTEGER UserTime;
    LARGE_INTEGER CreateTime;
    ULONG         WaitTime;
    LPVOID        StartAddress;
    CLIENT_ID     ClientId;
    DWORD         Priority;
    LONG          BasePriority;
    ULONG         ContextSwitchCount;
    THREAD_STATE  State;
    ULONG         WaitReason;
}; // SYSTEM_THREAD

KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 );
#if KMP_ARCH_X86
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State        ) == 52 );
#else
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State        ) == 68 );
#endif

struct SYSTEM_PROCESS_INFORMATION {
    ULONG          NextEntryOffset;
    ULONG          NumberOfThreads;
    LARGE_INTEGER  Reserved[ 3 ];
    LARGE_INTEGER  CreateTime;
    LARGE_INTEGER  UserTime;
    LARGE_INTEGER  KernelTime;
    UNICODE_STRING ImageName;
    DWORD          BasePriority;
    HANDLE         ProcessId;
    HANDLE         ParentProcessId;
    ULONG          HandleCount;
    ULONG          Reserved2[ 2 ];
    VM_COUNTERS    VMCounters;
    IO_COUNTERS    IOCounters;
    SYSTEM_THREAD  Threads[ 1 ];
}; // SYSTEM_PROCESS_INFORMATION
typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION;

KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) ==  0 );
KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime      ) == 32 );
KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName       ) == 56 );
#if KMP_ARCH_X86
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId   ) ==  68 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) ==  76 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters  ) ==  88 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters  ) == 136 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads     ) == 184 );
#else
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId   ) ==  80 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) ==  96 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters  ) == 112 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters  ) == 208 );
    KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads     ) == 256 );
#endif

typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG );
NtQuerySystemInformation_t NtQuerySystemInformation = NULL;

HMODULE ntdll = NULL;

/* End of NtQuerySystemInformation()-related code */

#if KMP_GROUP_AFFINITY
static HMODULE kernel32 = NULL;
#endif /* KMP_GROUP_AFFINITY */

/* ----------------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------------------- */

#if KMP_HANDLE_SIGNALS
    typedef void (* sig_func_t )( int );
    static sig_func_t __kmp_sighldrs[ NSIG ];
    static int __kmp_siginstalled[ NSIG ];
#endif

static HANDLE __kmp_monitor_ev;
static kmp_int64 __kmp_win32_time;
double __kmp_win32_tick;

int __kmp_init_runtime = FALSE;
CRITICAL_SECTION __kmp_win32_section;

void
__kmp_win32_mutex_init( kmp_win32_mutex_t *mx )
{
    InitializeCriticalSection( & mx->cs );
#if USE_ITT_BUILD
    __kmp_itt_system_object_created( & mx->cs, "Critical Section" );
#endif /* USE_ITT_BUILD */
}

void
__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx )
{
    DeleteCriticalSection( & mx->cs );
}

void
__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx )
{
    EnterCriticalSection( & mx->cs );
}

void
__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx )
{
    LeaveCriticalSection( & mx->cs );
}

void
__kmp_win32_cond_init( kmp_win32_cond_t *cv )
{
    cv->waiters_count_         = 0;
    cv->wait_generation_count_ = 0;
    cv->release_count_         = 0;

    /* Initialize the critical section */
    __kmp_win32_mutex_init( & cv->waiters_count_lock_ );

    /* Create a manual-reset event. */
    cv->event_ = CreateEvent( NULL,     // no security
                              TRUE,     // manual-reset
                              FALSE,    // non-signaled initially
                              NULL );   // unnamed
#if USE_ITT_BUILD
    __kmp_itt_system_object_created( cv->event_, "Event" );
#endif /* USE_ITT_BUILD */
}

void
__kmp_win32_cond_destroy( kmp_win32_cond_t *cv )
{
    __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ );
    __kmp_free_handle( cv->event_ );
    memset( cv, '\0', sizeof( *cv ) );
}

/* TODO associate cv with a team instead of a thread so as to optimize
 * the case where we wake up a whole team */

void
__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load )
{
    int my_generation;
    int last_waiter;

    /* Avoid race conditions */
    __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

    /* Increment count of waiters */
    cv->waiters_count_++;

    /* Store current generation in our activation record. */
    my_generation = cv->wait_generation_count_;

    __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
    __kmp_win32_mutex_unlock( mx );

    for (;;) {
        int wait_done;

        /* Wait until the event is signaled */
        WaitForSingleObject( cv->event_, INFINITE );

        __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

        /* Exit the loop when the <cv->event_> is signaled and
         * there are still waiting threads from this <wait_generation>
         * that haven't been released from this wait yet. */
        wait_done = ( cv->release_count_ > 0 ) &&
                    ( cv->wait_generation_count_ != my_generation );

        __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );

        /* There used to be a semicolon after the if statement; it looked
         * like a bug, so it was removed. */
        if ( wait_done )
            break;
    }

    __kmp_win32_mutex_lock( mx );
    __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

    cv->waiters_count_--;
    cv->release_count_--;

    last_waiter = ( cv->release_count_ == 0 );

    __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );

    if ( last_waiter ) {
        /* We're the last waiter to be notified, so reset the manual event. */
        ResetEvent( cv->event_ );
    }
}

void
__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv )
{
    __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );

    if ( cv->waiters_count_ > 0 ) {
        /* Release all the threads in this generation. */
        SetEvent( cv->event_ );

        cv->release_count_ = cv->waiters_count_;

        /* Start a new generation. */
        cv->wait_generation_count_++;
    }

    __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
}

void
__kmp_win32_cond_signal( kmp_win32_cond_t *cv )
{
    __kmp_win32_cond_broadcast( cv );
}
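
/* A minimal usage sketch of the condition-variable emulation above
   (illustrative only -- the caller code below is an assumption about the
   typical pattern, not code from this file). A waiter must hold the
   external mutex and re-check its predicate in a loop, because
   __kmp_win32_cond_wait() may return spuriously; the wait releases the
   mutex while blocked and re-acquires it before returning:

       __kmp_win32_mutex_lock( &mx );
       while ( ! predicate ) {
           __kmp_win32_cond_wait( &cv, &mx, NULL, 0 );
       }
       __kmp_win32_mutex_unlock( &mx );

   A signaller sets the predicate under the same mutex and then calls
   __kmp_win32_cond_broadcast( &cv ). The generation count ensures that
   threads which begin waiting after a broadcast do not consume releases
   intended for the previous generation. */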

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

void
__kmp_enable( int new_state )
{
    if (__kmp_init_runtime)
        LeaveCriticalSection( & __kmp_win32_section );
}

void
__kmp_disable( int *old_state )
{
    *old_state = 0;

    if (__kmp_init_runtime)
        EnterCriticalSection( & __kmp_win32_section );
}

void
__kmp_suspend_initialize( void )
{
    /* do nothing */
}

static void
__kmp_suspend_initialize_thread( kmp_info_t *th )
{
    if ( ! TCR_4( th->th.th_suspend_init ) ) {
        /* this means we haven't initialized the suspension objects for this
           thread in this instance of the process */
        __kmp_win32_cond_init(  &th->th.th_suspend_cv );
        __kmp_win32_mutex_init( &th->th.th_suspend_mx );
        TCW_4( th->th.th_suspend_init, TRUE );
    }
}

void
__kmp_suspend_uninitialize_thread( kmp_info_t *th )
{
    if ( TCR_4( th->th.th_suspend_init ) ) {
        /* this means we have initialized the suspension objects for this
           thread in this instance of the process */
        __kmp_win32_cond_destroy( & th->th.th_suspend_cv );
        __kmp_win32_mutex_destroy( & th->th.th_suspend_mx );
        TCW_4( th->th.th_suspend_init, FALSE );
    }
}

/* This routine puts the calling thread to sleep after setting the
 * sleep bit for the indicated flag variable to true.
 */
template <class C>
static inline void __kmp_suspend_template( int th_gtid, C *flag )
{
    kmp_info_t *th = __kmp_threads[th_gtid];
    int status;
    typename C::flag_t old_spin;

    KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) );

    __kmp_suspend_initialize_thread( th );
    __kmp_win32_mutex_lock( &th->th.th_suspend_mx );

    KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n",
                    th_gtid, flag->get() ) );

    /* TODO: shouldn't this use release semantics to ensure that
       __kmp_suspend_initialize_thread gets called first?
    */
    old_spin = flag->set_sleeping();

    KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n",
                   th_gtid, flag->get(), *(flag->get()) ) );

    if ( flag->done_check_val(old_spin) ) {
        old_spin = flag->unset_sleeping();
        KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n",
                       th_gtid, flag->get()) );
    } else {
#ifdef DEBUG_SUSPEND
        __kmp_suspend_count++;
#endif
        /* Encapsulate in a loop as the documentation states that this may
         * "with low probability" return when the condition variable has
         * not been signaled or broadcast
         */
        int deactivated = FALSE;
        TCW_PTR(th->th.th_sleep_loc, (void *)flag);
        while ( flag->is_sleeping() ) {
            KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n",
                           th_gtid ) );
            // Mark the thread as no longer active (only in the first iteration of the loop).
            if ( ! deactivated ) {
                th->th.th_active = FALSE;
                if ( th->th.th_active_in_pool ) {
                    th->th.th_active_in_pool = FALSE;
                    KMP_TEST_THEN_DEC32(
                        (kmp_int32 *) &__kmp_thread_pool_active_nth );
                    KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
                }
                deactivated = TRUE;

                __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
            }
            else {
                __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
            }

#ifdef KMP_DEBUG
            if( flag->is_sleeping() ) {
                KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ));
            }
#endif /* KMP_DEBUG */

        } // while

        // Mark the thread as active again (if it was previously marked as inactive)
        if ( deactivated ) {
            th->th.th_active = TRUE;
            if ( TCR_4(th->th.th_in_pool) ) {
                KMP_TEST_THEN_INC32(
                    (kmp_int32 *) &__kmp_thread_pool_active_nth );
                th->th.th_active_in_pool = TRUE;
            }
        }
    }

    __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );

    KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) );
}

void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
    __kmp_suspend_template(th_gtid, flag);
}
void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
    __kmp_suspend_template(th_gtid, flag);
}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
    __kmp_suspend_template(th_gtid, flag);
}
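
/* Summary sketch of the suspend/resume handshake above (a restatement of
   this file's logic, not additional runtime code). The sleeper publishes
   its flag and sets the sleep bit before blocking; the waker clears the
   sleep bit and signals the condition variable while holding th_suspend_mx,
   so a wakeup cannot be lost between the sleeper's is_sleeping() check and
   its call to __kmp_win32_cond_wait():

       // sleeper (__kmp_suspend_template)    // waker (__kmp_resume_template)
       old_spin = flag->set_sleeping();       old_spin = flag->unset_sleeping();
       while ( flag->is_sleeping() )          if ( flag->is_sleeping_val(old_spin) )
           __kmp_win32_cond_wait( ... );          __kmp_win32_cond_signal( ... );
*/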

/* This routine signals the thread specified by target_gtid to wake up
 * after setting the sleep bit indicated by the flag argument to FALSE
 */
template <class C>
static inline void __kmp_resume_template( int target_gtid, C *flag )
{
    kmp_info_t *th = __kmp_threads[target_gtid];
    int status;

#ifdef KMP_DEBUG
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif

    KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) );

    __kmp_suspend_initialize_thread( th );
    __kmp_win32_mutex_lock( &th->th.th_suspend_mx );

    if (!flag) { // coming from __kmp_null_resume_wrapper
        flag = (C *)th->th.th_sleep_loc;
    }

    // First, check if the flag is null or its type has changed. If so, someone else woke it up.
    if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to
        KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n",
                       gtid, target_gtid, NULL ) );
        __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
        return;
    }
    else {
        typename C::flag_t old_spin = flag->unset_sleeping();
        if ( !flag->is_sleeping_val(old_spin) ) {
            KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): "
                           "%u => %u\n",
                           gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) );
            __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
            return;
        }
    }
    TCW_PTR(th->th.th_sleep_loc, NULL);

    KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n",
                   gtid, target_gtid, flag->get() ) );

    __kmp_win32_cond_signal( &th->th.th_suspend_cv );
    __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );

    KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n",
                    gtid, target_gtid ) );
}

void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
    __kmp_resume_template(target_gtid, flag);
}
void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
    __kmp_resume_template(target_gtid, flag);
}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
    __kmp_resume_template(target_gtid, flag);
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

void
__kmp_yield( int cond )
{
    if (cond)
        Sleep(0);
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

void
__kmp_gtid_set_specific( int gtid )
{
    KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n",
                   gtid, __kmp_gtid_threadprivate_key ));
    KMP_ASSERT( __kmp_init_runtime );
    if( ! TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) )
        KMP_FATAL( TLSSetValueFailed );
}

int
__kmp_gtid_get_specific()
{
    int gtid;
    if( !__kmp_init_runtime ) {
        KA_TRACE( 50, ("__kmp_gtid_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
        return KMP_GTID_SHUTDOWN;
    }
    gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key );
    if ( gtid == 0 ) {
        gtid = KMP_GTID_DNE;
    }
    else {
        gtid--;
    }
    KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
                   __kmp_gtid_threadprivate_key, gtid ));
    return gtid;
}
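
/* Note on the +1/-1 encoding above: TlsGetValue() returns 0 both for a TLS
   slot that was never set and for a stored value of 0, so the gtid is kept
   biased by one. For example, storing gtid 0 places (LPVOID)1 in the slot;
   reading back 0 therefore reliably means "not set" (KMP_GTID_DNE). */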

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#if KMP_GROUP_AFFINITY

//
// Only 1 DWORD_PTR-sized entry in the mask should have any procs set.
// Return the appropriate index, or -1 for an invalid mask.
//
int
__kmp_get_proc_group( kmp_affin_mask_t const *mask )
{
    int i;
    int group = -1;
    for (i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0) {
            continue;
        }
        if (group >= 0) {
            return -1;
        }
        group = i;
    }
    return group;
}
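
//
// For example, with two 64-processor groups, a mask whose bits cover only
// procs 64..127 has mask[0] == 0 and mask[1] != 0, so this returns 1; a
// mask with bits set in more than one entry spans groups and yields -1.
//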

#endif /* KMP_GROUP_AFFINITY */

int
__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
{

#if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        //
        // Check for a valid mask.
        //
        GROUP_AFFINITY ga;
        int group = __kmp_get_proc_group( mask );
        if (group < 0) {
            if (abort_on_error) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
            return -1;
        }

        //
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        //
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( CantSetThreadAffMask ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            }
            return error;
        }
    }
    else

#endif /* KMP_GROUP_AFFINITY */

    {
        if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( CantSetThreadAffMask ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            }
            return error;
        }
    }
    return 0;
}

int
__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
{

#if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        KMP_CPU_ZERO(mask);
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);

        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                    KMP_ERR(error),
                    __kmp_msg_null
                );
            }
            return error;
        }

        if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups)
          || (ga.Mask == 0)) {
            return -1;
        }

        mask[ga.Group] = ga.Mask;
    }
    else

#endif /* KMP_GROUP_AFFINITY */

    {
        kmp_affin_mask_t newMask, sysMask, retval;

        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                    KMP_ERR(error),
                    __kmp_msg_null
                );
            }
            return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (! retval) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                    KMP_ERR(error),
                    __kmp_msg_null
                );
            }
            return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (! newMask) {
            DWORD error = GetLastError();
            if (abort_on_error) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                    KMP_ERR(error),
                    __kmp_msg_null
                );
            }
        }
        *mask = retval;
    }
    return 0;
}

void
__kmp_affinity_bind_thread( int proc )
{

#if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        //
        // Form the GROUP_AFFINITY struct directly, rather than filling
        // out a bit vector and calling __kmp_set_system_affinity().
        //
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups
           * CHAR_BIT * sizeof(DWORD_PTR))));
        ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR));
        ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR)));
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
            DWORD error = GetLastError();
            if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
                __kmp_msg(
                    kmp_ms_warning,
                    KMP_MSG( CantSetThreadAffMask ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            }
        }
    }
    else

#endif /* KMP_GROUP_AFFINITY */

    {
        kmp_affin_mask_t mask;
        KMP_CPU_ZERO(&mask);
        KMP_CPU_SET(proc, &mask);
        __kmp_set_system_affinity(&mask, TRUE);
    }
}

void
__kmp_affinity_determine_capable( const char *env_var )
{
    //
    // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask().
    //

#if KMP_GROUP_AFFINITY
    KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(kmp_affin_mask_t));
#else
    KMP_AFFINITY_ENABLE(sizeof(kmp_affin_mask_t));
#endif

    KA_TRACE( 10, (
        "__kmp_affinity_determine_capable: "
            "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n",
        __kmp_affin_mask_size
    ) );
}

double
__kmp_read_cpu_time( void )
{
    FILETIME CreationTime, ExitTime, KernelTime, UserTime;
    int      status;
    double   cpu_time;

    cpu_time = 0;

    status = GetProcessTimes( GetCurrentProcess(), &CreationTime,
                              &ExitTime, &KernelTime, &UserTime );

    if (status) {
        double sec = 0;

        sec += KernelTime.dwHighDateTime;
        sec += UserTime.dwHighDateTime;

        /* Shift left by 32 bits */
        sec *= (double) (1 << 16) * (double) (1 << 16);

        sec += KernelTime.dwLowDateTime;
        sec += UserTime.dwLowDateTime;

        cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC;
    }

    return cpu_time;
}
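
/* Worked example of the conversion above: FILETIME values count 100 ns
   ticks, so a combined kernel+user total of 20,000,000 ticks equals
   20,000,000 * 100 ns = 2 seconds. The code computes exactly that as
   (sec * 100.0) / KMP_NSEC_PER_SEC, with the high DWORDs pre-scaled by
   2^32 via the two (1 << 16) factors. */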

int
__kmp_read_system_info( struct kmp_sys_info *info )
{
    info->maxrss  = 0;  /* the maximum resident set size utilized (in kilobytes)     */
    info->minflt  = 0;  /* the number of page faults serviced without any I/O        */
    info->majflt  = 0;  /* the number of page faults serviced that required I/O      */
    info->nswap   = 0;  /* the number of times a process was "swapped" out of memory */
    info->inblock = 0;  /* the number of times the file system had to perform input  */
    info->oublock = 0;  /* the number of times the file system had to perform output */
    info->nvcsw   = 0;  /* the number of times a context switch was voluntary        */
    info->nivcsw  = 0;  /* the number of times a context switch was forced           */

    return 1;
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

void
__kmp_runtime_initialize( void )
{
    SYSTEM_INFO info;
    kmp_str_buf_t path;
    UINT path_size;

    if ( __kmp_init_runtime ) {
        return;
    };

#if KMP_DYNAMIC_LIB
    /* Pin dynamic library for the lifetime of application */
    {
        // First, turn off error message boxes
        UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
        HMODULE h;
        BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
                                        | GET_MODULE_HANDLE_EX_FLAG_PIN,
                                      (LPCTSTR)&__kmp_serial_initialize, &h);
        KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded");
        SetErrorMode (err_mode);    // Restore error mode
        KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") );
    }
#endif

    InitializeCriticalSection( & __kmp_win32_section );
#if USE_ITT_BUILD
    __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" );
#endif /* USE_ITT_BUILD */
    __kmp_initialize_system_tick();

    #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
        if ( ! __kmp_cpuinfo.initialized ) {
            __kmp_query_cpuid( & __kmp_cpuinfo );
        }; // if
    #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    /* Set up minimum number of threads to switch to TLS gtid */
    #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB
        // Windows* OS, static library.
        /*
            A new thread may use stack space previously used by another thread, now terminated.
            On Windows* OS, in case of static linking, we do not know the moment of thread
            termination, and our structures (__kmp_threads and __kmp_root arrays) still keep
            info about dead threads. This leads to a problem in the __kmp_get_global_thread_id()
            function: it wrongly finds a gtid (by searching through stack addresses of all known
            threads) for an unregistered foreign thread.

            Setting __kmp_tls_gtid_min to 0 works around this problem: __kmp_get_global_thread_id()
            does not search through stacks, but gets the gtid from TLS immediately.

            --ln
        */
        __kmp_tls_gtid_min = 0;
    #else
        __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
    #endif

    /* for the static library */
    if ( !__kmp_gtid_threadprivate_key ) {
        __kmp_gtid_threadprivate_key = TlsAlloc();
        if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) {
            KMP_FATAL( TLSOutOfIndexes );
        }
    }

    //
    // Load ntdll.dll.
    //
    /*
        A simple
            GetModuleHandle( "ntdll.dll" )
        is not suitable due to a security issue (see
        http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify
        the full path to the library.
    */
    __kmp_str_buf_init( & path );
    path_size = GetSystemDirectory( path.str, path.size );
    KMP_DEBUG_ASSERT( path_size > 0 );
    if ( path_size >= path.size ) {
        //
        // Buffer is too short. Expand the buffer and try again.
        //
        __kmp_str_buf_reserve( & path, path_size );
        path_size = GetSystemDirectory( path.str, path.size );
        KMP_DEBUG_ASSERT( path_size > 0 );
    }; // if
    if ( path_size > 0 && path_size < path.size ) {
        //
        // Now we have the system directory name in the buffer.
        // Append a backslash and the name of the dll to form the full path.
        //
        path.used = path_size;
        __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" );

        //
        // Now load ntdll using the full path.
        //
        ntdll = GetModuleHandle( path.str );
    }

    KMP_DEBUG_ASSERT( ntdll != NULL );
    if ( ntdll != NULL ) {
        NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" );
    }
    KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL );

#if KMP_GROUP_AFFINITY
    //
    // Load kernel32.dll.
    // Same caveat - must use full system path name.
    //
    if ( path_size > 0 && path_size < path.size ) {
        //
        // Truncate the buffer back to just the system path length,
        // discarding "\\ntdll.dll", and replacing it with "kernel32.dll".
        //
        path.used = path_size;
        __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" );

        //
        // Load kernel32.dll using full path.
        //
        kernel32 = GetModuleHandle( path.str );
        KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) );

        //
        // Load the function pointers to kernel32.dll routines
        // that may or may not exist on this system.
        //
        if ( kernel32 != NULL ) {
            __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" );
            __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" );
            __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" );
            __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" );

            KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) );
            KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) );
            KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) );
            KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) );
            KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) );

            //
            // See if group affinity is supported on this system.
            // If so, calculate the #groups and #procs.
            //
            // Group affinity was introduced with Windows* 7 OS and
            // Windows* Server 2008 R2 OS.
            //
            if ( ( __kmp_GetActiveProcessorCount != NULL )
              && ( __kmp_GetActiveProcessorGroupCount != NULL )
              && ( __kmp_GetThreadGroupAffinity != NULL )
              && ( __kmp_SetThreadGroupAffinity != NULL )
              && ( ( __kmp_num_proc_groups
                = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) {
                //
                // Calculate the total number of active OS procs.
                //
                int i;

                KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );

                __kmp_xproc = 0;

                for ( i = 0; i < __kmp_num_proc_groups; i++ ) {
                    DWORD size = __kmp_GetActiveProcessorCount( i );
                    __kmp_xproc += size;
                    KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) );
                }
            }
            else {
                KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
            }
        }
    }
    if ( __kmp_num_proc_groups <= 1 ) {
        GetSystemInfo( & info );
        __kmp_xproc = info.dwNumberOfProcessors;
    }
#else
    GetSystemInfo( & info );
    __kmp_xproc = info.dwNumberOfProcessors;
#endif /* KMP_GROUP_AFFINITY */

    //
    // If the OS said there were 0 procs, take a guess and use a value of 2.
    // This is done for Linux* OS, also. Do we need an error / warning?
    //
    if ( __kmp_xproc <= 0 ) {
        __kmp_xproc = 2;
    }

    KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) );

    __kmp_str_buf_free( & path );

#if USE_ITT_BUILD
    __kmp_itt_initialize();
#endif /* USE_ITT_BUILD */

    __kmp_init_runtime = TRUE;
} // __kmp_runtime_initialize

void
__kmp_runtime_destroy( void )
{
    if ( ! __kmp_init_runtime ) {
        return;
    }

#if USE_ITT_BUILD
    __kmp_itt_destroy();
#endif /* USE_ITT_BUILD */

    /* we can't DeleteCriticalSection( & __kmp_win32_section ) here */
    /* due to the KX_TRACE() commands */
    KA_TRACE( 40, ("__kmp_runtime_destroy\n" ));

    if( __kmp_gtid_threadprivate_key ) {
        TlsFree( __kmp_gtid_threadprivate_key );
        __kmp_gtid_threadprivate_key = 0;
    }

    __kmp_affinity_uninitialize();
    DeleteCriticalSection( & __kmp_win32_section );

    ntdll = NULL;
    NtQuerySystemInformation = NULL;

#if KMP_GROUP_AFFINITY
    kernel32 = NULL;
    __kmp_GetActiveProcessorCount = NULL;
    __kmp_GetActiveProcessorGroupCount = NULL;
    __kmp_GetThreadGroupAffinity = NULL;
    __kmp_SetThreadGroupAffinity = NULL;
#endif // KMP_GROUP_AFFINITY

    __kmp_init_runtime = FALSE;
}

void
__kmp_terminate_thread( int gtid )
{
    kmp_info_t *th = __kmp_threads[ gtid ];

    if( !th ) return;

    KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );

    if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) {
        /* It's OK, the thread may have exited already */
    }
    __kmp_free_handle( th->th.th_info.ds.ds_thread );
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

void
__kmp_clear_system_time( void )
{
    BOOL status;
    LARGE_INTEGER time;
    status = QueryPerformanceCounter( & time );
    __kmp_win32_time = (kmp_int64) time.QuadPart;
}

void
__kmp_initialize_system_tick( void )
{
    {
        BOOL status;
        LARGE_INTEGER freq;

        status = QueryPerformanceFrequency( & freq );
        if (! status) {
            DWORD error = GetLastError();
            __kmp_msg(
                kmp_ms_fatal,
                KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ),
                KMP_ERR( error ),
                __kmp_msg_null
            );
        }
        else {
            __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart;
        }
    }
}

/* Calculate the elapsed wall clock time for the user */

void
__kmp_elapsed( double *t )
{
    BOOL status;
    LARGE_INTEGER now;
    status = QueryPerformanceCounter( & now );
    *t = ((double) now.QuadPart) * __kmp_win32_tick;
}
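
/* For example, if QueryPerformanceFrequency() reported 10,000,000 counts
   per second, __kmp_win32_tick is 1e-7, and a counter reading of
   50,000,000 in __kmp_elapsed() yields *t == 5.0 seconds since the
   counter's epoch. */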

/* Calculate the elapsed wall clock tick for the user */

void
__kmp_elapsed_tick( double *t )
{
    *t = __kmp_win32_tick;
}

void
__kmp_read_system_time( double *delta )
{
    if (delta != NULL) {
        BOOL status;
        LARGE_INTEGER now;

        status = QueryPerformanceCounter( & now );

        *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time))
                 * __kmp_win32_tick;
    }
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

void * __stdcall
__kmp_launch_worker( void *arg )
{
    volatile void *stack_data;
    void *exit_val;
    void *padding = 0;
    kmp_info_t *this_thr = (kmp_info_t *) arg;
    int gtid;

    gtid = this_thr->th.th_info.ds.ds_gtid;
    __kmp_gtid_set_specific( gtid );
#ifdef KMP_TDATA_GTID
    #error "This define causes problems with LoadLibrary() + declspec(thread) " \
        "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
        "reference: http://support.microsoft.com/kb/118816"
    //__kmp_gtid = gtid;
#endif

#if USE_ITT_BUILD
    __kmp_itt_thread_name( gtid );
#endif /* USE_ITT_BUILD */

    __kmp_affinity_set_init_mask( gtid, FALSE );

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    //
    // Set the FP control regs to be a copy of
    // the parallel initialization thread's.
    //
    __kmp_clear_x87_fpu_status_word();
    __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
    __kmp_load_mxcsr( &__kmp_init_mxcsr );
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    if ( __kmp_stkoffset > 0 && gtid > 0 ) {
        padding = KMP_ALLOCA( gtid * __kmp_stkoffset );
    }

    KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
    this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
    TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );

    if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
        TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data);
        KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE );
        __kmp_check_stack_overlap( this_thr );
    }
    KMP_MB();
    exit_val = __kmp_launch_thread( this_thr );
    KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
    TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
    KMP_MB();
    return exit_val;
}

/* The monitor thread controls all of the threads in the complex */

void * __stdcall
__kmp_launch_monitor( void *arg )
{
    DWORD        wait_status;
    kmp_thread_t monitor;
    int          status;
    int          interval;
    kmp_info_t *this_thr = (kmp_info_t *) arg;

    KMP_DEBUG_ASSERT(__kmp_init_monitor);
    TCW_4( __kmp_init_monitor, 2 );    // AC: Signal the library that monitor has started
                                       // TODO: hide "2" in enum (like {true,false,started})
    this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
    TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );

    KMP_MB();       /* Flush all pending memory write invalidates.  */
    KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) );

    monitor = GetCurrentThread();

    /* set thread priority */
    status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST );
    if (! status) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantSetThreadPriority ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }

    /* register us as monitor */
    __kmp_gtid_set_specific( KMP_GTID_MONITOR );
#ifdef KMP_TDATA_GTID
    #error "This define causes problems with LoadLibrary() + declspec(thread) " \
        "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
        "reference: http://support.microsoft.com/kb/118816"
    //__kmp_gtid = KMP_GTID_MONITOR;
#endif

#if USE_ITT_BUILD
    __kmp_itt_thread_ignore();    // Instruct Intel(R) Threading Tools to ignore monitor thread.
#endif /* USE_ITT_BUILD */

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    interval = ( 1000 / __kmp_monitor_wakeups );    /* in milliseconds */

    while (! TCR_4(__kmp_global.g.g_done)) {
        /* This thread monitors the state of the system */

        KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );

        wait_status = WaitForSingleObject( __kmp_monitor_ev, interval );

        if (wait_status == WAIT_TIMEOUT) {
            TCW_4( __kmp_global.g.g_time.dt.t_value,
                   TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
        }

        KMP_MB();       /* Flush all pending memory write invalidates.  */
    }

    KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) );

    status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL );
    if (! status) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantSetThreadPriority ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }

    if (__kmp_global.g.g_abort != 0) {
        /* now we need to terminate the worker threads  */
        /* the value of t_abort is the signal we caught */

        int gtid;

        KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) );

        /* terminate the OpenMP worker threads */
        /* TODO this is not valid for sibling threads!!
         * the uber master might not be 0 anymore.. */
        for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
            __kmp_terminate_thread( gtid );

        __kmp_cleanup();

        Sleep( 0 );

        KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) );

        if (__kmp_global.g.g_abort > 0) {
            raise( __kmp_global.g.g_abort );
        }
    }

    TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );

    KMP_MB();
    return arg;
}

void
__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
{
    kmp_thread_t handle;
    DWORD        idThread;

    KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );

    th->th.th_info.ds.ds_gtid = gtid;

    if ( KMP_UBER_GTID(gtid) ) {
        int stack_data;

        /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use.
           Is it appropriate to just use GetCurrentThread?  When should we close this handle?  When
           unregistering the root?
        */
        {
            BOOL rc;
            rc = DuplicateHandle(
                GetCurrentProcess(),
                GetCurrentThread(),
                GetCurrentProcess(),
                &th->th.th_info.ds.ds_thread,
                0,
                FALSE,
                DUPLICATE_SAME_ACCESS
            );
            KMP_ASSERT( rc );
            KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
                           (LPVOID)th,
                           th->th.th_info.ds.ds_thread ) );
            th->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
        }
        if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
            /* we will dynamically update the stack range if gtid_mode == 1 */
            TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
            TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
            TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
            __kmp_check_stack_overlap( th );
        }
    }
    else {
        KMP_MB();       /* Flush all pending memory write invalidates.  */

        /* Set stack size for this thread now. */
        KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC
                        " bytes\n", stack_size ) );

        stack_size += gtid * __kmp_stkoffset;

        TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size);
        TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);

        KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %"
                        KMP_SIZE_T_SPEC
                        " bytes, &__kmp_launch_worker = %p, th = %p, "
                        "&idThread = %p\n",
                        (SIZE_T) stack_size,
                        (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
                        (LPVOID) th, &idThread ) );

        {
            handle = CreateThread( NULL, (SIZE_T) stack_size,
                                   (LPTHREAD_START_ROUTINE) __kmp_launch_worker,
                                   (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
        }

        KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %"
                        KMP_SIZE_T_SPEC
                        " bytes, &__kmp_launch_worker = %p, th = %p, "
                        "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
                        (SIZE_T) stack_size,
                        (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
                        (LPVOID) th, idThread, handle ) );

        {
            if ( handle == 0 ) {
                DWORD error = GetLastError();
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( CantCreateThread ),
                    KMP_ERR( error ),
                    __kmp_msg_null
                );
            } else {
                th->th.th_info.ds.ds_thread = handle;
            }
        }
        KMP_MB();       /* Flush all pending memory write invalidates.  */
    }

    KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
}

int
__kmp_still_running(kmp_info_t *th) {
    return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0));
}

void
__kmp_create_monitor( kmp_info_t *th )
{
    kmp_thread_t handle;
    DWORD        idThread;
    int          ideal, new_ideal;

    if( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
        // We don't need the monitor thread in case of MAX_BLOCKTIME
        KA_TRACE( 10, ("__kmp_create_monitor: skipping monitor thread because of MAX blocktime\n" ) );
        th->th.th_info.ds.ds_tid  = 0; // this makes reap_monitor a no-op
        th->th.th_info.ds.ds_gtid = 0;
        TCW_4( __kmp_init_monitor, 2 ); // Signal to stop waiting for monitor creation
        return;
    }
    KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL );
    if ( __kmp_monitor_ev == NULL ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantCreateEvent ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }; // if
#if USE_ITT_BUILD
    __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" );
#endif /* USE_ITT_BUILD */

    th->th.th_info.ds.ds_tid  = KMP_GTID_MONITOR;
    th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;

    // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how
    // to automatically expand stacksize based on CreateThread error code.
    if ( __kmp_monitor_stksize == 0 ) {
        __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
    }
    if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
        __kmp_monitor_stksize = __kmp_sys_min_stksize;
    }

    KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
                   (int) __kmp_monitor_stksize ) );

    TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );

    handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize,
                           (LPTHREAD_START_ROUTINE) __kmp_launch_monitor,
                           (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
    if (handle == 0) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantCreateThread ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }
    else
        th->th.th_info.ds.ds_thread = handle;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n",
                   (void *) th->th.th_info.ds.ds_thread ) );
}

/*
    Check to see if thread is still alive.

    NOTE:  The ExitProcess(code) system call causes all threads to terminate
    with an exit_val = code.  Because of this we can not rely on exit_val having
    any particular value.  So this routine may return STILL_ALIVE in exit_val
    even after the thread is dead.
*/

int
__kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val )
{
    DWORD rc;
    rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val );
    if ( rc == 0 ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( FunctionError, "GetExitCodeThread()" ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }; // if
    return ( *exit_val == STILL_ACTIVE );
}


void
__kmp_exit_thread(
    int exit_status
) {
    ExitThread( exit_status );
} // __kmp_exit_thread

/*
    This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor().
*/
static void
__kmp_reap_common( kmp_info_t * th )
{
    DWORD exit_val;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) );

    /*
        2006-10-19:

        There are two opposite situations:

        1. Windows* OS keeps the thread alive after it resets the ds_alive flag and exits
           from the thread function. (For example, see C70770/Q394281 "unloading of dll based
           on OMP is very slow".)
        2. Windows* OS may kill the thread before it resets the ds_alive flag.

        The right solution seems to be waiting for *either* thread termination *or* ds_alive
        resetting.
    */

    {
        // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to
        // cover this usage also.
        void * obj = NULL;
        register kmp_uint32 spins;
#if USE_ITT_BUILD
        KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive );
#endif /* USE_ITT_BUILD */
        KMP_INIT_YIELD( spins );
        do {
#if USE_ITT_BUILD
            KMP_FSYNC_SPIN_PREPARE( obj );
#endif /* USE_ITT_BUILD */
            __kmp_is_thread_alive( th, &exit_val );
            KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
            KMP_YIELD_SPIN( spins );
        } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) );
#if USE_ITT_BUILD
        if ( exit_val == STILL_ACTIVE ) {
            KMP_FSYNC_CANCEL( obj );
        } else {
            KMP_FSYNC_SPIN_ACQUIRED( obj );
        }; // if
#endif /* USE_ITT_BUILD */
    }

    __kmp_free_handle( th->th.th_info.ds.ds_thread );

    /*
     * NOTE:  The ExitProcess(code) system call causes all threads to terminate
     * with an exit_val = code.  Because of this we can not rely on exit_val having
     * any particular value.
     */
    if ( exit_val == STILL_ACTIVE ) {
        KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) );
    } else if ( (void *) exit_val != (void *) th) {
        KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) );
    }; // if

    KA_TRACE( 10,
        (
            "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n",
            th->th.th_info.ds.ds_gtid,
            th->th.th_info.ds.ds_thread
        )
    );

    th->th.th_info.ds.ds_thread    = 0;
    th->th.th_info.ds.ds_tid       = KMP_GTID_DNE;
    th->th.th_info.ds.ds_gtid      = KMP_GTID_DNE;
    th->th.th_info.ds.ds_thread_id = 0;

    KMP_MB();       /* Flush all pending memory write invalidates.  */
}

void
__kmp_reap_monitor( kmp_info_t *th )
{
    int status;

    KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n",
                   (void *) th->th.th_info.ds.ds_thread ) );

    // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
    // If both tid and gtid are 0, it means the monitor did not ever start.
    // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
    KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
    if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
        KA_TRACE( 10, ("__kmp_reap_monitor: monitor did not start, returning\n") );
        return;
    }; // if

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    status = SetEvent( __kmp_monitor_ev );
    if ( status == FALSE ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantSetEvent ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }
    KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
    __kmp_reap_common( th );

    __kmp_free_handle( __kmp_monitor_ev );

    KMP_MB();       /* Flush all pending memory write invalidates.  */
}

void
__kmp_reap_worker( kmp_info_t * th )
{
    KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
    __kmp_reap_common( th );
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#if KMP_HANDLE_SIGNALS


static void
__kmp_team_handler( int signo )
{
    if ( __kmp_global.g.g_abort == 0 ) {
        // Stage 1 signal handler, let's shut down all of the threads.
        if ( __kmp_debug_buf ) {
            __kmp_dump_debug_buffer();
        }; // if
        KMP_MB();       // Flush all pending memory write invalidates.
        TCW_4( __kmp_global.g.g_abort, signo );
        KMP_MB();       // Flush all pending memory write invalidates.
        TCW_4( __kmp_global.g.g_done, TRUE );
        KMP_MB();       // Flush all pending memory write invalidates.
    }
} // __kmp_team_handler


static
sig_func_t __kmp_signal( int signum, sig_func_t handler ) {
    sig_func_t old = signal( signum, handler );
    if ( old == SIG_ERR ) {
        int error = errno;
        __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null );
    }; // if
    return old;
}

static void
__kmp_install_one_handler(
    int        sig,
    sig_func_t handler,
    int        parallel_init
) {
    sig_func_t old;
    KMP_MB();       /* Flush all pending memory write invalidates.  */
    KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) );
    if ( parallel_init ) {
        old = __kmp_signal( sig, handler );
        // SIG_DFL on Windows* OS is NULL or 0.
        if ( old == __kmp_sighldrs[ sig ] ) {
            __kmp_siginstalled[ sig ] = 1;
        } else {
            // Restore/keep the user's handler if one was previously installed.
            old = __kmp_signal( sig, old );
        }; // if
    } else {
        // Save initial/system signal handlers to see if user handlers were installed.
        // 2009-09-23: This is dead code. On Windows* OS __kmp_install_signals is called
        // once with parallel_init == TRUE.
        old = __kmp_signal( sig, SIG_DFL );
        __kmp_sighldrs[ sig ] = old;
        __kmp_signal( sig, old );
    }; // if
    KMP_MB();       /* Flush all pending memory write invalidates.  */
} // __kmp_install_one_handler

static void
__kmp_remove_one_handler( int sig ) {
    if ( __kmp_siginstalled[ sig ] ) {
        sig_func_t old;
        KMP_MB();       // Flush all pending memory write invalidates.
        KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) );
        old = __kmp_signal( sig, __kmp_sighldrs[ sig ] );
        if ( old != __kmp_team_handler ) {
            KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
            old = __kmp_signal( sig, old );
        }; // if
        __kmp_sighldrs[ sig ] = NULL;
        __kmp_siginstalled[ sig ] = 0;
        KMP_MB();       // Flush all pending memory write invalidates.
    }; // if
} // __kmp_remove_one_handler


void
__kmp_install_signals( int parallel_init )
{
    KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) );
    if ( ! __kmp_handle_signals ) {
        KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) );
        return;
    }; // if
    __kmp_install_one_handler( SIGINT,  __kmp_team_handler, parallel_init );
    __kmp_install_one_handler( SIGILL,  __kmp_team_handler, parallel_init );
    __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
    __kmp_install_one_handler( SIGFPE,  __kmp_team_handler, parallel_init );
    __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
    __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
} // __kmp_install_signals


void
__kmp_remove_signals( void )
{
    int sig;
    KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) );
    for ( sig = 1; sig < NSIG; ++ sig ) {
        __kmp_remove_one_handler( sig );
    }; // for sig
} // __kmp_remove_signals


#endif // KMP_HANDLE_SIGNALS

/* Put the thread to sleep for a time period */
void
__kmp_thread_sleep( int millis )
{
    DWORD status;

    status = SleepEx( (DWORD) millis, FALSE );
    if ( status ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( FunctionError, "SleepEx()" ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }
}

/* Determine whether the given address is mapped into the current address space. */
int
__kmp_is_address_mapped( void * addr )
{
    DWORD status;
    MEMORY_BASIC_INFORMATION lpBuffer;
    SIZE_T dwLength;

    dwLength = sizeof(MEMORY_BASIC_INFORMATION);

    status = VirtualQuery( addr, &lpBuffer, dwLength );

    return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) ||
             (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE )));
}

kmp_uint64
__kmp_hardware_timestamp(void)
{
    kmp_uint64 r = 0;

    QueryPerformanceCounter((LARGE_INTEGER*) &r);
    return r;
}

/* Free handle and check the error code */
void
__kmp_free_handle( kmp_thread_t tHandle )
{
    /* called with parameter type HANDLE also, thus suppose kmp_thread_t defined as HANDLE */
    BOOL rc;
    rc = CloseHandle( tHandle );
    if ( !rc ) {
        DWORD error = GetLastError();
        __kmp_msg(
            kmp_ms_fatal,
            KMP_MSG( CantCloseHandle ),
            KMP_ERR( error ),
            __kmp_msg_null
        );
    }
}

int
__kmp_get_load_balance( int max ) {

    static ULONG glb_buff_size = 100 * 1024;

    static int    glb_running_threads = 0;  /* Saved count of the running threads for the thread balance algorithm */
    static double glb_call_time       = 0;  /* Thread balance algorithm call time */

    int running_threads = 0;                // Number of running threads in the system.
    NTSTATUS status     = 0;
    ULONG    buff_size  = 0;
    ULONG    info_size  = 0;
    void *   buffer     = NULL;
    PSYSTEM_PROCESS_INFORMATION spi = NULL;
    int first_time      = 1;

    double call_time = 0.0;

    __kmp_elapsed( & call_time );

    if ( glb_call_time &&
            ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
        running_threads = glb_running_threads;
        goto finish;
    }
    glb_call_time = call_time;

    // Do not spend time on running the algorithm if we have a permanent error.
    if ( NtQuerySystemInformation == NULL ) {
        running_threads = -1;
        goto finish;
    }; // if

    if ( max <= 0 ) {
        max = INT_MAX;
    }; // if

    do {

        if ( first_time ) {
            buff_size = glb_buff_size;
        } else {
            buff_size = 2 * buff_size;
        }

        buffer = KMP_INTERNAL_REALLOC( buffer, buff_size );
        if ( buffer == NULL ) {
            running_threads = -1;
            goto finish;
        }; // if
        status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size );
        first_time = 0;

    } while ( status == STATUS_INFO_LENGTH_MISMATCH );
    glb_buff_size = buff_size;

    #define CHECK( cond )                   \
        {                                   \
            KMP_DEBUG_ASSERT( cond );       \
            if ( ! ( cond ) ) {             \
                running_threads = -1;       \
                goto finish;                \
            }                               \
        }

    CHECK( buff_size >= info_size );
    spi = PSYSTEM_PROCESS_INFORMATION( buffer );
    for ( ; ; ) {
        ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer );
        CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size );
        HANDLE pid = spi->ProcessId;
        ULONG num = spi->NumberOfThreads;
        CHECK( num >= 1 );
        size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 );
        CHECK( offset + spi_size < info_size );          // Make sure process info record fits the buffer.
        if ( spi->NextEntryOffset != 0 ) {
            CHECK( spi_size <= spi->NextEntryOffset );   // And do not overlap with the next record.
        }; // if
        // pid == 0 corresponds to the System Idle Process. It always has running threads
        // on all cores. So, we don't consider the running threads of this process.
        if ( pid != 0 ) {
            for ( int i = 0; i < num; ++ i ) {
                THREAD_STATE state = spi->Threads[ i ].State;
                // Count threads that have Ready or Running state.
                // !!! TODO: Why does the comment not match the code???
                if ( state == StateRunning ) {
                    ++ running_threads;
                    // Stop counting running threads if the number is already greater than
                    // the number of available cores.
                    if ( running_threads >= max ) {
                        goto finish;
                    }
                } // if
            }; // for i
        } // if
        if ( spi->NextEntryOffset == 0 ) {
            break;
        }; // if
        spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset );
    }; // forever

    #undef CHECK

    finish: // Clean up and exit.

        if ( buffer != NULL ) {
            KMP_INTERNAL_FREE( buffer );
        }; // if

        glb_running_threads = running_threads;

        return running_threads;

} //__kmp_get_load_balance()
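
/* A hypothetical call site (illustrative only; the real callers live
   elsewhere in the runtime): with OMP_DYNAMIC=true the thread allocator can
   compare system load against the machine size before sizing a team, e.g.

       int busy = __kmp_get_load_balance( __kmp_avail_proc );
       if ( busy != -1 && busy < __kmp_avail_proc ) {
           // there is headroom for more active threads
       }

   A return value of -1 means the metric is unavailable (for example,
   NtQuerySystemInformation could not be loaded) and the caller should fall
   back to the default behavior. */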