#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

extern int __kmp_affinity_compact; // number of innermost levels to compact over when sorting by childNums (see comparator below)

// An Address identifies one OS thread's position in the machine hierarchy:
// labels[] holds physical IDs from the outermost level down, and childNums[]
// the child index of each node within its parent.
class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  // Two addresses are "close" at a given level if they share all labels
  // above the innermost 'level' levels.
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    printf("Depth: %u --- ", depth);
    for (unsigned i = 0; i < depth; i++)
      printf("%u ", labels[i]);
  }
};

// Pairs an Address with an unsigned payload (typically the OS proc ID).
class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second) : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
// qsort comparator: order AddrUnsPair entries lexicographically by labels,
// outermost level first.
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
  const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
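/* Illustrative usage (mirrors the call in hierarchy_info::init() below):
   qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels)
   groups the table by the outermost level first (e.g. package), then by each
   inner level (e.g. core, hardware thread) within it. */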
// qsort comparator: the innermost __kmp_affinity_compact levels are the most
// significant sort keys, followed by the remaining levels from the root down.
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
  const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1; // innermost levels first
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact; // then remaining levels, root down
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
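/* Worked example (illustrative, not from the original source): with depth == 3
   and __kmp_affinity_compact == 1, entries compare first on childNums[2] (the
   innermost level), then on childNums[0] and childNums[1]; sorting therefore
   separates addresses that differ only at the innermost level into different
   blocks of the resulting order. */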
/* Machine-hierarchy info, computed once at init time and used to build an
   efficient hierarchical barrier. */
class hierarchy_info {
public:
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  kmp_uint32 maxLevels; // number of levels the arrays below can hold
  kmp_uint32 depth;     // depth of the hierarchy actually in use
  kmp_uint32 base_num_threads; // number of threads the hierarchy currently covers
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // initialized / not_initialized / initializing
  volatile kmp_int8 resizing;      // 0 = not resizing, 1 = resize in progress
  // numPerLevel[i] is the fan-out at level i (level 0 = leaves); skipPerLevel[i]
  // is the number of leaves spanned by one subtree rooted at level i.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;
  // Derive numPerLevel from a table of addresses sorted by labels: for each
  // level, the fan-out is one more than the largest child index seen.
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }
  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
  void fini() {
    if (!uninitialized && numPerLevel)
      __kmp_free(numPerLevel);
  }
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // lost the race; wait for the winner to finish
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);
    // numPerLevel and skipPerLevel share one allocation of 2*maxLevels entries.
    numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }
    if (adr2os) { // sort table by physical ID and derive levels from it
      qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else { // no affinity info; guess a two-level tree of maxLeaves-wide leaves
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }
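    /* Worked example (illustrative): num_addrs == 10 with maxLeaves == 4 gives
       numPerLevel[0] = 4 and numPerLevel[1] = 10/4 + 1 = 3, i.e. up to three
       groups of at most four leaves each. */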
    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0; --i) // count non-trivial levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;
    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max maxLeaves on level 0
        if (numPerLevel[d] & 1) // round odd counts up before halving
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1; // push the factor of 2 upward
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }
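    /* Worked example (illustrative): numPerLevel[0] == 10 with branch == 4
       halves twice: 10 -> 5, then 5 rounds up to 6 -> 3, while numPerLevel[1]
       quadruples, so the total leaf capacity is preserved up to rounding. */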
    // skipPerLevel[i] = number of leaves under one level-i subtree
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // one writer; readers spin in init() above
  }
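  /* Worked example (illustrative): with numPerLevel = {2, 4, 4, 1, 1, 1, 1}
     and depth == 3, the loops above yield skipPerLevel = {1, 2, 8, 16, 32, 64,
     128}: 2 = 2*1 and 8 = 4*2 within the real hierarchy, then each
     oversubscription level doubles the previous one. */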
  // Resize the hierarchy if nproc grows past what it currently covers.
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize
    // Calculate the new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if the old maxLevels is enough to contain the new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
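    /* Worked example (illustrative): if old_sz == 8, depth == 3, maxLevels == 7
       and nproc == 40, the loop above doubles in place three times (old_sz:
       8 -> 16 -> 32 -> 64), ending with depth == 6 and no reallocation needed. */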
    if (nproc > old_sz) { // not enough space; expand the hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;
      // Resize the arrays, keeping the same one-block layout
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);
      // Copy the old elements into the new arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }
      // Initialize the new elements to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }
      // Free the old arrays (skipPerLevel shared the numPerLevel allocation)
      __kmp_free(old_numPerLevel);
    }
    // Fill in the oversubscription levels of the hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // one writer
  }
};

#endif // KMP_AFFINITY_H