// LLVM OpenMP* Runtime Library
// kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef KMP_AFFINITY_H
16 #define KMP_AFFINITY_H
17 
18 #include "kmp_os.h"
19 #include "kmp.h"
20 
21 #if KMP_AFFINITY_SUPPORTED
22 #if KMP_USE_HWLOC
23 class KMPHwlocAffinity: public KMPAffinity {
24 public:
25  class Mask : public KMPAffinity::Mask {
26  hwloc_cpuset_t mask;
27  public:
28  Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
29  ~Mask() { hwloc_bitmap_free(mask); }
30  void set(int i) override { hwloc_bitmap_set(mask, i); }
31  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
32  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
33  void zero() override { hwloc_bitmap_zero(mask); }
34  void copy(const KMPAffinity::Mask* src) override {
35  const Mask* convert = static_cast<const Mask*>(src);
36  hwloc_bitmap_copy(mask, convert->mask);
37  }
38  void bitwise_and(const KMPAffinity::Mask* rhs) override {
39  const Mask* convert = static_cast<const Mask*>(rhs);
40  hwloc_bitmap_and(mask, mask, convert->mask);
41  }
42  void bitwise_or(const KMPAffinity::Mask * rhs) override {
43  const Mask* convert = static_cast<const Mask*>(rhs);
44  hwloc_bitmap_or(mask, mask, convert->mask);
45  }
46  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
47  int begin() const override { return hwloc_bitmap_first(mask); }
48  int end() const override { return -1; }
49  int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
50  int get_system_affinity(bool abort_on_error) override {
51  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
52  "Illegal get affinity operation when not capable");
53  int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
54  if (retval >= 0) {
55  return 0;
56  }
57  int error = errno;
58  if (abort_on_error) {
59  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
60  }
61  return error;
62  }
63  int set_system_affinity(bool abort_on_error) const override {
64  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
65  "Illegal get affinity operation when not capable");
66  int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
67  if (retval >= 0) {
68  return 0;
69  }
70  int error = errno;
71  if (abort_on_error) {
72  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
73  }
74  return error;
75  }
76  int get_proc_group() const override {
77  int i;
78  int group = -1;
79 # if KMP_OS_WINDOWS
80  if (__kmp_num_proc_groups == 1) {
81  return 1;
82  }
83  for (i = 0; i < __kmp_num_proc_groups; i++) {
84  // On windows, the long type is always 32 bits
85  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
86  unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
87  if (first_32_bits == 0 && second_32_bits == 0) {
88  continue;
89  }
90  if (group >= 0) {
91  return -1;
92  }
93  group = i;
94  }
95 # endif /* KMP_OS_WINDOWS */
96  return group;
97  }
98  };
99  void determine_capable(const char* var) override {
100  const hwloc_topology_support* topology_support;
101  if(__kmp_hwloc_topology == NULL) {
102  if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
103  __kmp_hwloc_error = TRUE;
104  if(__kmp_affinity_verbose)
105  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
106  }
107  if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
108  __kmp_hwloc_error = TRUE;
109  if(__kmp_affinity_verbose)
110  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
111  }
112  }
113  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
114  // Is the system capable of setting/getting this thread's affinity?
115  // also, is topology discovery possible? (pu indicates ability to discover processing units)
116  // and finally, were there no errors when calling any hwloc_* API functions?
117  if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
118  topology_support->cpubind->get_thisthread_cpubind &&
119  topology_support->discovery->pu &&
120  !__kmp_hwloc_error)
121  {
122  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
123  KMP_AFFINITY_ENABLE(TRUE);
124  } else {
125  // indicate that hwloc didn't work and disable affinity
126  __kmp_hwloc_error = TRUE;
127  KMP_AFFINITY_DISABLE();
128  }
129  }
130  void bind_thread(int which) override {
131  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
132  "Illegal set affinity operation when not capable");
133  KMPAffinity::Mask *mask;
134  KMP_CPU_ALLOC_ON_STACK(mask);
135  KMP_CPU_ZERO(mask);
136  KMP_CPU_SET(which, mask);
137  __kmp_set_system_affinity(mask, TRUE);
138  KMP_CPU_FREE_FROM_STACK(mask);
139  }
140  KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
141  void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
142  KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
143  void deallocate_mask_array(KMPAffinity::Mask* array) override {
144  Mask* hwloc_array = static_cast<Mask*>(array);
145  delete[] hwloc_array;
146  }
147  KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
148  Mask* hwloc_array = static_cast<Mask*>(array);
149  return &(hwloc_array[index]);
150  }
151  api_type get_api_type() const override { return HWLOC; }
152 };
153 #endif /* KMP_USE_HWLOC */
154 
155 #if KMP_OS_LINUX
156 /*
157  * On some of the older OS's that we build on, these constants aren't present
158  * in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
159  * all systems of the same arch where they are defined, and they cannot change.
160  * stone forever.
161  */
162 #include <sys/syscall.h>
163 # if KMP_ARCH_X86 || KMP_ARCH_ARM
164 # ifndef __NR_sched_setaffinity
165 # define __NR_sched_setaffinity 241
166 # elif __NR_sched_setaffinity != 241
167 # error Wrong code for setaffinity system call.
168 # endif /* __NR_sched_setaffinity */
169 # ifndef __NR_sched_getaffinity
170 # define __NR_sched_getaffinity 242
171 # elif __NR_sched_getaffinity != 242
172 # error Wrong code for getaffinity system call.
173 # endif /* __NR_sched_getaffinity */
174 # elif KMP_ARCH_AARCH64
175 # ifndef __NR_sched_setaffinity
176 # define __NR_sched_setaffinity 122
177 # elif __NR_sched_setaffinity != 122
178 # error Wrong code for setaffinity system call.
179 # endif /* __NR_sched_setaffinity */
180 # ifndef __NR_sched_getaffinity
181 # define __NR_sched_getaffinity 123
182 # elif __NR_sched_getaffinity != 123
183 # error Wrong code for getaffinity system call.
184 # endif /* __NR_sched_getaffinity */
185 # elif KMP_ARCH_X86_64
186 # ifndef __NR_sched_setaffinity
187 # define __NR_sched_setaffinity 203
188 # elif __NR_sched_setaffinity != 203
189 # error Wrong code for setaffinity system call.
190 # endif /* __NR_sched_setaffinity */
191 # ifndef __NR_sched_getaffinity
192 # define __NR_sched_getaffinity 204
193 # elif __NR_sched_getaffinity != 204
194 # error Wrong code for getaffinity system call.
195 # endif /* __NR_sched_getaffinity */
196 # elif KMP_ARCH_PPC64
197 # ifndef __NR_sched_setaffinity
198 # define __NR_sched_setaffinity 222
199 # elif __NR_sched_setaffinity != 222
200 # error Wrong code for setaffinity system call.
201 # endif /* __NR_sched_setaffinity */
202 # ifndef __NR_sched_getaffinity
203 # define __NR_sched_getaffinity 223
204 # elif __NR_sched_getaffinity != 223
205 # error Wrong code for getaffinity system call.
206 # endif /* __NR_sched_getaffinity */
207 # elif KMP_ARCH_MIPS
208 # ifndef __NR_sched_setaffinity
209 # define __NR_sched_setaffinity 4239
210 # elif __NR_sched_setaffinity != 4239
211 # error Wrong code for setaffinity system call.
212 # endif /* __NR_sched_setaffinity */
213 # ifndef __NR_sched_getaffinity
214 # define __NR_sched_getaffinity 4240
215 # elif __NR_sched_getaffinity != 4240
216 # error Wrong code for getaffinity system call.
217 # endif /* __NR_sched_getaffinity */
218 # elif KMP_ARCH_MIPS64
219 # ifndef __NR_sched_setaffinity
220 # define __NR_sched_setaffinity 5195
221 # elif __NR_sched_setaffinity != 5195
222 # error Wrong code for setaffinity system call.
223 # endif /* __NR_sched_setaffinity */
224 # ifndef __NR_sched_getaffinity
225 # define __NR_sched_getaffinity 5196
226 # elif __NR_sched_getaffinity != 5196
227 # error Wrong code for getaffinity system call.
228 # endif /* __NR_sched_getaffinity */
229 # error Unknown or unsupported architecture
230 # endif /* KMP_ARCH_* */
231 class KMPNativeAffinity : public KMPAffinity {
232  class Mask : public KMPAffinity::Mask {
233  typedef unsigned char mask_t;
234  static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
235  public:
236  mask_t* mask;
237  Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
238  ~Mask() { if (mask) __kmp_free(mask); }
239  void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
240  bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
241  void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
242  void zero() override {
243  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
244  mask[i] = 0;
245  }
246  void copy(const KMPAffinity::Mask* src) override {
247  const Mask * convert = static_cast<const Mask*>(src);
248  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
249  mask[i] = convert->mask[i];
250  }
251  void bitwise_and(const KMPAffinity::Mask* rhs) override {
252  const Mask * convert = static_cast<const Mask*>(rhs);
253  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
254  mask[i] &= convert->mask[i];
255  }
256  void bitwise_or(const KMPAffinity::Mask* rhs) override {
257  const Mask * convert = static_cast<const Mask*>(rhs);
258  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
259  mask[i] |= convert->mask[i];
260  }
261  void bitwise_not() override {
262  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
263  mask[i] = ~(mask[i]);
264  }
265  int begin() const override {
266  int retval = 0;
267  while (retval < end() && !is_set(retval))
268  ++retval;
269  return retval;
270  }
271  int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
272  int next(int previous) const override {
273  int retval = previous+1;
274  while (retval < end() && !is_set(retval))
275  ++retval;
276  return retval;
277  }
278  int get_system_affinity(bool abort_on_error) override {
279  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
280  "Illegal get affinity operation when not capable");
281  int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
282  if (retval >= 0) {
283  return 0;
284  }
285  int error = errno;
286  if (abort_on_error) {
287  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
288  }
289  return error;
290  }
291  int set_system_affinity(bool abort_on_error) const override {
292  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
293  "Illegal get affinity operation when not capable");
294  int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
295  if (retval >= 0) {
296  return 0;
297  }
298  int error = errno;
299  if (abort_on_error) {
300  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
301  }
302  return error;
303  }
304  };
305  void determine_capable(const char* env_var) override {
306  __kmp_affinity_determine_capable(env_var);
307  }
308  void bind_thread(int which) override {
309  __kmp_affinity_bind_thread(which);
310  }
311  KMPAffinity::Mask* allocate_mask() override {
312  KMPNativeAffinity::Mask* retval = new Mask();
313  return retval;
314  }
315  void deallocate_mask(KMPAffinity::Mask* m) override {
316  KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
317  delete m;
318  }
319  KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
320  void deallocate_mask_array(KMPAffinity::Mask* array) override {
321  Mask* linux_array = static_cast<Mask*>(array);
322  delete[] linux_array;
323  }
324  KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
325  Mask* linux_array = static_cast<Mask*>(array);
326  return &(linux_array[index]);
327  }
328  api_type get_api_type() const override { return NATIVE_OS; }
329 };
330 #endif /* KMP_OS_LINUX */
331 
332 #if KMP_OS_WINDOWS
333 class KMPNativeAffinity : public KMPAffinity {
334  class Mask : public KMPAffinity::Mask {
335  typedef ULONG_PTR mask_t;
336  static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
337  mask_t* mask;
338  public:
339  Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
340  ~Mask() { if (mask) __kmp_free(mask); }
341  void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
342  bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
343  void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
344  void zero() override {
345  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
346  mask[i] = 0;
347  }
348  void copy(const KMPAffinity::Mask* src) override {
349  const Mask * convert = static_cast<const Mask*>(src);
350  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
351  mask[i] = convert->mask[i];
352  }
353  void bitwise_and(const KMPAffinity::Mask* rhs) override {
354  const Mask * convert = static_cast<const Mask*>(rhs);
355  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
356  mask[i] &= convert->mask[i];
357  }
358  void bitwise_or(const KMPAffinity::Mask* rhs) override {
359  const Mask * convert = static_cast<const Mask*>(rhs);
360  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
361  mask[i] |= convert->mask[i];
362  }
363  void bitwise_not() override {
364  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
365  mask[i] = ~(mask[i]);
366  }
367  int begin() const override {
368  int retval = 0;
369  while (retval < end() && !is_set(retval))
370  ++retval;
371  return retval;
372  }
373  int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
374  int next(int previous) const override {
375  int retval = previous+1;
376  while (retval < end() && !is_set(retval))
377  ++retval;
378  return retval;
379  }
380  int set_system_affinity(bool abort_on_error) const override {
381  if (__kmp_num_proc_groups > 1) {
382  // Check for a valid mask.
383  GROUP_AFFINITY ga;
384  int group = get_proc_group();
385  if (group < 0) {
386  if (abort_on_error) {
387  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
388  }
389  return -1;
390  }
391  // Transform the bit vector into a GROUP_AFFINITY struct
392  // and make the system call to set affinity.
393  ga.Group = group;
394  ga.Mask = mask[group];
395  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
396 
397  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
398  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
399  DWORD error = GetLastError();
400  if (abort_on_error) {
401  __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
402  KMP_ERR( error ), __kmp_msg_null);
403  }
404  return error;
405  }
406  } else {
407  if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
408  DWORD error = GetLastError();
409  if (abort_on_error) {
410  __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
411  KMP_ERR( error ), __kmp_msg_null);
412  }
413  return error;
414  }
415  }
416  return 0;
417  }
418  int get_system_affinity(bool abort_on_error) override {
419  if (__kmp_num_proc_groups > 1) {
420  this->zero();
421  GROUP_AFFINITY ga;
422  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
423  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
424  DWORD error = GetLastError();
425  if (abort_on_error) {
426  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
427  KMP_ERR(error), __kmp_msg_null);
428  }
429  return error;
430  }
431  if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
432  return -1;
433  }
434  mask[ga.Group] = ga.Mask;
435  } else {
436  mask_t newMask, sysMask, retval;
437  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
438  DWORD error = GetLastError();
439  if (abort_on_error) {
440  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
441  KMP_ERR(error), __kmp_msg_null);
442  }
443  return error;
444  }
445  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
446  if (! retval) {
447  DWORD error = GetLastError();
448  if (abort_on_error) {
449  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
450  KMP_ERR(error), __kmp_msg_null);
451  }
452  return error;
453  }
454  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
455  if (! newMask) {
456  DWORD error = GetLastError();
457  if (abort_on_error) {
458  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
459  KMP_ERR(error), __kmp_msg_null);
460  }
461  }
462  *mask = retval;
463  }
464  return 0;
465  }
466  int get_proc_group() const override {
467  int group = -1;
468  if (__kmp_num_proc_groups == 1) {
469  return 1;
470  }
471  for (int i = 0; i < __kmp_num_proc_groups; i++) {
472  if (mask[i] == 0)
473  continue;
474  if (group >= 0)
475  return -1;
476  group = i;
477  }
478  return group;
479  }
480  };
481  void determine_capable(const char* env_var) override {
482  __kmp_affinity_determine_capable(env_var);
483  }
484  void bind_thread(int which) override {
485  __kmp_affinity_bind_thread(which);
486  }
487  KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
488  void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
489  KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
490  void deallocate_mask_array(KMPAffinity::Mask* array) override {
491  Mask* windows_array = static_cast<Mask*>(array);
492  delete[] windows_array;
493  }
494  KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
495  Mask* windows_array = static_cast<Mask*>(array);
496  return &(windows_array[index]);
497  }
498  api_type get_api_type() const override { return NATIVE_OS; }
499 };
500 #endif /* KMP_OS_WINDOWS */
501 #endif /* KMP_AFFINITY_SUPPORTED */
502 
503 class Address {
504 public:
505  static const unsigned maxDepth = 32;
506  unsigned labels[maxDepth];
507  unsigned childNums[maxDepth];
508  unsigned depth;
509  unsigned leader;
510  Address(unsigned _depth)
511  : depth(_depth), leader(FALSE) {
512  }
513  Address &operator=(const Address &b) {
514  depth = b.depth;
515  for (unsigned i = 0; i < depth; i++) {
516  labels[i] = b.labels[i];
517  childNums[i] = b.childNums[i];
518  }
519  leader = FALSE;
520  return *this;
521  }
522  bool operator==(const Address &b) const {
523  if (depth != b.depth)
524  return false;
525  for (unsigned i = 0; i < depth; i++)
526  if(labels[i] != b.labels[i])
527  return false;
528  return true;
529  }
530  bool isClose(const Address &b, int level) const {
531  if (depth != b.depth)
532  return false;
533  if ((unsigned)level >= depth)
534  return true;
535  for (unsigned i = 0; i < (depth - level); i++)
536  if(labels[i] != b.labels[i])
537  return false;
538  return true;
539  }
540  bool operator!=(const Address &b) const {
541  return !operator==(b);
542  }
543  void print() const {
544  unsigned i;
545  printf("Depth: %u --- ", depth);
546  for(i=0;i<depth;i++) {
547  printf("%u ", labels[i]);
548  }
549  }
550 };
551 
552 class AddrUnsPair {
553 public:
554  Address first;
555  unsigned second;
556  AddrUnsPair(Address _first, unsigned _second)
557  : first(_first), second(_second) {
558  }
559  AddrUnsPair &operator=(const AddrUnsPair &b)
560  {
561  first = b.first;
562  second = b.second;
563  return *this;
564  }
565  void print() const {
566  printf("first = "); first.print();
567  printf(" --- second = %u", second);
568  }
569  bool operator==(const AddrUnsPair &b) const {
570  if(first != b.first) return false;
571  if(second != b.second) return false;
572  return true;
573  }
574  bool operator!=(const AddrUnsPair &b) const {
575  return !operator==(b);
576  }
577 };
578 
579 
580 static int
581 __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
582 {
583  const Address *aa = (const Address *)&(((AddrUnsPair *)a)
584  ->first);
585  const Address *bb = (const Address *)&(((AddrUnsPair *)b)
586  ->first);
587  unsigned depth = aa->depth;
588  unsigned i;
589  KMP_DEBUG_ASSERT(depth == bb->depth);
590  for (i = 0; i < depth; i++) {
591  if (aa->labels[i] < bb->labels[i]) return -1;
592  if (aa->labels[i] > bb->labels[i]) return 1;
593  }
594  return 0;
595 }
596 
597 
604 public:
// Fan-out cap for the lowest (leaf) level of the hierarchy.
607  static const kmp_uint32 maxLeaves=4;
// Minimum branching factor enforced when balancing level widths in init().
608  static const kmp_uint32 minBranch=4;
// Capacity of the numPerLevel/skipPerLevel arrays (grown by resize()).
613  kmp_uint32 maxLevels;
614 
// Number of hierarchy levels currently in use (<= maxLevels).
618  kmp_uint32 depth;
// Thread count the hierarchy was built for (updated by resize()).
619  kmp_uint32 base_num_threads;
620  enum init_status { initialized=0, not_initialized=1, initializing=2 };
621  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
622  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
623 
// numPerLevel[i] is the fan-out at level i; skipPerLevel[i] is the number of
// leaves spanned by one node at level i.  Both live in a single allocation
// owned by numPerLevel (skipPerLevel points into it) — see init()/fini().
627  kmp_uint32 *numPerLevel;
628  kmp_uint32 *skipPerLevel;
629 
630  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
631  int hier_depth = adr2os[0].first.depth;
632  int level = 0;
633  for (int i=hier_depth-1; i>=0; --i) {
634  int max = -1;
635  for (int j=0; j<num_addrs; ++j) {
636  int next = adr2os[j].first.childNums[i];
637  if (next > max) max = next;
638  }
639  numPerLevel[level] = max+1;
640  ++level;
641  }
642  }
643 
// Construct an empty, not-yet-initialized hierarchy; init() does the build.
644  hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
645 
646  void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
647 
// One-time construction of the thread hierarchy from a table of topology
// addresses (or a synthetic flat shape when adr2os is NULL).  Thread-safe:
// exactly one caller wins the CAS below and builds; racing callers spin
// until the builder publishes `initialized`.
648  void init(AddrUnsPair *adr2os, int num_addrs)
649  {
650  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
651  if (bool_result == 0) { // Wait for initialization
652  while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
653  return;
654  }
655  KMP_DEBUG_ASSERT(bool_result==1);
656 
657  /* Added explicit initialization of the data fields here to prevent usage of dirty value
658  observed when static library is re-initialized multiple times (e.g. when
659  non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
660  depth = 1;
661  resizing = 0;
662  maxLevels = 7;
// One allocation holds both arrays: skipPerLevel aliases the second half.
663  numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
664  skipPerLevel = &(numPerLevel[maxLevels]);
665  for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
666  numPerLevel[i] = 1;
667  skipPerLevel[i] = 1;
668  }
669 
670  // Sort table by physical ID
671  if (adr2os) {
672  qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
673  deriveLevels(adr2os, num_addrs);
674  }
675  else {
// No topology info: assume maxLeaves threads per leaf node, rounded up.
676  numPerLevel[0] = maxLeaves;
677  numPerLevel[1] = num_addrs/maxLeaves;
678  if (num_addrs%maxLeaves) numPerLevel[1]++;
679  }
680 
681  base_num_threads = num_addrs;
682  for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
683  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
684  depth++;
685 
// Rebalance: halve any level wider than `branch` (doubling the level above),
// so no node's fan-out exceeds the branch factor (maxLeaves at level 0).
686  kmp_uint32 branch = minBranch;
687  if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
688  if (branch<minBranch) branch=minBranch;
689  for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
690  while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
691  if (numPerLevel[d] & 1) numPerLevel[d]++;
692  numPerLevel[d] = numPerLevel[d] >> 1;
693  if (numPerLevel[d+1] == 1) depth++;
694  numPerLevel[d+1] = numPerLevel[d+1] << 1;
695  }
696  if(numPerLevel[0] == 1) {
697  branch = branch >> 1;
698  if (branch<4) branch = minBranch;
699  }
700  }
701 
// skipPerLevel[i] = product of fan-outs below level i (leaves per node).
702  for (kmp_uint32 i=1; i<depth; ++i)
703  skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
704  // Fill in hierarchy in the case of oversubscription
705  for (kmp_uint32 i=depth; i<maxLevels; ++i)
706  skipPerLevel[i] = 2*skipPerLevel[i-1];
707 
// Publish completion; waiters in the spin loop above may now proceed.
708  uninitialized = initialized; // One writer
709 
710  }
711 
712  // Resize the hierarchy if nproc changes to something larger than before
// Thread-safe: a CAS on `resizing` elects one resizer; losers spin and
// return once another thread's resize already covers their nproc.
713  void resize(kmp_uint32 nproc)
714  {
715  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
716  while (bool_result == 0) { // someone else is trying to resize
717  KMP_CPU_PAUSE();
718  if (nproc <= base_num_threads) // happy with other thread's resize
719  return;
720  else // try to resize
721  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
722  }
723  KMP_DEBUG_ASSERT(bool_result!=0);
724  if (nproc <= base_num_threads) return; // happy with other thread's resize
725 
726  // Calculate new maxLevels
727  kmp_uint32 old_sz = skipPerLevel[depth-1];
728  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
729  // First see if old maxLevels is enough to contain new size
// Each extra level doubles capacity by widening the current top level.
730  for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
731  skipPerLevel[i] = 2*skipPerLevel[i-1];
732  numPerLevel[i-1] *= 2;
733  old_sz *= 2;
734  depth++;
735  }
736  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
737  while (nproc > old_sz) {
738  old_sz *=2;
739  incs++;
740  depth++;
741  }
742  maxLevels += incs;
743 
744  // Resize arrays
// Same single-allocation layout as init(): skipPerLevel aliases the
// second half of the numPerLevel allocation.
745  kmp_uint32 *old_numPerLevel = numPerLevel;
746  kmp_uint32 *old_skipPerLevel = skipPerLevel;
747  numPerLevel = skipPerLevel = NULL;
748  numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
749  skipPerLevel = &(numPerLevel[maxLevels]);
750 
751  // Copy old elements from old arrays
752  for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
753  numPerLevel[i] = old_numPerLevel[i];
754  skipPerLevel[i] = old_skipPerLevel[i];
755  }
756 
757  // Init new elements in arrays to 1
758  for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
759  numPerLevel[i] = 1;
760  skipPerLevel[i] = 1;
761  }
762 
763  // Free old arrays
764  __kmp_free(old_numPerLevel);
765  }
766 
767  // Fill in oversubscription levels of hierarchy
768  for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
769  skipPerLevel[i] = 2*skipPerLevel[i-1];
770 
771  base_num_threads = nproc;
// Release the resize lock; other threads may now observe the new shape.
772  resizing = 0; // One writer
773 
774  }
775 };
776 #endif // KMP_AFFINITY_H
// (Doxygen cross-reference residue from document extraction; kept only as a
// comment so the file remains valid after the include guard's #endif.)
// kmp_uint32 depth         -- Definition: kmp_affinity.h:618
// kmp_uint32 *numPerLevel  -- Definition: kmp_affinity.h:627
// kmp_uint32 maxLevels     -- Definition: kmp_affinity.h:613
Definition: kmp_affinity.h:613