// Select an aligned allocate/free pair according to the KMP_HAVE_* feature
// macros. First choice: _mm_malloc / _mm_free, used when both
// KMP_HAVE_XMMINTRIN_H and KMP_HAVE__MM_MALLOC are set.
19#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
// Forwards (size, alignment) unchanged to _mm_malloc.
21#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
// Counterpart of KMP_ALIGNED_ALLOCATE in this branch: forwards to _mm_free.
22#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
// Next alternative, taken when KMP_HAVE_ALIGNED_ALLOC is set.
23#elif KMP_HAVE_ALIGNED_ALLOC
// Round val up to the nearest multiple of alignment. alignment must be
// non-zero; note that it is evaluated more than once.
#define KMP_ALIGN_UP(val, alignment)                                           \
  ((((val) + (alignment) - 1) / (alignment)) * (alignment))
// Original (misspelled) name of the helper above, kept as an alias so any
// existing use keeps compiling.
#define KMP_ALGIN_UP(val, alignment) KMP_ALIGN_UP(val, alignment)
// aligned_alloc takes the alignment first and expects the size to be an
// integral multiple of the alignment, so the request is rounded up before the
// call.
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc((alignment), KMP_ALIGN_UP(size, alignment))
// Memory obtained from aligned_alloc is released with plain free().
#define KMP_ALIGNED_FREE(ptr) free(ptr)
// Branch taken when KMP_HAVE_POSIX_MEMALIGN is set. Here KMP_ALIGNED_ALLOCATE
// is a static inline function rather than a macro.
29#elif KMP_HAVE_POSIX_MEMALIGN
// Takes the same (size, alignment) argument order as the macro variants.
30static inline void *KMP_ALIGNED_ALLOCATE(
size_t size,
size_t alignment) {
// posix_memalign takes the alignment before the size and writes the result
// through its first argument; its integer status is captured in n.
// NOTE(review): the declaration of ptr, the handling of n, the return
// statement and the closing brace are not visible in this excerpt.
32 int n = posix_memalign(&ptr, alignment, size);
// Release path for this branch: plain free().
40#define KMP_ALIGNED_FREE(ptr) free(ptr)
// Branch taken when KMP_HAVE__ALIGNED_MALLOC is set: forward to
// _aligned_malloc / _aligned_free.
41#elif KMP_HAVE__ALIGNED_MALLOC
43#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
44#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
// Definitions that route through KMP_INTERNAL_MALLOC / KMP_INTERNAL_FREE.
// This KMP_ALIGNED_ALLOCATE does not use its alignment argument.
// NOTE(review): presumably the final fallback of the conditional chain; the
// directive separating it from the pair above, and the closing directive, are
// not visible in this excerpt.
46#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
47#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
// Provide KMP_FOURLINE_ALIGN_CACHE only when it has not been defined already.
53#ifndef KMP_FOURLINE_ALIGN_CACHE
// Expands to KMP_ALIGN applied to four times CACHE_LINE.
54#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
// Defined as 0 here.
// NOTE(review): the code that tests this switch is not visible in this excerpt.
57#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
59class distributedBarrier {
// Four members, each declared with KMP_FOURLINE_ALIGN_CACHE.
// NOTE(review): any enclosing nested type declarations are not visible in
// this excerpt; confirm which scope each member belongs to.
// volatile kmp_uint32; presumably a count of something still outstanding.
61 kmp_uint32
volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
// Atomic kmp_uint64.
65 std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
// volatile kmp_uint64.
69 kmp_uint64
volatile KMP_FOURLINE_ALIGN_CACHE iter;
// Atomic boolean.
73 std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
// Declarations only; the definitions are not part of this excerpt.
// init is what update_num_threads() forwards its thread count to.
76 void init(
size_t nthr);
// NOTE(review): no caller of resize is visible here.
77 void resize(
size_t nthr);
// computeGo is called by allocate() with the requested thread count.
78 void computeGo(
size_t n);
// NOTE(review): no caller of computeVarsForN is visible here.
79 void computeVarsForN(
size_t n);
// Enumerator with value 16.
// NOTE(review): the enclosing enum declaration is not visible in this excerpt.
86 IDEAL_CONTENTION = 16,
// Array of MAX_ITERS pointers to flags_s.
89 flags_s *flags[MAX_ITERS];
// Sizing members; each one is declared with KMP_ALIGN_CACHE.
// num_threads is the value returned by get_num_threads().
94 size_t KMP_ALIGN_CACHE num_threads;
// max_threads is the bound that need_resize() compares a new count against.
95 size_t KMP_ALIGN_CACHE max_threads;
97 size_t KMP_ALIGN_CACHE num_gos;
99 size_t KMP_ALIGN_CACHE num_groups;
101 size_t KMP_ALIGN_CACHE threads_per_go;
// Set to false by allocate().
102 bool KMP_ALIGN_CACHE fix_threads_per_go;
104 size_t KMP_ALIGN_CACHE threads_per_group;
106 size_t KMP_ALIGN_CACHE gos_per_group;
// The default constructor and the destructor are both deleted. Objects are
// obtained through the static allocate() and released through the static
// deallocate() instead.
109 distributedBarrier() =
delete;
110 ~distributedBarrier() =
delete;
// Obtains storage for one distributedBarrier from KMP_ALIGNED_ALLOCATE,
// requesting sizeof(distributedBarrier) bytes aligned to 4 * CACHE_LINE, then
// sets up fields and calls computeGo with nThreads.
// NOTE(review): several lines of this function (the allocation-failure test,
// the loop body, other field assignments, the return statement and the
// closing brace) are not visible in this excerpt.
113 static distributedBarrier *allocate(
int nThreads) {
114 distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
115 sizeof(distributedBarrier), 4 * CACHE_LINE);
// Reports MemoryAllocFailed through KMP_FATAL; presumably reached only when
// the allocation above failed (the guarding condition is not visible).
117 KMP_FATAL(MemoryAllocFailed);
// Iterates over all MAX_ITERS slots; the statement it controls is not
// visible here.
121 for (
int i = 0; i < MAX_ITERS; ++i)
127 d->fix_threads_per_go =
false;
// nThreads is an int; computeGo is declared to take a size_t.
129 d->computeGo(nThreads);
134 static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
136 void update_num_threads(
size_t nthr) { init(nthr); }
138 bool need_resize(
size_t new_nthr) {
return (new_nthr > max_threads); }
139 size_t get_num_threads() {
return num_threads; }
// Declaration only; returns a kmp_uint64.
// NOTE(review): the definition is not part of this excerpt.
140 kmp_uint64 go_release();