#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
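
// Combine the nesting levels already consumed with the thread's requested
// th_set_nested_nth values into a freshly allocated kmp_nested_nthreads_t:
// entries up to and including 'level' are zeroed, the remaining entries are
// copied from the thread's nested-nth list.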
static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {
  kmp_nested_nthreads_t *new_nested_nth =
      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
          sizeof(kmp_nested_nthreads_t));
  int new_size = level + thr->th.th_set_nested_nth_sz;
  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
  for (int i = 0; i < level + 1; ++i)
    new_nested_nth->nth[i] = 0;
  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
  new_nested_nth->size = new_nested_nth->used = new_size;
  return new_nested_nth;
}
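
// Map the calling thread to its global thread id (gtid). Depending on
// __kmp_gtid_mode this uses thread-local data, keyed TLS, or an internal
// search that matches the current stack address against the stack ranges
// recorded in __kmp_threads; the matching thread's recorded stack bounds are
// refined as a side effect.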
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
            __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    // The stack grows down: we are on this thread's stack if our address is
    // within its recorded stack range.
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  // The internal algorithm failed; fall back to keyed TLS.
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  // Refine the recorded stack bounds for this thread.
  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
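
// Like __kmp_get_global_thread_id(), but when the calling thread has no gtid
// yet it is registered as a new root thread (running serial initialization
// first if needed) under the bootstrap initialization lock.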
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  // We must be a new root (uber) thread: register it.
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);
  return gtid;
}
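
// Diagnostic helper: with KMP_STORAGE_MAP enabled it prints this thread's
// stack extent, and with consistency checks enabled it verifies that the
// stack does not overlap the stack of any other registered thread, issuing a
// fatal StackOverlap message if it does.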
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  // No point in checking uber threads since their stacks are not recorded the
  // same way.
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          // Print the other thread's stack values before aborting.
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;
  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
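
// Print a single "OMP storage map:" line for the address range [p1, p2] of
// the given size, serialized with the bootstrap stdio lock. When
// KMP_PRINT_DATA_PLACEMENT is enabled and the range is exact, it also reports
// which memory node each page of the range lives on.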
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) // cannot query the host node, so don't try again
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
        // Collate adjacent pages that live on the same memory node.
        do {
          last = (char *)p1;
          lastNode = node;
          do {
            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
        } while (p1 <= p2);
#else
        __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p2 > p1)
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
#endif
      }
    }
  } else
    __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
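
// Abort the entire process: under the exit bootstrap lock, dump the debug
// buffer if one is active, record SIGABRT in the global abort flag, and
// unregister the library before the process is torn down.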
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}

void __kmp_abort_thread(void) {
  // There is no portable way to kill just this thread, so spin instead.
  __kmp_infinite_loop();
}
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // Only shut down the library on a dynamic unload (lpReserved == NULL),
    // not while the process is terminating.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
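
// __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
// "ordered" construct: with BUILD_PARALLEL_ORDERED the threads of a team take
// turns by waiting on and then advancing the team's t_ordered.dt.t_value
// ticket, while the consistency checker pushes/pops the ordered sync frame.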
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    // Wait for our turn: our ticket equals our tid within the team.
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    // Pass the ticket to the next thread in the team.
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
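
// Decide whether the calling thread executes a "single" region. In a
// serialized team the answer is always yes; otherwise the first thread to
// atomically advance the team's t_construct counter wins. Returns TRUE for
// the winner and FALSE for everyone else.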
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    // Try to advance the team's construct counter: the thread whose
    // compare-and-store succeeds owns the single block.
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata from the primary thread of an active level-1 team.
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
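
// Work out how many threads a new parallel region can actually get. Starting
// from the requested set_nthreads, the count is trimmed by the active dynamic
// mode (load balance, thread limit, or random), by KMP_DEVICE_THREAD_LIMIT,
// by the OMP_THREAD_LIMIT contention-group limit, and finally by the capacity
// of the __kmp_threads array; warnings are issued when a non-dynamic request
// cannot be satisfied. Returns the number of threads to use (1 means the
// region will be serialized).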
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, adjust the number of desired threads according to the
  // method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // dyn-var is off: no dynamic adjustment
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a one-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a one-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If not initializing the hidden helper team, take the hidden helper
  // threads out of the capacity, since they are included in
  // __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a one-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }

  // If there is a strict num_threads request that cannot be met, error out.
  if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  }
  return new_nthreads;
}
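
// Populate a newly allocated (non-hot) team with worker threads: the primary
// thread takes slot 0, workers are taken from the pool or created via
// __kmp_allocate_thread, barrier arrival counters are aligned with the team,
// affinity places are partitioned, and the primary thread's task-team state
// is transferred onto the new team.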
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10,
           ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  // First, set up the primary thread.
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  // Decide whether the optimized hot team can be reused.
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) {
    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level;
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // A hot team already exists for this level; it must be this team.
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1;
      } else {
        hot_teams[level].hot_team = team;
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {
    // Install the primary thread, then fork or reuse the workers.
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      // Align team and thread arrived states.
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize the worker's barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition places for teams-construct workers that have not been
    // forked for real work yet; that happens in the nested parallel region.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  // Take care of the primary thread's task state.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (use_hot_team) {
      KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
      KA_TRACE(
          20,
          ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
           "%p, new task_team %p / team %p\n",
           __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
           team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
           team));

      // Remember the primary thread's task state on the new team ...
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);

      // ... and adopt the hot team's task state via thread 1.
      if (team->t.t_nproc > 1) {
        KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
                         team->t.t_threads[1]->th.th_task_state == 1);
        KMP_CHECK_UPDATE(master_th->th.th_task_state,
                         team->t.t_threads[1]->th.th_task_state);
      } else {
        master_th->th.th_task_state = 0;
      }
    } else {
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      // Not a hot team: start with task state 0.
      master_th->th.th_task_state = 0;
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate the primary thread's x87/MXCSR floating-point control settings to
// the team when fp-control inheritance is enabled.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Capture the primary thread's FPU control state (both x87 and vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Not inheriting: mark the team state as not saved.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Restore the hardware FP control registers from the team if they differ from
// the current thread's values.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
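
// Execute a parallel region serially: the calling thread becomes a team of
// one. A serial team is allocated (or the cached one reused) the first time a
// thread serializes; further nested serialized regions just bump t_serialized
// and push a fresh dispatch buffer. ICVs, nesting level, and OMPT bookkeeping
// are updated to look like a real fork.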
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current proc-bind-var.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for the next parallel region.
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for the next parallel region.
  this_thr->th.th_set_nproc = 0;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    // The nesting level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* This serial team was already used, so allocate a new one. */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* Set up the new serialized team and install it. */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* Initialize the serial team. */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    if (this_thr->th.th_team->t.t_nested_nth)
      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
    else
      serial_team->t.t_nested_nth = &__kmp_nested_nth;
    // Save the previous task state of the primary thread.
    serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* Each serialized region gets its own copy of the parent's ICVs so they
       can be changed independently. */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level.
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (this_thr->th.th_team->t.t_nested_nth)
      nested_nth = this_thr->th.th_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // for the debugger

    this_thr->th.th_info.ds.ds_tid = 0;

    /* Set thread cache values. */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;
    this_thr->th.th_task_team = NULL;
    this_thr->th.th_task_state = 0;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    /* Set up dispatch buffers. */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  } else {
    /* This serialized team is already in use; just add another nested level. */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // The nesting level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;

    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (serial_team->t.t_nested_nth)
      nested_nth = serial_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* Allocate/push a dispatch buffer onto the stack. */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    /* Allocate/push the task-team stack node. */
    __kmp_push_task_team_node(this_thr, serial_team);
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display-affinity functionality for serialized regions.
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV.
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // Don't use lw_taskteam after linking; its content was swapped.

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
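
// Helper predicates for __kmp_fork_call: __kmp_is_fork_in_teams() detects a
// parallel region forked by a thread that is itself executing inside a teams
// construct, and __kmp_is_entering_teams() detects the fork that creates the
// teams league itself.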
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
static inline int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }

  // Handle the serialized (teams of one) case in place.
  if (parent_team == master_th->th.th_serial_team) {
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // Revert the change made in __kmpc_serialized_parallel(): the initial
      // code in teams should have level = 0.
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // Don't use lw_taskteam after linking; its content was swapped.

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }

    // Decrement t_serialized so enquiry functions work correctly while the
    // microtask runs; it is restored at join time.
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }

  /* Change the number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has a num_threads clause.
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // Only a dynamic reduction of the thread count is possible here.
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: with the distributed barrier this block must run even when the
      // team size appears unchanged from the maximum.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for a possible next parallel region.
    master_th->th.th_set_nproc = 0;
  }

  if (__kmp_debugging) { // Let the debugger override the number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means the debugger doesn't want to change it.
      master_set_numthreads = nth;
    }
  }

  // Figure out the proc_bind policy for the nested parallel within teams.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update.
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use the current proc-bind-var.
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Change the bind-var ICV to the correct value for each implicit task.
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for the next parallel region.
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // Create a new stack stitching id before entering the fork barrier.
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke the microtask for the PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
// Handle the nthreads == 1 case of __kmp_fork_call: the region is executed by
// the primary thread alone, with the matching OMPT implicit-task and
// parallel-end callbacks wrapped around the invocation.
static inline int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {
  void **argv;
  int i;
  kmp_team_t *team;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  SimpleVLA<void *> args(argc);
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // Revert the change made in __kmpc_serialized_parallel(): the initial
      // code in teams should have level = 0.
      master_th->th.th_serial_team->t.t_level--;

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // Don't use lw_taskteam after linking; its content was swapped.
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      // Revert the change made in __kmpc_serialized_parallel(), then call the
      // special invoker for the outer "parallel" of the teams construct.
      team->t.t_level--;
      invoker(gtid);

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // Don't use lw_taskteam after linking; its content was swapped.
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    }
  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
    // Don't use lw_taskteam after linking; its content was swapped.

    // We were called from GNU native code.
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  return FALSE;
}
1927int __kmp_fork_call(
ident_t *loc,
int gtid,
1928 enum fork_context_e call_context,
1929 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1934 int master_this_cons;
1936 kmp_team_t *parent_team;
1937 kmp_info_t *master_th;
1941 int master_set_numthreads;
1942 int task_thread_limit = 0;
1946#if KMP_NESTED_HOT_TEAMS
1947 kmp_hot_team_ptr_t **p_hot_teams;
1950 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1953 KA_TRACE(20, (
"__kmp_fork_call: enter T#%d\n", gtid));
1954 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1957 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1959 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1960 __kmp_stkpadding += (short)((kmp_int64)dummy);
1966 if (!TCR_4(__kmp_init_parallel))
1967 __kmp_parallel_initialize();
1968 __kmp_resume_if_soft_paused();
1973 master_th = __kmp_threads[gtid];
1975 parent_team = master_th->th.th_team;
1976 master_tid = master_th->th.th_info.ds.ds_tid;
1977 master_this_cons = master_th->th.th_local.this_construct;
1978 root = master_th->th.th_root;
1979 master_active = root->r.r_active;
1980 master_set_numthreads = master_th->th.th_set_nproc;
1982 master_th->th.th_current_task->td_icvs.task_thread_limit;
1985 ompt_data_t ompt_parallel_data = ompt_data_none;
1986 ompt_data_t *parent_task_data;
1987 ompt_frame_t *ompt_frame;
1988 void *return_address = NULL;
1990 if (ompt_enabled.enabled) {
1991 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1993 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1998 __kmp_assign_root_init_mask();
2001 level = parent_team->t.t_level;
2003 active_level = parent_team->t.t_active_level;
2005 teams_level = master_th->th.th_teams_level;
2006#if KMP_NESTED_HOT_TEAMS
2007 p_hot_teams = &master_th->th.th_hot_teams;
2008 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
2009 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
2010 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
2011 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
2013 (*p_hot_teams)[0].hot_team_nth = 1;
2018 if (ompt_enabled.enabled) {
2019 if (ompt_enabled.ompt_callback_parallel_begin) {
2020 int team_size = master_set_numthreads
2021 ? master_set_numthreads
2022 : get__nproc_2(parent_team, master_tid);
2023 int flags = OMPT_INVOKER(call_context) |
2024 ((microtask == (microtask_t)__kmp_teams_master)
2025 ? ompt_parallel_league
2026 : ompt_parallel_team);
2027 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
2028 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
2031 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2035 master_th->th.th_ident = loc;
2038 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
2039 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
2040 call_context, microtask, invoker,
2041 master_set_numthreads, level,
2043 ompt_parallel_data, return_address,
2052 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
2056 __kmp_is_entering_teams(active_level, level, teams_level, ap);
2057 if ((!enter_teams &&
2058 (parent_team->t.t_active_level >=
2059 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2060 (__kmp_library == library_serial)) {
2061 KC_TRACE(10, (
"__kmp_fork_call: T#%d serializing team\n", gtid));
2064 nthreads = master_set_numthreads
2065 ? master_set_numthreads
2067 : get__nproc_2(parent_team, master_tid);
2070 nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
2077 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2082 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2083 nthreads, enter_teams);
2084 if (nthreads == 1) {
2088 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2092 KMP_DEBUG_ASSERT(nthreads > 0);
2095 master_th->th.th_set_nproc = 0;
2097 if (nthreads == 1) {
2098 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2099 invoker, master_th, parent_team,
2101 &ompt_parallel_data, &return_address,
2109 KF_TRACE(10, (
"__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2110 "curtask=%p, curtask_max_aclevel=%d\n",
2111 parent_team->t.t_active_level, master_th,
2112 master_th->th.th_current_task,
2113 master_th->th.th_current_task->td_icvs.max_active_levels));
2117 master_th->th.th_current_task->td_flags.executing = 0;
2119 if (!master_th->th.th_teams_microtask || level > teams_level) {
2121 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2125 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2126 kmp_nested_nthreads_t *nested_nth = NULL;
2127 if (!master_th->th.th_set_nested_nth &&
2128 (level + 1 < parent_team->t.t_nested_nth->used) &&
2129 (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
2130 nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
2131 }
else if (master_th->th.th_set_nested_nth) {
2132 nested_nth = __kmp_override_nested_nth(master_th, level);
2133 if ((level + 1 < nested_nth->used) &&
2134 (nested_nth->nth[level + 1] != nthreads_icv))
2135 nthreads_icv = nested_nth->nth[level + 1];
2143 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2145 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2146 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2147 proc_bind = proc_bind_false;
2151 if (proc_bind == proc_bind_default) {
2152 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2155 if (master_th->th.th_teams_microtask &&
2156 microtask == (microtask_t)__kmp_teams_master) {
2157 proc_bind = __kmp_teams_proc_bind;
2163 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2164 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2165 master_th->th.th_current_task->td_icvs.proc_bind)) {
2168 if (!master_th->th.th_teams_microtask ||
2169 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2170 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2175 master_th->th.th_set_proc_bind = proc_bind_default;
2177 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2178 kmp_internal_control_t new_icvs;
2179 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2180 new_icvs.next = NULL;
2181 if (nthreads_icv > 0) {
2182 new_icvs.nproc = nthreads_icv;
2184 if (proc_bind_icv != proc_bind_default) {
2185 new_icvs.proc_bind = proc_bind_icv;
2189 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2190 team = __kmp_allocate_team(root, nthreads, nthreads,
2194 proc_bind, &new_icvs,
2195 argc USE_NESTED_HOT_ARG(master_th));
2196 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2197 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2200 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2201 team = __kmp_allocate_team(root, nthreads, nthreads,
2206 &master_th->th.th_current_task->td_icvs,
2207 argc USE_NESTED_HOT_ARG(master_th));
2208 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2209 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2210 &master_th->th.th_current_task->td_icvs);
2213 10, (
"__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2216 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2217 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2218 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2219 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2220 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2222 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2225 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
2227 if (!master_th->th.th_teams_microtask || level > teams_level) {
2228 int new_level = parent_team->t.t_level + 1;
2229 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2230 new_level = parent_team->t.t_active_level + 1;
2231 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2234 int new_level = parent_team->t.t_level;
2235 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2236 new_level = parent_team->t.t_active_level;
2237 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2239 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2241 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2243 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2244 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2247 if (team->t.t_nested_nth &&
2248 team->t.t_nested_nth != parent_team->t.t_nested_nth) {
2249 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
2250 KMP_INTERNAL_FREE(team->t.t_nested_nth);
2251 team->t.t_nested_nth = NULL;
2253 team->t.t_nested_nth = parent_team->t.t_nested_nth;
2254 if (master_th->th.th_set_nested_nth) {
2256 nested_nth = __kmp_override_nested_nth(master_th, level);
2257 team->t.t_nested_nth = nested_nth;
2258 KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
2259 master_th->th.th_set_nested_nth = NULL;
2260 master_th->th.th_set_nested_nth_sz = 0;
2261 master_th->th.th_nt_strict =
false;
2265 propagateFPControl(team);
2267 if (ompd_state & OMPD_ENABLE_BP)
2268 ompd_bp_parallel_begin();
2273 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2274 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2276 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2277 (team->t.t_master_tid == 0 &&
2278 (team->t.t_parent == root->r.r_root_team ||
2279 team->t.t_parent->t.t_serialized)));
2283 argv = (
void **)team->t.t_argv;
2285 for (i = argc - 1; i >= 0; --i) {
2286 void *new_argv = va_arg(kmp_va_deref(ap),
void *);
2287 KMP_CHECK_UPDATE(*argv, new_argv);
2291 for (i = 0; i < argc; ++i) {
2293 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2298 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2299 if (!root->r.r_active)
2300 root->r.r_active = TRUE;
2302 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2303 __kmp_setup_icv_copy(team, nthreads,
2304 &master_th->th.th_current_task->td_icvs, loc);
2307 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2310 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2313 if (team->t.t_active_level == 1
2314 && !master_th->th.th_teams_microtask) {
2316 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2317 (__kmp_forkjoin_frames_mode == 3 ||
2318 __kmp_forkjoin_frames_mode == 1)) {
2319 kmp_uint64 tmp_time = 0;
2320 if (__itt_get_timestamp_ptr)
2321 tmp_time = __itt_get_timestamp();
2323 master_th->th.th_frame_time = tmp_time;
2324 if (__kmp_forkjoin_frames_mode == 3)
2325 team->t.t_region_time = tmp_time;
2329 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2330 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2332 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2338 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2341 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2342 root, team, master_th, gtid));
2345 if (__itt_stack_caller_create_ptr) {
2348 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2349 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2350 } else if (parent_team->t.t_serialized) {
2355 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2356 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2364 __kmp_internal_fork(loc, gtid, team);
2365 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2366 "master_th=%p, gtid=%d\n",
2367 root, team, master_th, gtid));
2370 if (call_context == fork_context_gnu) {
2371 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2376 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2377 team->t.t_id, team->t.t_pkfn));
2380#if KMP_STATS_ENABLED
2384 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2388 if (!team->t.t_invoke(gtid)) {
2389 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2392#if KMP_STATS_ENABLED
2395 KMP_SET_THREAD_STATE(previous_state);
2399 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2400 team->t.t_id, team->t.t_pkfn));
2403 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2405 if (ompt_enabled.enabled) {
2406 master_th->th.ompt_thread_info.state = ompt_state_overhead;
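// OMPT support for the join path: __kmp_join_restore_state() resets the
// thread's OMPT state to serial or parallel work, and __kmp_join_ompt() fires
// the parallel-end callback before restoring that state.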
2414static inline void __kmp_join_restore_state(kmp_info_t *thread,
2417 thread->th.ompt_thread_info.state =
2418 ((team->t.t_serialized) ? ompt_state_work_serial
2419 : ompt_state_work_parallel);
2422static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
2423 kmp_team_t *team, ompt_data_t *parallel_data,
2424 int flags, void *codeptr) {
2425 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2426 if (ompt_enabled.ompt_callback_parallel_end) {
2427 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2428 parallel_data, &(task_info->task_data), flags, codeptr);
2431 task_info->frame.enter_frame = ompt_data_none;
2432 __kmp_join_restore_state(thread, team);
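// Join the current parallel region: run the join barrier, emit ITT/OMPT
// events, restore the primary thread's team, task state and ICVs, and return
// the team to the pool.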
2436void __kmp_join_call(ident_t *loc, int gtid
2439 enum fork_context_e fork_context
2443 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2445 kmp_team_t *parent_team;
2446 kmp_info_t *master_th;
2450 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2453 master_th = __kmp_threads[gtid];
2454 root = master_th->th.th_root;
2455 team = master_th->th.th_team;
2456 parent_team = team->t.t_parent;
2458 master_th->th.th_ident = loc;
2461 void *team_microtask = (void *)team->t.t_pkfn;
2465 if (ompt_enabled.enabled &&
2466 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2467 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2472 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2473 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2474 "th_task_team = %p\n",
2475 __kmp_gtid_from_thread(master_th), team,
2476 team->t.t_task_team[master_th->th.th_task_state],
2477 master_th->th.th_task_team));
2478 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2482 if (team->t.t_serialized) {
2483 if (master_th->th.th_teams_microtask) {
2485 int level = team->t.t_level;
2486 int tlevel = master_th->th.th_teams_level;
2487 if (level == tlevel) {
2491 } else if (level == tlevel + 1) {
2495 team->t.t_serialized++;
2501 if (ompt_enabled.enabled) {
2502 if (fork_context == fork_context_gnu) {
2503 __ompt_lw_taskteam_unlink(master_th);
2505 __kmp_join_restore_state(master_th, parent_team);
2512 master_active = team->t.t_master_active;
2517 __kmp_internal_join(loc, gtid, team);
2519 if (__itt_stack_caller_create_ptr) {
2520 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2522 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2523 team->t.t_stack_id = NULL;
2527 master_th->th.th_task_state =
2530 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2531 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2535 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2536 parent_team->t.t_stack_id = NULL;
2544 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2545 void *codeptr = team->t.ompt_team_info.master_return_address;
2550 if (team->t.t_active_level == 1 &&
2551 (!master_th->th.th_teams_microtask ||
2552 master_th->th.th_teams_size.nteams == 1)) {
2553 master_th->th.th_ident = loc;
2556 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2557 __kmp_forkjoin_frames_mode == 3)
2558 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2559 master_th->th.th_frame_time, 0, loc,
2560 master_th->th.th_team_nproc, 1);
2561 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2562 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2563 __kmp_itt_region_joined(gtid);
2567#if KMP_AFFINITY_SUPPORTED
2570 master_th->th.th_first_place = team->t.t_first_place;
2571 master_th->th.th_last_place = team->t.t_last_place;
2575 if (master_th->th.th_teams_microtask && !exit_teams &&
2576 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2577 team->t.t_level == master_th->th.th_teams_level + 1) {
2582 ompt_data_t ompt_parallel_data = ompt_data_none;
2583 if (ompt_enabled.enabled) {
2584 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2585 if (ompt_enabled.ompt_callback_implicit_task) {
2586 int ompt_team_size = team->t.t_nproc;
2587 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2588 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2589 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2591 task_info->frame.exit_frame = ompt_data_none;
2592 task_info->task_data = ompt_data_none;
2593 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2594 __ompt_lw_taskteam_unlink(master_th);
2599 team->t.t_active_level--;
2600 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2606 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2607 int old_num = master_th->th.th_team_nproc;
2608 int new_num = master_th->th.th_teams_size.nth;
2609 kmp_info_t **other_threads = team->t.t_threads;
2610 team->t.t_nproc = new_num;
2611 for (int i = 0; i < old_num; ++i) {
2612 other_threads[i]->th.th_team_nproc = new_num;
2615 for (int i = old_num; i < new_num; ++i) {
2617 KMP_DEBUG_ASSERT(other_threads[i]);
2618 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2619 for (int b = 0; b < bs_last_barrier; ++b) {
2620 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2621 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2623 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2626 if (__kmp_tasking_mode != tskm_immediate_exec) {
2628 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2634 if (ompt_enabled.enabled) {
2635 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2636 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2644 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2645 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2647 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2652 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2654 if (!master_th->th.th_teams_microtask ||
2655 team->t.t_level > master_th->th.th_teams_level) {
2657 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2659 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2662 if (ompt_enabled.enabled) {
2663 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2664 if (ompt_enabled.ompt_callback_implicit_task) {
2665 int flags = (team_microtask == (void *)__kmp_teams_master)
2666 ? ompt_task_initial
2667 : ompt_task_implicit;
2668 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2669 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2670 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2671 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2673 task_info->frame.exit_frame = ompt_data_none;
2674 task_info->task_data = ompt_data_none;
2678 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2680 __kmp_pop_current_task_from_thread(master_th);
2682 master_th->th.th_def_allocator = team->t.t_def_allocator;
2685 if (ompd_state & OMPD_ENABLE_BP)
2686 ompd_bp_parallel_end();
2688 updateHWFPControl(team);
2690 if (root->r.r_active != master_active)
2691 root->r.r_active = master_active;
2693 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2701 master_th->th.th_team = parent_team;
2702 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2703 master_th->th.th_team_master = parent_team->t.t_threads[0];
2704 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2707 if (parent_team->t.t_serialized &&
2708 parent_team != master_th->th.th_serial_team &&
2709 parent_team != root->r.r_root_team) {
2710 __kmp_free_team(root,
2711 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2712 master_th->th.th_serial_team = parent_team;
2715 if (__kmp_tasking_mode != tskm_immediate_exec) {
2717 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2718 team->t.t_primary_task_state == 1);
2719 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2722 master_th->th.th_task_team =
2723 parent_team->t.t_task_team[master_th->th.th_task_state];
2725 ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2726 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2733 master_th->th.th_current_task->td_flags.executing = 1;
2735 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2737#if KMP_AFFINITY_SUPPORTED
2738 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2739 __kmp_reset_root_init_mask(gtid);
2744 OMPT_INVOKER(fork_context) |
2745 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2746 : ompt_parallel_team);
2747 if (ompt_enabled.enabled) {
2748 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2754 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2759void __kmp_save_internal_controls(kmp_info_t *thread) {
2761 if (thread->th.th_team != thread->th.th_serial_team) {
2764 if (thread->th.th_team->t.t_serialized > 1) {
2767 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2770 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2771 thread->th.th_team->t.t_serialized) {
2776 kmp_internal_control_t *control =
2777 (kmp_internal_control_t *)__kmp_allocate(
2778 sizeof(kmp_internal_control_t));
2780 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2782 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2784 control->next = thread->th.th_team->t.t_control_stack_top;
2785 thread->th.th_team->t.t_control_stack_top = control;
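// omp_set_num_threads(): update the nproc ICV for the calling thread and, if
// the root's hot team is now larger than needed, release the excess threads.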
2791void __kmp_set_num_threads(int new_nth, int gtid) {
2795 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2796 KMP_DEBUG_ASSERT(__kmp_init_serial);
2800 else if (new_nth > __kmp_max_nth)
2801 new_nth = __kmp_max_nth;
2804 thread = __kmp_threads[gtid];
2805 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2808 __kmp_save_internal_controls(thread);
2810 set__nproc(thread, new_nth);
2815 root = thread->th.th_root;
2816 if (__kmp_init_parallel && (!root->r.r_active) &&
2817 (root->r.r_hot_team->t.t_nproc > new_nth)
2818#if KMP_NESTED_HOT_TEAMS
2819 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2822 kmp_team_t *hot_team = root->r.r_hot_team;
2825 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2827 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2828 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2831 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2832 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2833 if (__kmp_tasking_mode != tskm_immediate_exec) {
2836 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2838 __kmp_free_thread(hot_team->t.t_threads[f]);
2839 hot_team->t.t_threads[f] = NULL;
2841 hot_team->t.t_nproc = new_nth;
2842#if KMP_NESTED_HOT_TEAMS
2843 if (thread->th.th_hot_teams) {
2844 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2845 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2849 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2850 hot_team->t.b->update_num_threads(new_nth);
2851 __kmp_add_threads_to_team(hot_team, new_nth);
2854 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2857 for (f = 0; f < new_nth; f++) {
2858 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2859 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2862 hot_team->t.t_size_changed = -1;
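// omp_set_max_active_levels(): validate the requested value and store it in
// the calling thread's ICVs.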
2867void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2870 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
2872 gtid, max_active_levels));
2873 KMP_DEBUG_ASSERT(__kmp_init_serial);
2876 if (max_active_levels < 0) {
2877 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2882 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2883 "max_active_levels for thread %d = (%d)\n",
2884 gtid, max_active_levels));
2887 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2892 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2893 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2894 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2900 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2901 "max_active_levels for thread %d = (%d)\n",
2902 gtid, max_active_levels));
2904 thread = __kmp_threads[gtid];
2906 __kmp_save_internal_controls(thread);
2908 set__max_active_levels(thread, max_active_levels);
2912int __kmp_get_max_active_levels(int gtid) {
2915 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2916 KMP_DEBUG_ASSERT(__kmp_init_serial);
2918 thread = __kmp_threads[gtid];
2919 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2920 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2921 "curtask_maxaclevel=%d\n",
2922 gtid, thread->th.th_current_task,
2923 thread->th.th_current_task->td_icvs.max_active_levels));
2924 return thread->th.th_current_task->td_icvs.max_active_levels;
2928void __kmp_set_num_teams(int num_teams) {
2930 __kmp_nteams = num_teams;
2932int __kmp_get_max_teams(void) { return __kmp_nteams; }
2934void __kmp_set_teams_thread_limit(int limit) {
2936 __kmp_teams_thread_limit = limit;
2938int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2940KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2941KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
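// omp_set_schedule(): map the user-visible schedule kind onto the internal
// sched_type and record it, with the chunk size, in the calling thread's ICVs.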
2944void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2946 kmp_sched_t orig_kind;
2949 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2950 gtid, (int)kind, chunk));
2951 KMP_DEBUG_ASSERT(__kmp_init_serial);
2958 kind = __kmp_sched_without_mods(kind);
2960 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2961 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2963 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2964 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
2966 kind = kmp_sched_default;
2970 thread = __kmp_threads[gtid];
2972 __kmp_save_internal_controls(thread);
2974 if (kind < kmp_sched_upper_std) {
2975 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2978 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2980 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2981 __kmp_sch_map[kind - kmp_sched_lower - 1];
2986 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2987 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2988 kmp_sched_lower - 2];
2990 __kmp_sched_apply_mods_intkind(
2991 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2992 if (kind == kmp_sched_auto || chunk < 1) {
2994 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2996 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
3001void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
3005 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
3006 KMP_DEBUG_ASSERT(__kmp_init_serial);
3008 thread = __kmp_threads[gtid];
3010 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
3011 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
3013 case kmp_sch_static_greedy:
3014 case kmp_sch_static_balanced:
3015 *kind = kmp_sched_static;
3016 __kmp_sched_apply_mods_stdkind(kind, th_type);
3019 case kmp_sch_static_chunked:
3020 *kind = kmp_sched_static;
3022 case kmp_sch_dynamic_chunked:
3023 *kind = kmp_sched_dynamic;
3026 case kmp_sch_guided_iterative_chunked:
3027 case kmp_sch_guided_analytical_chunked:
3028 *kind = kmp_sched_guided;
3031 *kind = kmp_sched_auto;
3033 case kmp_sch_trapezoidal:
3034 *kind = kmp_sched_trapezoidal;
3036#if KMP_STATIC_STEAL_ENABLED
3037 case kmp_sch_static_steal:
3038 *kind = kmp_sched_static_steal;
3042 KMP_FATAL(UnknownSchedulingType, th_type);
3045 __kmp_sched_apply_mods_stdkind(kind, th_type);
3046 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
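// omp_get_ancestor_thread_num(): walk up the team hierarchy, counting
// serialized levels, until the requested nesting level is reached.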
3049int __kmp_get_ancestor_thread_num(int gtid, int level) {
3055 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3056 KMP_DEBUG_ASSERT(__kmp_init_serial);
3063 thr = __kmp_threads[gtid];
3064 team = thr->th.th_team;
3065 ii = team->t.t_level;
3069 if (thr->th.th_teams_microtask) {
3071 int tlevel = thr->th.th_teams_level;
3074 KMP_DEBUG_ASSERT(ii >= tlevel);
3086 return __kmp_tid_from_gtid(gtid);
3088 dd = team->t.t_serialized;
3090 while (ii > level) {
3091 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3093 if ((team->t.t_serialized) && (!dd)) {
3094 team = team->t.t_parent;
3098 team = team->t.t_parent;
3099 dd = team->t.t_serialized;
3104 return (dd > 1) ? (0) : (team->t.t_master_tid);
3107int __kmp_get_team_size(int gtid, int level) {
3113 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3114 KMP_DEBUG_ASSERT(__kmp_init_serial);
3121 thr = __kmp_threads[gtid];
3122 team = thr->th.th_team;
3123 ii = team->t.t_level;
3127 if (thr->th.th_teams_microtask) {
3129 int tlevel = thr->th.th_teams_level;
3132 KMP_DEBUG_ASSERT(ii >= tlevel);
3143 while (ii > level) {
3144 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3146 if (team->t.t_serialized && (!dd)) {
3147 team = team->t.t_parent;
3151 team = team->t.t_parent;
3156 return team->t.t_nproc;
3159kmp_r_sched_t __kmp_get_schedule_global() {
3164 kmp_r_sched_t r_sched;
3170 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3171 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3174 r_sched.r_sched_type = __kmp_static;
3177 r_sched.r_sched_type = __kmp_guided;
3179 r_sched.r_sched_type = __kmp_sched;
3181 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3183 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3185 r_sched.chunk = KMP_DEFAULT_CHUNK;
3187 r_sched.chunk = __kmp_chunk;
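// Ensure team->t.t_argv can hold argc entries, using the inline array when it
// fits and a page-allocated buffer otherwise.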
3195static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3197 KMP_DEBUG_ASSERT(team);
3198 if (!realloc || argc > team->t.t_max_argc) {
3200 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3201 "current entries=%d\n",
3202 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3204 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3205 __kmp_free((void *)team->t.t_argv);
3207 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3209 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3210 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
3212 team->t.t_id, team->t.t_max_argc));
3213 team->t.t_argv = &team->t.t_inline_argv[0];
3214 if (__kmp_storage_map) {
3215 __kmp_print_storage_map_gtid(
3216 -1, &team->t.t_inline_argv[0],
3217 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3218 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3223 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3224 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3226 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3228 team->t.t_id, team->t.t_max_argc));
3230 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3231 if (__kmp_storage_map) {
3232 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3233 &team->t.t_argv[team->t.t_max_argc],
3234 sizeof(void *) * team->t.t_max_argc,
3235 "team_%d.t_argv", team->t.t_id);
3241static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3243 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3245 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3246 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3247 sizeof(dispatch_shared_info_t) * num_disp_buff);
3248 team->t.t_dispatch =
3249 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3250 team->t.t_implicit_task_taskdata =
3251 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3252 team->t.t_max_nproc = max_nth;
3255 for (i = 0; i < num_disp_buff; ++i) {
3256 team->t.t_disp_buffer[i].buffer_index = i;
3257 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3261static void __kmp_free_team_arrays(kmp_team_t *team) {
3264 for (i = 0; i < team->t.t_max_nproc; ++i) {
3265 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3266 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3267 team->t.t_dispatch[i].th_disp_buffer = NULL;
3270#if KMP_USE_HIER_SCHED
3271 __kmp_dispatch_free_hierarchies(team);
3273 __kmp_free(team->t.t_threads);
3274 __kmp_free(team->t.t_disp_buffer);
3275 __kmp_free(team->t.t_dispatch);
3276 __kmp_free(team->t.t_implicit_task_taskdata);
3277 team->t.t_threads = NULL;
3278 team->t.t_disp_buffer = NULL;
3279 team->t.t_dispatch = NULL;
3280 team->t.t_implicit_task_taskdata = 0;
3283static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3284 kmp_info_t **oldThreads = team->t.t_threads;
3286 __kmp_free(team->t.t_disp_buffer);
3287 __kmp_free(team->t.t_dispatch);
3288 __kmp_free(team->t.t_implicit_task_taskdata);
3289 __kmp_allocate_team_arrays(team, max_nth);
3291 KMP_MEMCPY(team->t.t_threads, oldThreads,
3292 team->t.t_nproc * sizeof(kmp_info_t *));
3294 __kmp_free(oldThreads);
3297static kmp_internal_control_t __kmp_get_global_icvs(void) {
3299 kmp_r_sched_t r_sched =
3300 __kmp_get_schedule_global();
3302 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3304 kmp_internal_control_t g_icvs = {
3306 (kmp_int8)__kmp_global.g.g_dynamic,
3308 (kmp_int8)__kmp_env_blocktime,
3310 __kmp_dflt_blocktime,
3315 __kmp_dflt_team_nth,
3321 __kmp_dflt_max_active_levels,
3325 __kmp_nested_proc_bind.bind_types[0],
3326 __kmp_default_device,
3333static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3335 kmp_internal_control_t gx_icvs;
3336 gx_icvs.serial_nesting_level =
3338 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3339 gx_icvs.next = NULL;
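// One-time initialization of a root: allocate its serialized root team and
// its hot team and seed their barrier and scheduling state.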
3344static void __kmp_initialize_root(kmp_root_t *root) {
3346 kmp_team_t *root_team;
3347 kmp_team_t *hot_team;
3348 int hot_team_max_nth;
3349 kmp_r_sched_t r_sched =
3350 __kmp_get_schedule_global();
3351 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3352 KMP_DEBUG_ASSERT(root);
3353 KMP_ASSERT(!root->r.r_begin);
3356 __kmp_init_lock(&root->r.r_begin_lock);
3357 root->r.r_begin = FALSE;
3358 root->r.r_active = FALSE;
3359 root->r.r_in_parallel = 0;
3360 root->r.r_blocktime = __kmp_dflt_blocktime;
3361#if KMP_AFFINITY_SUPPORTED
3362 root->r.r_affinity_assigned = FALSE;
3367 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3370 __kmp_allocate_team(root,
3376 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3378 USE_NESTED_HOT_ARG(NULL)
3383 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3386 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3388 root->r.r_root_team = root_team;
3389 root_team->t.t_control_stack_top = NULL;
3392 root_team->t.t_threads[0] = NULL;
3393 root_team->t.t_nproc = 1;
3394 root_team->t.t_serialized = 1;
3396 root_team->t.t_sched.sched = r_sched.sched;
3397 root_team->t.t_nested_nth = &__kmp_nested_nth;
3400 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3401 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3405 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3408 __kmp_allocate_team(root,
3410 __kmp_dflt_team_nth_ub * 2,
3414 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3416 USE_NESTED_HOT_ARG(NULL)
3418 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3420 root->r.r_hot_team = hot_team;
3421 root_team->t.t_control_stack_top = NULL;
3424 hot_team->t.t_parent = root_team;
3427 hot_team_max_nth = hot_team->t.t_max_nproc;
3428 for (f = 0; f < hot_team_max_nth; ++f) {
3429 hot_team->t.t_threads[f] = NULL;
3431 hot_team->t.t_nproc = 1;
3433 hot_team->t.t_sched.sched = r_sched.sched;
3434 hot_team->t.t_size_changed = 0;
3435 hot_team->t.t_nested_nth = &__kmp_nested_nth;
3440typedef struct kmp_team_list_item {
3441 kmp_team_p const *entry;
3442 struct kmp_team_list_item *next;
3443} kmp_team_list_item_t;
3444typedef kmp_team_list_item_t *kmp_team_list_t;
3446static void __kmp_print_structure_team_accum(
3447 kmp_team_list_t list,
3448 kmp_team_p const *team
3458 KMP_DEBUG_ASSERT(list != NULL);
3463 __kmp_print_structure_team_accum(list, team->t.t_parent);
3464 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3468 while (l->next != NULL && l->entry != team) {
3471 if (l->next != NULL) {
3477 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3483 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3484 sizeof(kmp_team_list_item_t));
3491static void __kmp_print_structure_team(char const *title, kmp_team_p const *team
3494 __kmp_printf("%s", title);
3496 __kmp_printf("%2x %p\n", team->t.t_id, team);
3498 __kmp_printf(" - (nil)\n");
3502static void __kmp_print_structure_thread(char const *title,
3503 kmp_info_p const *thread) {
3504 __kmp_printf("%s", title);
3505 if (thread != NULL) {
3506 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3508 __kmp_printf(" - (nil)\n");
3512void __kmp_print_structure(void) {
3514 kmp_team_list_t list;
3518 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3522 __kmp_printf("\n------------------------------\nGlobal Thread "
3523 "Table\n------------------------------\n");
3526 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3527 __kmp_printf("%2d", gtid);
3528 if (__kmp_threads != NULL) {
3529 __kmp_printf(" %p", __kmp_threads[gtid]);
3531 if (__kmp_root != NULL) {
3532 __kmp_printf(" %p", __kmp_root[gtid]);
3539 __kmp_printf("\n------------------------------\nThreads\n--------------------"
3541 if (__kmp_threads != NULL) {
3543 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3544 kmp_info_t const *thread = __kmp_threads[gtid];
3545 if (thread != NULL) {
3546 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3547 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3548 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3549 __kmp_print_structure_team(" Serial Team: ",
3550 thread->th.th_serial_team);
3551 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3552 __kmp_print_structure_thread(" Primary: ",
3553 thread->th.th_team_master);
3554 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3555 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3556 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3557 __kmp_print_structure_thread(" Next in pool: ",
3558 thread->th.th_next_pool);
3560 __kmp_print_structure_team_accum(list, thread->th.th_team);
3561 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3565 __kmp_printf("Threads array is not allocated.\n");
3569 __kmp_printf("\n------------------------------\nUbers\n----------------------"
3571 if (__kmp_root != NULL) {
3573 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3574 kmp_root_t const *root = __kmp_root[gtid];
3576 __kmp_printf("GTID %2d %p:\n", gtid, root);
3577 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3578 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3579 __kmp_print_structure_thread(" Uber Thread: ",
3580 root->r.r_uber_thread);
3581 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3582 __kmp_printf(" In Parallel: %2d\n",
3583 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3585 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3586 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3590 __kmp_printf("Ubers array is not allocated.\n");
3593 __kmp_printf("\n------------------------------\nTeams\n----------------------"
3595 while (list->next != NULL) {
3596 kmp_team_p const *team = list->entry;
3598 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3599 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3600 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3601 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3602 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3603 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3604 for (i = 0; i < team->t.t_nproc; ++i) {
3605 __kmp_printf(" Thread %2d: ", i);
3606 __kmp_print_structure_thread("", team->t.t_threads[i]);
3608 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3614 __kmp_printf("\n------------------------------\nPools\n----------------------"
3616 __kmp_print_structure_thread("Thread pool: ",
3617 CCAST(kmp_info_t *, __kmp_thread_pool));
3618 __kmp_print_structure_team("Team pool: ",
3619 CCAST(kmp_team_t *, __kmp_team_pool));
3623 while (list != NULL) {
3624 kmp_team_list_item_t *item = list;
3626 KMP_INTERNAL_FREE(item);
3635static const unsigned __kmp_primes[] = {
3636 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3637 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3638 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3639 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3640 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3641 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3642 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3643 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3644 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3645 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3646 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
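// Per-thread linear congruential generator: x is advanced as x = a * x + 1,
// with 'a' a per-thread prime from __kmp_primes; the top 16 bits are returned.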
3650unsigned short __kmp_get_random(kmp_info_t *thread) {
3651 unsigned x = thread->th.th_x;
3652 unsigned short r = (unsigned short)(x >> 16);
3654 thread->th.th_x = x * thread->th.th_a + 1;
3656 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3657 thread->th.th_info.ds.ds_tid, r));
3663void __kmp_init_random(kmp_info_t *thread) {
3664 unsigned seed = thread->th.th_info.ds.ds_tid;
3667 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3668 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3670 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3676static int __kmp_reclaim_dead_roots(void) {
3679 for (i = 0; i < __kmp_threads_capacity; ++i) {
3680 if (KMP_UBER_GTID(i) &&
3681 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3684 r += __kmp_unregister_root_other_thread(i);
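// Grow __kmp_threads and __kmp_root by at least nNeed slots, doubling the
// capacity up to __kmp_sys_max_nth; old arrays are kept on a list so stale
// references stay valid.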
3709static int __kmp_expand_threads(int nNeed) {
3711 int minimumRequiredCapacity;
3713 kmp_info_t **newThreads;
3714 kmp_root_t **newRoot;
3720#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3723 added = __kmp_reclaim_dead_roots();
3752 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3755 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3759 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3761 newCapacity = __kmp_threads_capacity;
3763 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3764 : __kmp_sys_max_nth;
3765 } while (newCapacity < minimumRequiredCapacity);
3766 newThreads = (kmp_info_t **)__kmp_allocate(
3767 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3769 (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3770 KMP_MEMCPY(newThreads, __kmp_threads,
3771 __kmp_threads_capacity * sizeof(kmp_info_t *));
3772 KMP_MEMCPY(newRoot, __kmp_root,
3773 __kmp_threads_capacity * sizeof(kmp_root_t *));
3776 kmp_old_threads_list_t *node =
3777 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3778 node->threads = __kmp_threads;
3779 node->next = __kmp_old_threads_list;
3780 __kmp_old_threads_list = node;
3782 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
3783 *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3784 added += newCapacity - __kmp_threads_capacity;
3785 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3787 if (newCapacity > __kmp_tp_capacity) {
3788 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3789 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3790 __kmp_threadprivate_resize_cache(newCapacity);
3792 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3794 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
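// Register the calling thread as a new OpenMP root (uber) thread: find a free
// gtid slot, allocate the root and thread structures, and set up its serial
// team.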
3803int __kmp_register_root(int initial_thread) {
3804 kmp_info_t *root_thread;
3808 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3809 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3826 capacity = __kmp_threads_capacity;
3827 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3834 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3835 capacity -= __kmp_hidden_helper_threads_num;
3839 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3840 if (__kmp_tp_cached) {
3841 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3842 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3843 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3845 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3855 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3858 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3859 gtid <= __kmp_hidden_helper_threads_num;
3862 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3863 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3864 "hidden helper thread: T#%d\n",
3870 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3873 for (gtid = __kmp_hidden_helper_threads_num + 1;
3874 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3878 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3879 KMP_ASSERT(gtid < __kmp_threads_capacity);
3884 TCW_4(__kmp_nth, __kmp_nth + 1);
3888 if (__kmp_adjust_gtid_mode) {
3889 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3890 if (TCR_4(__kmp_gtid_mode) != 2) {
3891 TCW_4(__kmp_gtid_mode, 2);
3894 if (TCR_4(__kmp_gtid_mode) != 1) {
3895 TCW_4(__kmp_gtid_mode, 1);
3900#ifdef KMP_ADJUST_BLOCKTIME
3903 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3904 if (__kmp_nth > __kmp_avail_proc) {
3905 __kmp_zero_bt = TRUE;
3911 if (!(root = __kmp_root[gtid])) {
3912 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3913 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3916#if KMP_STATS_ENABLED
3918 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3919 __kmp_stats_thread_ptr->startLife();
3920 KMP_SET_THREAD_STATE(SERIAL_REGION);
3923 __kmp_initialize_root(root);
3926 if (root->r.r_uber_thread) {
3927 root_thread = root->r.r_uber_thread;
3929 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3930 if (__kmp_storage_map) {
3931 __kmp_print_thread_storage_map(root_thread, gtid);
3933 root_thread->th.th_info.ds.ds_gtid = gtid;
3935 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3937 root_thread->th.th_root = root;
3938 if (__kmp_env_consistency_check) {
3939 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3942 __kmp_initialize_fast_memory(root_thread);
3946 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3947 __kmp_initialize_bget(root_thread);
3949 __kmp_init_random(root_thread);
3953 if (!root_thread->th.th_serial_team) {
3954 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3955 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3956 root_thread->th.th_serial_team = __kmp_allocate_team(
3961 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3963 KMP_ASSERT(root_thread->th.th_serial_team);
3964 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3965 root_thread->th.th_serial_team));
3968 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3970 root->r.r_root_team->t.t_threads[0] = root_thread;
3971 root->r.r_hot_team->t.t_threads[0] = root_thread;
3972 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3974 root_thread->th.th_serial_team->t.t_serialized = 0;
3975 root->r.r_uber_thread = root_thread;
3978 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3979 TCW_4(__kmp_init_gtid, TRUE);
3982 __kmp_gtid_set_specific(gtid);
3985 __kmp_itt_thread_name(gtid);
3988#ifdef KMP_TDATA_GTID
3991 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3992 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3994 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3996 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3997 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3998 KMP_INIT_BARRIER_STATE));
4001 for (b = 0; b < bs_last_barrier; ++b) {
4002 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4004 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4008 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4009 KMP_INIT_BARRIER_STATE);
4011#if KMP_AFFINITY_SUPPORTED
4012 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4013 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4014 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4015 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4017 root_thread->th.th_def_allocator = __kmp_def_allocator;
4018 root_thread->th.th_prev_level = 0;
4019 root_thread->th.th_prev_num_threads = 1;
4021 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
4022 tmp->cg_root = root_thread;
4023 tmp->cg_thread_limit = __kmp_cg_max_nth;
4024 tmp->cg_nthreads = 1;
4025 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
4026 " cg_nthreads init to 1\n",
4029 root_thread->th.th_cg_roots = tmp;
4031 __kmp_root_counter++;
4034 if (ompt_enabled.enabled) {
4036 kmp_info_t *root_thread = ompt_get_thread();
4038 ompt_set_thread_state(root_thread, ompt_state_overhead);
4040 if (ompt_enabled.ompt_callback_thread_begin) {
4041 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4042 ompt_thread_initial, __ompt_get_thread_data_internal());
4044 ompt_data_t *task_data;
4045 ompt_data_t *parallel_data;
4046 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4048 if (ompt_enabled.ompt_callback_implicit_task) {
4049 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4050 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4053 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4057 if (ompd_state & OMPD_ENABLE_BP)
4058 ompd_bp_thread_begin();
4062 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4067#if KMP_NESTED_HOT_TEAMS
4068static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
4069 const int max_level) {
4071 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4072 if (!hot_teams || !hot_teams[level].hot_team) {
4075 KMP_DEBUG_ASSERT(level < max_level);
4076 kmp_team_t *team = hot_teams[level].hot_team;
4077 nth = hot_teams[level].hot_team_nth;
4079 if (level < max_level - 1) {
4080 for (i = 0; i < nth; ++i) {
4081 kmp_info_t *th = team->t.t_threads[i];
4082 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4083 if (i > 0 && th->th.th_hot_teams) {
4084 __kmp_free(th->th.th_hot_teams);
4085 th->th.th_hot_teams = NULL;
4089 __kmp_free_team(root, team, NULL);
4096static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4097 kmp_team_t *root_team = root->r.r_root_team;
4098 kmp_team_t *hot_team = root->r.r_hot_team;
4099 int n = hot_team->t.t_nproc;
4102 KMP_DEBUG_ASSERT(!root->r.r_active);
4104 root->r.r_root_team = NULL;
4105 root->r.r_hot_team = NULL;
4108 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4109#if KMP_NESTED_HOT_TEAMS
4110 if (__kmp_hot_teams_max_level >
4112 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4113 kmp_info_t *th = hot_team->t.t_threads[i];
4114 if (__kmp_hot_teams_max_level > 1) {
4115 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4117 if (th->th.th_hot_teams) {
4118 __kmp_free(th->th.th_hot_teams);
4119 th->th.th_hot_teams = NULL;
4124 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4129 if (__kmp_tasking_mode != tskm_immediate_exec) {
4130 __kmp_wait_to_unref_task_teams();
4136 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4138 (LPVOID) & (root->r.r_uber_thread->th),
4139 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4140 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4144 if (ompd_state & OMPD_ENABLE_BP)
4145 ompd_bp_thread_end();
4149 ompt_data_t *task_data;
4150 ompt_data_t *parallel_data;
4151 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4153 if (ompt_enabled.ompt_callback_implicit_task) {
4154 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4155 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4157 if (ompt_enabled.ompt_callback_thread_end) {
4158 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4159 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4165 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4166 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4168 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4169 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4172 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4173 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4174 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4175 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4176 root->r.r_uber_thread->th.th_cg_roots = NULL;
4178 __kmp_reap_thread(root->r.r_uber_thread, 1);
4182 root->r.r_uber_thread = NULL;
4184 root->r.r_begin = FALSE;
4189void __kmp_unregister_root_current_thread(int gtid) {
4190 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4194 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4195 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4196 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
4199 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4202 kmp_root_t *root = __kmp_root[gtid];
4204 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4205 KMP_ASSERT(KMP_UBER_GTID(gtid));
4206 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4207 KMP_ASSERT(root->r.r_active == FALSE);
4211 kmp_info_t *thread = __kmp_threads[gtid];
4212 kmp_team_t *team = thread->th.th_team;
4213 kmp_task_team_t *task_team = thread->th.th_task_team;
4216 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4217 task_team->tt.tt_hidden_helper_task_encountered)) {
4220 thread->th.ompt_thread_info.state = ompt_state_undefined;
4222 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4225 __kmp_reset_root(gtid, root);
4229 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4231 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4238static int __kmp_unregister_root_other_thread(int gtid) {
4239 kmp_root_t *root = __kmp_root[gtid];
4242 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4243 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4244 KMP_ASSERT(KMP_UBER_GTID(gtid));
4245 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4246 KMP_ASSERT(root->r.r_active == FALSE);
4248 r = __kmp_reset_root(gtid, root);
4250 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4256void __kmp_task_info() {
4258 kmp_int32 gtid = __kmp_entry_gtid();
4259 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4260 kmp_info_t *this_thr = __kmp_threads[gtid];
4261 kmp_team_t *steam = this_thr->th.th_serial_team;
4262 kmp_team_t *team = this_thr->th.th_team;
4265 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4267 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4268 team->t.t_implicit_task_taskdata[tid].td_parent);
4275static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4276 int tid, int gtid) {
4280 KMP_DEBUG_ASSERT(this_thr != NULL);
4281 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4282 KMP_DEBUG_ASSERT(team);
4283 KMP_DEBUG_ASSERT(team->t.t_threads);
4284 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4285 kmp_info_t *master = team->t.t_threads[0];
4286 KMP_DEBUG_ASSERT(master);
4287 KMP_DEBUG_ASSERT(master->th.th_root);
4291 TCW_SYNC_PTR(this_thr->th.th_team, team);
4293 this_thr->th.th_info.ds.ds_tid = tid;
4294 this_thr->th.th_set_nproc = 0;
4295 if (__kmp_tasking_mode != tskm_immediate_exec)
4298 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4300 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4301 this_thr->th.th_set_proc_bind = proc_bind_default;
4303#if KMP_AFFINITY_SUPPORTED
4304 this_thr->th.th_new_place = this_thr->th.th_current_place;
4306 this_thr->th.th_root = master->th.th_root;
4309 this_thr->th.th_team_nproc = team->t.t_nproc;
4310 this_thr->th.th_team_master = master;
4311 this_thr->th.th_team_serialized = team->t.t_serialized;
4313 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4315 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4316 tid, gtid, this_thr, this_thr->th.th_current_task));
4318 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4321 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4322 tid, gtid, this_thr, this_thr->th.th_current_task));
4327 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4329 this_thr->th.th_local.this_construct = 0;
4331 if (!this_thr->th.th_pri_common) {
4332 this_thr->th.th_pri_common =
4333 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4334 if (__kmp_storage_map) {
4335 __kmp_print_storage_map_gtid(
4336 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4337 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4339 this_thr->th.th_pri_head = NULL;
4342 if (this_thr != master &&
4343 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4345 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4346 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4349 int i = tmp->cg_nthreads--;
4350 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4351 " on node %p of thread %p to %d\n",
4352 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4357 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4359 this_thr->th.th_cg_roots->cg_nthreads++;
4360 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4361 " node %p of thread %p to %d\n",
4362 this_thr, this_thr->th.th_cg_roots,
4363 this_thr->th.th_cg_roots->cg_root,
4364 this_thr->th.th_cg_roots->cg_nthreads));
4365 this_thr->th.th_current_task->td_icvs.thread_limit =
4366 this_thr->th.th_cg_roots->cg_thread_limit;
4371 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4374 sizeof(dispatch_private_info_t) *
4375 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4376 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4377 team->t.t_max_nproc));
4378 KMP_ASSERT(dispatch);
4379 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4380 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4382 dispatch->th_disp_index = 0;
4383 dispatch->th_doacross_buf_idx = 0;
4384 if (!dispatch->th_disp_buffer) {
4385 dispatch->th_disp_buffer =
4386 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4388 if (__kmp_storage_map) {
4389 __kmp_print_storage_map_gtid(
4390 gtid, &dispatch->th_disp_buffer[0],
4391 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4393 : __kmp_dispatch_num_buffers],
4395 "th_%d.th_dispatch.th_disp_buffer "
4396 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4397 gtid, team->t.t_id, gtid);
4400 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4403 dispatch->th_dispatch_pr_current = 0;
4404 dispatch->th_dispatch_sh_current = 0;
4406 dispatch->th_deo_fcn = 0;
4407 dispatch->th_dxo_fcn = 0;
4410 this_thr->th.th_next_pool = NULL;
4412 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4413 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
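// Obtain a worker thread for 'team': reuse one from the thread pool when
// possible, otherwise allocate a fresh kmp_info_t, give it a serial team, and
// start the underlying OS worker.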
4423kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4425 kmp_team_t *serial_team;
4426 kmp_info_t *new_thr;
4429 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4430 KMP_DEBUG_ASSERT(root && team);
4431#if !KMP_NESTED_HOT_TEAMS
4432 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4439 if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4440 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4441 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4442 if (new_thr == __kmp_thread_pool_insert_pt) {
4443 __kmp_thread_pool_insert_pt = NULL;
4445 TCW_4(new_thr->th.th_in_pool, FALSE);
4446 __kmp_suspend_initialize_thread(new_thr);
4447 __kmp_lock_suspend_mx(new_thr);
4448 if (new_thr->th.th_active_in_pool == TRUE) {
4449 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4450 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4451 new_thr->th.th_active_in_pool = FALSE;
4453 __kmp_unlock_suspend_mx(new_thr);
4455 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4456 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4457 KMP_ASSERT(!new_thr->th.th_team);
4458 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4461 __kmp_initialize_info(new_thr, team, new_tid,
4462 new_thr->th.th_info.ds.ds_gtid);
4463 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4465 TCW_4(__kmp_nth, __kmp_nth + 1);
4467 new_thr->th.th_task_state = 0;
4469 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4471 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4475#ifdef KMP_ADJUST_BLOCKTIME
4478 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4479 if (__kmp_nth > __kmp_avail_proc) {
4480 __kmp_zero_bt = TRUE;
4489 kmp_balign_t *balign = new_thr->th.th_bar;
4490 for (b = 0; b < bs_last_barrier; ++b)
4491 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4494 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4495 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4502 KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
4503 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4508 if (!TCR_4(__kmp_init_monitor)) {
4509 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4510 if (!TCR_4(__kmp_init_monitor)) {
4511 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4512 TCW_4(__kmp_init_monitor, 1);
4513 __kmp_create_monitor(&__kmp_monitor);
4514 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4525 while (TCR_4(__kmp_init_monitor) < 2) {
4528 KF_TRACE(10, ("after monitor thread has started\n"));
4531 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4538 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4540 : __kmp_hidden_helper_threads_num + 1;
4542 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4544 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4547 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4548 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4553 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4555 new_thr->th.th_nt_strict = false;
4556 new_thr->th.th_nt_loc = NULL;
4557 new_thr->th.th_nt_sev = severity_fatal;
4558 new_thr->th.th_nt_msg = NULL;
4560 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4562#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4565 __itt_suppress_mark_range(
4566 __itt_suppress_range, __itt_suppress_threading_errors,
4567 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4568 __itt_suppress_mark_range(
4569 __itt_suppress_range, __itt_suppress_threading_errors,
4570 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4572 __itt_suppress_mark_range(
4573 __itt_suppress_range, __itt_suppress_threading_errors,
4574 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4576 __itt_suppress_mark_range(__itt_suppress_range,
4577 __itt_suppress_threading_errors,
4578 &new_thr->th.th_suspend_init_count,
4579 sizeof(new_thr->th.th_suspend_init_count));
4582 __itt_suppress_mark_range(__itt_suppress_range,
4583 __itt_suppress_threading_errors,
4584 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4585 sizeof(new_thr->th.th_bar[0].bb.b_go));
4586 __itt_suppress_mark_range(__itt_suppress_range,
4587 __itt_suppress_threading_errors,
4588 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4589 sizeof(new_thr->th.th_bar[1].bb.b_go));
4590 __itt_suppress_mark_range(__itt_suppress_range,
4591 __itt_suppress_threading_errors,
4592 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4593 sizeof(new_thr->th.th_bar[2].bb.b_go));
4595 if (__kmp_storage_map) {
4596 __kmp_print_thread_storage_map(new_thr, new_gtid);
4601 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4602 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4603 new_thr->th.th_serial_team = serial_team =
4604 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4608 proc_bind_default, &r_icvs,
4609 0 USE_NESTED_HOT_ARG(NULL));
4611 KMP_ASSERT(serial_team);
4612 serial_team->t.t_serialized = 0;
4614 serial_team->t.t_threads[0] = new_thr;
4616 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4620 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4623 __kmp_initialize_fast_memory(new_thr);
4627 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4628 __kmp_initialize_bget(new_thr);
4631 __kmp_init_random(new_thr);
4635 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4636 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4639 kmp_balign_t *balign = new_thr->th.th_bar;
4640 for (b = 0; b < bs_last_barrier; ++b) {
4641 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4642 balign[b].bb.team = NULL;
4643 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4644 balign[b].bb.use_oncore_barrier = 0;
4647 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4648 new_thr->th.th_sleep_loc_type = flag_unset;
4650 new_thr->th.th_spin_here = FALSE;
4651 new_thr->th.th_next_waiting = 0;
4653 new_thr->th.th_blocking = false;
4656#if KMP_AFFINITY_SUPPORTED
4657 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4658 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4659 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4660 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4662 new_thr->th.th_def_allocator = __kmp_def_allocator;
4663 new_thr->th.th_prev_level = 0;
4664 new_thr->th.th_prev_num_threads = 1;
4666 TCW_4(new_thr->th.th_in_pool, FALSE);
4667 new_thr->th.th_active_in_pool = FALSE;
4668 TCW_4(new_thr->th.th_active, TRUE);
4670 new_thr->th.th_set_nested_nth = NULL;
4671 new_thr->th.th_set_nested_nth_sz = 0;
4679 if (__kmp_adjust_gtid_mode) {
4680 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4681 if (TCR_4(__kmp_gtid_mode) != 2) {
4682 TCW_4(__kmp_gtid_mode, 2);
4685 if (TCR_4(__kmp_gtid_mode) != 1) {
4686 TCW_4(__kmp_gtid_mode, 1);
4691#ifdef KMP_ADJUST_BLOCKTIME
4694 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4695 if (__kmp_nth > __kmp_avail_proc) {
4696 __kmp_zero_bt = TRUE;
4701#if KMP_AFFINITY_SUPPORTED
4703 __kmp_affinity_set_init_mask(new_gtid, FALSE);
4708 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4709 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4711 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4713 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4724static void __kmp_reinitialize_team(kmp_team_t *team,
4725 kmp_internal_control_t *new_icvs,
4727 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4728 team->t.t_threads[0], team));
4729 KMP_DEBUG_ASSERT(team && new_icvs);
4730 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4731 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4733 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4735 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4736 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4738 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4739 team->t.t_threads[0], team));
4745static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4746 kmp_internal_control_t *new_icvs,
4748 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4751 KMP_DEBUG_ASSERT(team);
4752 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4753 KMP_DEBUG_ASSERT(team->t.t_threads);
4756 team->t.t_master_tid = 0;
4758 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4759 team->t.t_nproc = new_nproc;
4762 team->t.t_next_pool = NULL;
4766 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4767 team->t.t_invoke = NULL;
4770 team->t.t_sched.sched = new_icvs->sched.sched;
4772#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4773 team->t.t_fp_control_saved = FALSE;
4774 team->t.t_x87_fpu_control_word = 0;
4775 team->t.t_mxcsr = 0;
4778 team->t.t_construct = 0;
4780 team->t.t_ordered.dt.t_value = 0;
4781 team->t.t_master_active = FALSE;
4784 team->t.t_copypriv_data = NULL;
4787 team->t.t_copyin_counter = 0;
4790 team->t.t_control_stack_top = NULL;
4792 __kmp_reinitialize_team(team, new_icvs, loc);
4795 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;
  if (newp != th->th.th_current_place) {
    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
      team->t.t_display_affinity = 1;
    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
  }
}
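/* Assign a place partition to every thread in the team according to the
   team's proc_bind policy:
     - proc_bind_primary: all threads share the primary thread's place;
     - proc_bind_close:   threads fill places consecutively, starting at the
                          primary thread's place and wrapping within
                          [first_place, last_place];
     - proc_bind_spread:  threads are spaced evenly over the place partition,
                          and each thread's own partition is narrowed to the
                          sub-range it was assigned.
   With update_master_only != 0 only the primary thread's entry is updated. */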
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  int f;
  // Do not partition places for the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  int num_masks = __kmp_affinity.num_masks;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have the proc_bind policy set to proc_bind_default;
    // nothing to do, since the primary thread is never rebound here.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
  case proc_bind_close: {
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        __kmp_set_thread_place(team, th, first_place, last_place, place);

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, first_place, last_place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: add an extra thread to this place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;
  case proc_bind_spread: {
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != num_masks) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          int fplace = place, nplace = place;
          s_count = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          __kmp_set_thread_place(team, th, fplace, place, nplace);
          gap_ct++;

          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, num_masks));
        }
      } else {
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = n_places - 1;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          KMP_DEBUG_ASSERT(0 <= first);
          KMP_DEBUG_ASSERT(n_places > first);
          KMP_DEBUG_ASSERT(0 <= last);
          KMP_DEBUG_ASSERT(n_places > last);
          KMP_DEBUG_ASSERT(last_place >= first_place);
          th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th);
          __kmp_set_thread_place(team, th, first, last, place);
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], spacing = %.4f\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]),
                    team->t.t_id, f, th->th.th_new_place,
                    th->th.th_first_place, th->th.th_last_place, spacing));
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else { // n_th > n_places
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, place, place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: add an extra thread to this place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // inside the teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          ((team->t.t_pkfn == (microtask_t)__kmp_teams_master) ||
           (master->th.th_teams_level < team->t.t_level))) {
        ++level; // new team for a nested parallel inside the teams construct
      }
      // Do not perform the place partition for the inner fork of the teams
      // construct; wait until a nested parallel region is encountered.
      if ((master->th.th_teams_size.nteams == 1 &&
           master->th.th_teams_level >= team->t.t_level) ||
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
        do_place_partition = 0;
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      // hot team has already been allocated for this level
      use_hot_team = 1;
    } else {
      use_hot_team = 0;
    }
  } else {
    // make sure we won't access an uninitialized hot_teams pointer
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }
#endif // KMP_NESTED_HOT_TEAMS
  // Optimization to use a "hot" team
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }

    // The hot team's distributed barrier may need resizing for the new size
    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }

    // If not doing the place partition, keep the default proc_bind
    if (do_place_partition == 0)
      team->t.t_proc_bind = proc_bind_default;
    // Has the number of threads changed?
    if (team->t.t_nproc == new_nproc) { // unchanged: most common case
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // omp_set_num_threads() may already have reduced the hot team size,
      // which is signalled by the special flag value -1.
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      // set the primary thread's schedule as the new run-time schedule
      kmp_r_sched_t new_sched = new_icvs->sched;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          if (do_place_partition) {
            // update only the primary thread's place for spread
            __kmp_partition_places(team, 1);
          }
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        if (do_place_partition) {
          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
          __kmp_partition_places(team);
        }
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // The distributed barrier was already resized above; activate the
        // surviving threads via their th_used_in_team flags.
        __kmp_add_threads_to_team(team, new_nproc);
      }
      // Threads no longer in the team should unref their task team.
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th);
          th->th.th_task_team = NULL;
        }
      }
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // In this mode the saved thread count should match the team's value.
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
#if KMP_NESTED_HOT_TEAMS
      } else {
        // Keep the extra threads in the team, but switch them to wait on
        // their own b_go flag.
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }
#endif // KMP_NESTED_HOT_TEAMS

      team->t.t_nproc = new_nproc;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);
      /* update the remaining threads */
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }
      // restore the current task state of the primary thread
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } else { // team->t.t_nproc < new_nproc
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use it below
5350 team->t.t_size_changed = 1;
5352#if KMP_NESTED_HOT_TEAMS
5353 int avail_threads = hot_teams[level].hot_team_nth;
5354 if (new_nproc < avail_threads)
5355 avail_threads = new_nproc;
5356 kmp_info_t **other_threads = team->t.t_threads;
5357 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5361 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5362 for (b = 0; b < bs_last_barrier; ++b) {
5363 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5364 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5366 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5370 if (hot_teams[level].hot_team_nth >= new_nproc) {
5373 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5374 team->t.t_nproc = new_nproc;
5378 team->t.t_nproc = hot_teams[level].hot_team_nth;
5379 hot_teams[level].hot_team_nth = new_nproc;
5381 if (team->t.t_max_nproc < new_nproc) {
5383 __kmp_reallocate_team_arrays(team, new_nproc);
5384 __kmp_reinitialize_team(team, new_icvs, NULL);
5387#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5388 KMP_AFFINITY_SUPPORTED
5394 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5398 for (f = team->t.t_nproc; f < new_nproc; f++) {
5399 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5400 KMP_DEBUG_ASSERT(new_worker);
5401 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5412 kmp_balign_t *balign = new_worker->th.th_bar;
5413 for (b = 0; b < bs_last_barrier; ++b) {
5414 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5415 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5416 KMP_BARRIER_PARENT_FLAG);
5418 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5424#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5425 KMP_AFFINITY_SUPPORTED
5427 new_temp_affinity.restore();
5429#if KMP_NESTED_HOT_TEAMS
5432 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5435 __kmp_add_threads_to_team(team, new_nproc);
5439 __kmp_initialize_team(team, new_nproc, new_icvs,
5440 root->r.r_uber_thread->th.th_ident);
5443 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5444 for (f = 0; f < team->t.t_nproc; ++f)
5445 __kmp_initialize_info(team->t.t_threads[f], team, f,
5446 __kmp_gtid_from_tid(f, team));
5449 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5450 for (f = old_nproc; f < team->t.t_nproc; ++f)
5451 team->t.t_threads[f]->th.th_task_state = old_state;
5454 for (f = 0; f < team->t.t_nproc; ++f) {
5455 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5456 team->t.t_threads[f]->th.th_team_nproc ==
5461 if (do_place_partition) {
5462 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5463#if KMP_AFFINITY_SUPPORTED
5464 __kmp_partition_places(team);
5469 if (master->th.th_teams_microtask) {
5470 for (f = 1; f < new_nproc; ++f) {
5472 kmp_info_t *thr = team->t.t_threads[f];
5473 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5474 thr->th.th_teams_level = master->th.th_teams_level;
5475 thr->th.th_teams_size = master->th.th_teams_size;
5478#if KMP_NESTED_HOT_TEAMS
5482 for (f = 1; f < new_nproc; ++f) {
5483 kmp_info_t *thr = team->t.t_threads[f];
5485 kmp_balign_t *balign = thr->th.th_bar;
5486 for (b = 0; b < bs_last_barrier; ++b) {
5487 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5488 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5490 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5498 __kmp_alloc_argv_entries(argc, team, TRUE);
5499 KMP_CHECK_UPDATE(team->t.t_argc, argc);
    KF_TRACE(10, (" hot_team = %p\n", team));
5506 if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5514 __ompt_team_assign_id(team, ompt_parallel_data);
5524 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5527 if (team->t.t_max_nproc >= max_nproc) {
5529 __kmp_team_pool = team->t.t_next_pool;
5531 if (max_nproc > 1 &&
5532 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5534 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5539 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
5544 team->t.t_task_team[0] = NULL;
5545 team->t.t_task_team[1] = NULL;
5548 __kmp_alloc_argv_entries(argc, team, TRUE);
5549 KMP_CHECK_UPDATE(team->t.t_argc, argc);
      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5556 for (b = 0; b < bs_last_barrier; ++b) {
5557 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5559 team->t.t_bar[b].b_master_arrived = 0;
5560 team->t.t_bar[b].b_team_arrived = 0;
5565 team->t.t_proc_bind = new_proc_bind;
      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));
5571 __ompt_team_assign_id(team, ompt_parallel_data);
5574 team->t.t_nested_nth = NULL;
5585 team = __kmp_reap_team(team);
5586 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5594 team->t.t_max_nproc = max_nproc;
5595 if (max_nproc > 1 &&
5596 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5598 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5603 __kmp_allocate_team_arrays(team, max_nproc);
  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5606 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5611 team->t.t_task_team[0] = NULL;
5613 team->t.t_task_team[1] = NULL;
5616 if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5621 __kmp_alloc_argv_entries(argc, team, FALSE);
5622 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5629 for (b = 0; b < bs_last_barrier; ++b) {
5630 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5632 team->t.t_bar[b].b_master_arrived = 0;
5633 team->t.t_bar[b].b_team_arrived = 0;
5638 team->t.t_proc_bind = new_proc_bind;
5641 __ompt_team_assign_id(team, ompt_parallel_data);
5642 team->t.ompt_serialized_team_info = NULL;
5647 team->t.t_nested_nth = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
5668 KMP_DEBUG_ASSERT(team);
5669 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5670 KMP_DEBUG_ASSERT(team->t.t_threads);
5672 int use_hot_team = team == root->r.r_hot_team;
5673#if KMP_NESTED_HOT_TEAMS
5676 level = team->t.t_active_level - 1;
5677 if (master->th.th_teams_microtask) {
5678 if (master->th.th_teams_size.nteams > 1) {
5682 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5683 master->th.th_teams_level == team->t.t_level) {
5689 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5691 if (level < __kmp_hot_teams_max_level) {
5692 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5699 TCW_SYNC_PTR(team->t.t_pkfn,
5702 team->t.t_copyin_counter = 0;
5707 if (!use_hot_team) {
5708 if (__kmp_tasking_mode != tskm_immediate_exec) {
5710 for (f = 1; f < team->t.t_nproc; ++f) {
5711 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5712 kmp_info_t *th = team->t.t_threads[f];
5713 volatile kmp_uint32 *state = &th->th.th_reap_state;
5714 while (*state != KMP_SAFE_TO_REAP) {
5718 if (!__kmp_is_thread_alive(th, &ecode)) {
5719 *state = KMP_SAFE_TO_REAP;
5724 if (th->th.th_sleep_loc)
5725 __kmp_null_resume_wrapper(th);
5732 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5733 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5734 if (task_team != NULL) {
5735 for (f = 0; f < team->t.t_nproc; ++f) {
5736 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5737 team->t.t_threads[f]->th.th_task_team = NULL;
        KA_TRACE(20,
                 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
                  __kmp_get_gtid(), task_team, team->t.t_id));
5743#if KMP_NESTED_HOT_TEAMS
5744 __kmp_free_task_team(master, task_team);
5746 team->t.t_task_team[tt_idx] = NULL;
5752 if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
5753 team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
5754 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
5755 KMP_INTERNAL_FREE(team->t.t_nested_nth);
5757 team->t.t_nested_nth = NULL;
5760 team->t.t_parent = NULL;
5761 team->t.t_level = 0;
5762 team->t.t_active_level = 0;
5765 for (f = 1; f < team->t.t_nproc; ++f) {
5766 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5767 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5768 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5771 __kmp_free_thread(team->t.t_threads[f]);
5774 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5777 team->t.b->go_release();
5778 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5779 for (f = 1; f < team->t.t_nproc; ++f) {
5780 if (team->t.b->sleep[f].sleep) {
5781 __kmp_atomic_resume_64(
5782 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5783 (kmp_atomic_flag_64<> *)NULL);
      for (int f = 1; f < team->t.t_nproc; ++f) {
5789 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5795 for (f = 1; f < team->t.t_nproc; ++f) {
5796 team->t.t_threads[f] = NULL;
5799 if (team->t.t_max_nproc > 1 &&
5800 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5801 distributedBarrier::deallocate(team->t.b);
5806 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5807 __kmp_team_pool = (
volatile kmp_team_t *)team;
5810 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5811 team->t.t_threads[1]->th.th_cg_roots);
5812 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5814 for (f = 1; f < team->t.t_nproc; ++f) {
5815 kmp_info_t *thr = team->t.t_threads[f];
5816 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5817 thr->th.th_cg_roots->cg_root == thr);
5819 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5820 thr->th.th_cg_roots = tmp->up;
5821 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5822 " up to node %p. cg_nthreads was %d\n",
5823 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5824 int i = tmp->cg_nthreads--;
5829 if (thr->th.th_cg_roots)
5830 thr->th.th_current_task->td_icvs.thread_limit =
5831 thr->th.th_cg_roots->cg_thread_limit;
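/* Final destruction of a pooled team: free the team arrays and any
   heap-allocated argv block, then return the next team in the pool so the
   caller can keep walking the list. */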
5840kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5841 kmp_team_t *next_pool = team->t.t_next_pool;
5843 KMP_DEBUG_ASSERT(team);
5844 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5845 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5846 KMP_DEBUG_ASSERT(team->t.t_threads);
5847 KMP_DEBUG_ASSERT(team->t.t_argv);
5852 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
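/* Return a worker thread to the thread pool. The pool is kept sorted by
   ascending gtid so that the lowest gtids are reused first; the cached
   insertion point (__kmp_thread_pool_insert_pt) speeds up repeated inserts. */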
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);
5898 kmp_balign_t *balign = this_th->th.th_bar;
5899 for (b = 0; b < bs_last_barrier; ++b) {
5900 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5901 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5902 balign[b].bb.team = NULL;
5903 balign[b].bb.leaf_kids = 0;
5905 this_th->th.th_task_state = 0;
5906 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5909 TCW_PTR(this_th->th.th_team, NULL);
5910 TCW_PTR(this_th->th.th_root, NULL);
5911 TCW_PTR(this_th->th.th_dispatch, NULL);
5913 while (this_th->th.th_cg_roots) {
5914 this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
5920 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5921 if (tmp->cg_root == this_th) {
5922 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5924 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5925 this_th->th.th_cg_roots = tmp->up;
5928 if (tmp->cg_nthreads == 0) {
5931 this_th->th.th_cg_roots = NULL;
5941 __kmp_free_implicit_task(this_th);
5942 this_th->th.th_current_task = NULL;
5946 gtid = this_th->th.th_info.ds.ds_gtid;
5947 if (__kmp_thread_pool_insert_pt != NULL) {
5948 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5949 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5950 __kmp_thread_pool_insert_pt = NULL;
5959 if (__kmp_thread_pool_insert_pt != NULL) {
5960 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5962 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5964 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5965 scan = &((*scan)->th.th_next_pool))
5970 TCW_PTR(this_th->th.th_next_pool, *scan);
5971 __kmp_thread_pool_insert_pt = *scan = this_th;
5972 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5973 (this_th->th.th_info.ds.ds_gtid <
5974 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5975 TCW_4(this_th->th.th_in_pool, TRUE);
5976 __kmp_suspend_initialize_thread(this_th);
5977 __kmp_lock_suspend_mx(this_th);
5978 if (this_th->th.th_active == TRUE) {
5979 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5980 this_th->th.th_active_in_pool = TRUE;
5984 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5987 __kmp_unlock_suspend_mx(this_th);
5989 TCW_4(__kmp_nth, __kmp_nth - 1);
#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back if we now have no more threads than procs */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
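/* Entry point executed by every worker thread. The worker repeatedly waits at
   the fork barrier for work, invokes the team's microtask via t_invoke, and
   synchronizes at the join barrier, until the global shutdown flag
   __kmp_global.g.g_done is set. */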
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }
6027 if (ompd_state & OMPD_ENABLE_BP)
6028 ompd_bp_thread_begin();
6032 ompt_data_t *thread_data =
nullptr;
6033 if (ompt_enabled.enabled) {
6034 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6035 *thread_data = ompt_data_none;
6037 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6038 this_thr->th.ompt_thread_info.wait_id = 0;
6039 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6040 this_thr->th.ompt_thread_info.parallel_flags = 0;
6041 if (ompt_enabled.ompt_callback_thread_begin) {
6042 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6043 ompt_thread_worker, thread_data);
6045 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6050 while (!TCR_4(__kmp_global.g.g_done)) {
6051 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6055 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6058 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6061 if (ompt_enabled.enabled) {
6062 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6066 pteam = &this_thr->th.th_team;
6069 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6071 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6078 updateHWFPControl(*pteam);
6081 if (ompt_enabled.enabled) {
6082 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6086 rc = (*pteam)->t.t_invoke(gtid);
6090 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6091 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6092 (*pteam)->t.t_pkfn));
6095 if (ompt_enabled.enabled) {
6097 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6099 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6103 __kmp_join_barrier(gtid);
6108 if (ompd_state & OMPD_ENABLE_BP)
6109 ompd_bp_thread_end();
6113 if (ompt_enabled.ompt_callback_thread_end) {
6114 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6118 this_thr->th.th_task_team = NULL;
6120 __kmp_common_destroy_gtid(gtid);
6122 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6125#if OMP_PROFILING_SUPPORT
6126 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  // make sure no significant bits are lost
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6142 __kmp_internal_end_thread(gtid);
6145#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6148 __kmp_internal_end_atexit();
void __kmp_internal_end_atexit(void) {
6156 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);

  __kmp_close_console();
}
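/* Reap one thread during shutdown: release it from the fork barrier, join the
   underlying OS thread for non-root workers (__kmp_reap_worker), and free the
   per-thread resources (implicit task, fast memory, bget data, affinity mask,
   consistency-check stack, hierarchical-barrier data, and the serial team). */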
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6191 KMP_DEBUG_ASSERT(thread != NULL);
6193 gtid = thread->th.th_info.ds.ds_gtid;
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    /* Assume the worker is parked at the fork barrier; release it for reap */
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      while (!KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
        KMP_CPU_PAUSE();
      __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
    } else {
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
    }
  }
6216 __kmp_reap_worker(thread);
6228 if (thread->th.th_active_in_pool) {
6229 thread->th.th_active_in_pool = FALSE;
6230 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6231 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6235 __kmp_free_implicit_task(thread);
6239 __kmp_free_fast_memory(thread);
6242 __kmp_suspend_uninitialize_thread(thread);
6244 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6245 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6250#ifdef KMP_ADJUST_BLOCKTIME
6253 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6254 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6255 if (__kmp_nth <= __kmp_avail_proc) {
6256 __kmp_zero_bt = FALSE;
6262 if (__kmp_env_consistency_check) {
6263 if (thread->th.th_cons) {
6264 __kmp_free_cons_stack(thread->th.th_cons);
6265 thread->th.th_cons = NULL;
6269 if (thread->th.th_pri_common != NULL) {
6270 __kmp_free(thread->th.th_pri_common);
6271 thread->th.th_pri_common = NULL;
6275 if (thread->th.th_local.bget_data != NULL) {
6276 __kmp_finalize_bget(thread);
6280#if KMP_AFFINITY_SUPPORTED
6281 if (thread->th.th_affin_mask != NULL) {
6282 KMP_CPU_FREE(thread->th.th_affin_mask);
6283 thread->th.th_affin_mask = NULL;
6287#if KMP_USE_HIER_SCHED
6288 if (thread->th.th_hier_bar_data != NULL) {
6289 __kmp_free(thread->th.th_hier_bar_data);
6290 thread->th.th_hier_bar_data = NULL;
6294 __kmp_reap_team(thread->th.th_serial_team);
6295 thread->th.th_serial_team = NULL;
6302static void __kmp_itthash_clean(kmp_info_t *th) {
6304 if (__kmp_itt_region_domains.count > 0) {
6305 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6306 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6308 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6309 __kmp_thread_free(th, bucket);
6314 if (__kmp_itt_barrier_domains.count > 0) {
6315 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6316 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6318 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6319 __kmp_thread_free(th, bucket);
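/* Common shutdown path: unregister the library, reap the monitor thread (if
   any), verify that no root is still active, then drain the thread pool, the
   team pool, and the task teams before clearing the global init flags. */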
static void __kmp_internal_end(void) {
  int i;
6331 __kmp_unregister_library();
6338 __kmp_reclaim_dead_roots();
6342 for (i = 0; i < __kmp_threads_capacity; i++)
6344 if (__kmp_root[i]->r.r_active)
6347 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6349 if (i < __kmp_threads_capacity) {
6361 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6362 if (TCR_4(__kmp_init_monitor)) {
6363 __kmp_reap_monitor(&__kmp_monitor);
6364 TCW_4(__kmp_init_monitor, 0);
6366 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6367 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6373 for (i = 0; i < __kmp_threads_capacity; i++) {
6374 if (__kmp_root[i]) {
6377 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6386 while (__kmp_thread_pool != NULL) {
6388 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6389 __kmp_thread_pool = thread->th.th_next_pool;
6391 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6392 thread->th.th_next_pool = NULL;
6393 thread->th.th_in_pool = FALSE;
6394 __kmp_reap_thread(thread, 0);
6396 __kmp_thread_pool_insert_pt = NULL;
6399 while (__kmp_team_pool != NULL) {
6401 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6402 __kmp_team_pool = team->t.t_next_pool;
6404 team->t.t_next_pool = NULL;
6405 __kmp_reap_team(team);
6408 __kmp_reap_task_teams();
6415 for (i = 0; i < __kmp_threads_capacity; i++) {
6416 kmp_info_t *thr = __kmp_threads[i];
6417 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6422 for (i = 0; i < __kmp_threads_capacity; ++i) {
6429 TCW_SYNC_4(__kmp_init_common, FALSE);
6431 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6439 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6440 if (TCR_4(__kmp_init_monitor)) {
6441 __kmp_reap_monitor(&__kmp_monitor);
6442 TCW_4(__kmp_init_monitor, 0);
6444 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6445 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6448 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6464 if (__kmp_global.g.g_abort) {
6465 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6469 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6470 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6475 if (TCR_4(__kmp_init_hidden_helper) &&
6476 !TCR_4(__kmp_hidden_helper_team_done)) {
6477 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6479 __kmp_hidden_helper_main_thread_release();
6481 __kmp_hidden_helper_threads_deinitz_wait();
  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(
      10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6490 if (gtid == KMP_GTID_SHUTDOWN) {
6491 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6492 "already shutdown\n"));
6494 }
else if (gtid == KMP_GTID_MONITOR) {
6495 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6496 "registered, or system shutdown\n"));
6498 }
else if (gtid == KMP_GTID_DNE) {
6499 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6502 }
else if (KMP_UBER_GTID(gtid)) {
6504 if (__kmp_root[gtid]->r.r_active) {
6505 __kmp_global.g.g_abort = -1;
6506 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6507 __kmp_unregister_library();
6509 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6513 __kmp_itthash_clean(__kmp_threads[gtid]);
6516 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6517 __kmp_unregister_root_current_thread(gtid);
6524#ifdef DUMP_DEBUG_ON_EXIT
6525 if (__kmp_debug_buf)
6526 __kmp_dump_debug_buffer();
6531 __kmp_unregister_library();
6536 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6539 if (__kmp_global.g.g_abort) {
6540 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6542 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6545 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6546 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6555 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6558 __kmp_internal_end();
6560 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6561 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6563 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6565#ifdef DUMP_DEBUG_ON_EXIT
6566 if (__kmp_debug_buf)
6567 __kmp_dump_debug_buffer();
6571 __kmp_close_console();
6574 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6587 if (__kmp_global.g.g_abort) {
6588 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6592 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6593 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6598 if (TCR_4(__kmp_init_hidden_helper) &&
6599 !TCR_4(__kmp_hidden_helper_team_done)) {
6600 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6602 __kmp_hidden_helper_main_thread_release();
6604 __kmp_hidden_helper_threads_deinitz_wait();
  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6614 if (gtid == KMP_GTID_SHUTDOWN) {
6615 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6616 "already shutdown\n"));
6618 }
else if (gtid == KMP_GTID_MONITOR) {
6619 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6620 "registered, or system shutdown\n"));
6622 }
else if (gtid == KMP_GTID_DNE) {
6623 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6627 }
else if (KMP_UBER_GTID(gtid)) {
6629 if (__kmp_root[gtid]->r.r_active) {
6630 __kmp_global.g.g_abort = -1;
6631 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6633 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6637 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6639 __kmp_unregister_root_current_thread(gtid);
6643 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6646 __kmp_threads[gtid]->th.th_task_team = NULL;
6650 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6656 if (__kmp_pause_status != kmp_hard_paused)
6660 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6665 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6668 if (__kmp_global.g.g_abort) {
6669 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6671 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6674 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6675 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6686 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6688 for (i = 0; i < __kmp_threads_capacity; ++i) {
6689 if (KMP_UBER_GTID(i)) {
6692 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6693 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6694 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6701 __kmp_internal_end();
6703 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6704 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6706 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
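/* Library registration: to detect two copies of the runtime loaded into one
   process, the library publishes a "<flag address>-<flag value>-<file name>"
   string, either in an environment variable, a POSIX shared-memory segment,
   or a file under /tmp (see KMP_USE_SHM below). On startup any existing value
   is parsed and checked; a live duplicate triggers the DuplicateLibrary fatal
   error unless KMP_DUPLICATE_LIB_OK is set. */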
6717static long __kmp_registration_flag = 0;
6719static char *__kmp_registration_str = NULL;
6722static inline char *__kmp_reg_status_name() {
6728#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6729 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d_%d", (
int)getpid(),
6732 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int)getpid());
6736#if defined(KMP_USE_SHM)
6737bool __kmp_shm_available =
false;
6738bool __kmp_tmp_available =
false;
6740char *temp_reg_status_file_name =
nullptr;
void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the registration variable.
  union {
    double dtime;
    long ltime;
  } time; // used to generate a (mostly) unique registration value
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6767#if defined(KMP_USE_SHM)
  char *shm_name = nullptr;
  char *data1 = nullptr;
6770 __kmp_shm_available = __kmp_detect_shm();
6771 if (__kmp_shm_available) {
6773 shm_name = __kmp_str_format(
"/%s", name);
6774 int shm_preexist = 0;
6775 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6776 if ((fd1 == -1) && (errno == EEXIST)) {
6779 fd1 = shm_open(shm_name, O_RDWR, 0600);
6781 KMP_WARNING(FunctionError,
"Can't open SHM");
6782 __kmp_shm_available =
false;
6787 if (__kmp_shm_available && shm_preexist == 0) {
6788 if (ftruncate(fd1, SHM_SIZE) == -1) {
6789 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6790 __kmp_shm_available =
false;
6793 if (__kmp_shm_available) {
6794 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6796 if (data1 == MAP_FAILED) {
6797 KMP_WARNING(FunctionError,
"Can't map SHM");
6798 __kmp_shm_available =
false;
6801 if (__kmp_shm_available) {
6802 if (shm_preexist == 0) {
6803 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6806 value = __kmp_str_format(
"%s", data1);
6807 munmap(data1, SHM_SIZE);
6812 if (!__kmp_shm_available)
6813 __kmp_tmp_available = __kmp_detect_tmp();
6814 if (!__kmp_shm_available && __kmp_tmp_available) {
6821 temp_reg_status_file_name = __kmp_str_format(
"/tmp/%s", name);
6822 int tmp_preexist = 0;
6823 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6824 if ((fd1 == -1) && (errno == EEXIST)) {
6827 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6829 KMP_WARNING(FunctionError,
"Can't open TEMP");
6830 __kmp_tmp_available =
false;
6835 if (__kmp_tmp_available && tmp_preexist == 0) {
6837 if (ftruncate(fd1, SHM_SIZE) == -1) {
6838 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6839 __kmp_tmp_available =
false;
6842 if (__kmp_tmp_available) {
6843 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6845 if (data1 == MAP_FAILED) {
6846 KMP_WARNING(FunctionError,
"Can't map /tmp");
6847 __kmp_tmp_available =
false;
6850 if (__kmp_tmp_available) {
6851 if (tmp_preexist == 0) {
6852 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6855 value = __kmp_str_format(
"%s", data1);
6856 munmap(data1, SHM_SIZE);
6861 if (!__kmp_shm_available && !__kmp_tmp_available) {
6864 __kmp_env_set(name, __kmp_registration_str, 0);
6866 value = __kmp_env_get(name);
6870 __kmp_env_set(name, __kmp_registration_str, 0);
6872 value = __kmp_env_get(name);
6875 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6882 char *flag_addr_str = NULL;
6883 char *flag_val_str = NULL;
6884 char const *file_name = NULL;
6885 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6886 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6889 unsigned long *flag_addr = 0;
6890 unsigned long flag_val = 0;
6891 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6892 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6893 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6897 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6911 file_name =
"unknown library";
6916 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6917 if (!__kmp_str_match_true(duplicate_ok)) {
6919 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6920 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6922 KMP_INTERNAL_FREE(duplicate_ok);
6923 __kmp_duplicate_library_ok = 1;
6928#if defined(KMP_USE_SHM)
6929 if (__kmp_shm_available) {
6930 shm_unlink(shm_name);
6931 }
else if (__kmp_tmp_available) {
6932 unlink(temp_reg_status_file_name);
6935 __kmp_env_unset(name);
6939 __kmp_env_unset(name);
6943 KMP_DEBUG_ASSERT(0);
6947 KMP_INTERNAL_FREE((
void *)value);
6948#if defined(KMP_USE_SHM)
6950 KMP_INTERNAL_FREE((
void *)shm_name);
6953 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
6959 char *name = __kmp_reg_status_name();
6962#if defined(KMP_USE_SHM)
6963 char *shm_name =
nullptr;
6965 if (__kmp_shm_available) {
6966 shm_name = __kmp_str_format(
"/%s", name);
6967 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6969 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6970 if (data1 != MAP_FAILED) {
6971 value = __kmp_str_format(
"%s", data1);
6972 munmap(data1, SHM_SIZE);
6976 }
else if (__kmp_tmp_available) {
6977 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6979 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6980 if (data1 != MAP_FAILED) {
6981 value = __kmp_str_format(
"%s", data1);
6982 munmap(data1, SHM_SIZE);
6987 value = __kmp_env_get(name);
6990 value = __kmp_env_get(name);
6993 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6994 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6995 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6997#if defined(KMP_USE_SHM)
6998 if (__kmp_shm_available) {
6999 shm_unlink(shm_name);
7000 }
else if (__kmp_tmp_available) {
7001 unlink(temp_reg_status_file_name);
7003 __kmp_env_unset(name);
7006 __kmp_env_unset(name);
7010#if defined(KMP_USE_SHM)
7012 KMP_INTERNAL_FREE(shm_name);
7013 if (temp_reg_status_file_name)
7014 KMP_INTERNAL_FREE(temp_reg_status_file_name);
7017 KMP_INTERNAL_FREE(__kmp_registration_str);
7018 KMP_INTERNAL_FREE(value);
7019 KMP_INTERNAL_FREE(name);
7021 __kmp_registration_flag = 0;
7022 __kmp_registration_str = NULL;
7029#if KMP_MIC_SUPPORTED
7031static void __kmp_check_mic_type() {
7032 kmp_cpuid_t cpuid_state = {0};
7033 kmp_cpuid_t *cs_p = &cpuid_state;
7034 __kmp_x86_cpuid(1, 0, cs_p);
7036 if ((cs_p->eax & 0xff0) == 0xB10) {
7037 __kmp_mic_type = mic2;
7038 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
7039 __kmp_mic_type = mic3;
7041 __kmp_mic_type = non_mic;
7048static void __kmp_user_level_mwait_init() {
7049 struct kmp_cpuid buf;
7050 __kmp_x86_cpuid(7, 0, &buf);
7051 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
7052 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
7053 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
7054 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
7055 __kmp_umwait_enabled));
7058#ifndef AT_INTELPHIUSERMWAIT
7061#define AT_INTELPHIUSERMWAIT 10000
7066unsigned long getauxval(
unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7067unsigned long getauxval(
unsigned long) {
return 0; }
7069static void __kmp_user_level_mwait_init() {
7074 if (__kmp_mic_type == mic3) {
7075 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7076 if ((res & 0x1) || __kmp_user_level_mwait) {
7077 __kmp_mwait_enabled = TRUE;
7078 if (__kmp_user_level_mwait) {
7079 KMP_INFORM(EnvMwaitWarn);
7082 __kmp_mwait_enabled = FALSE;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif
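/* One-time (per process) serial initialization: sanity-check the fixed-width
   type sizes, create the global locks, size the thread/team capacities,
   allocate the __kmp_threads / __kmp_root arrays, register the initial root
   thread, and install the atexit handler and signal handlers where
   configured. */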
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7111 __kmp_validate_locks();
7113#if ENABLE_LIBOMPTARGET
7115 __kmp_init_omptarget();
7119 __kmp_init_allocator();
7125 if (__kmp_need_register_serial)
7126 __kmp_register_library_startup();
7129 if (TCR_4(__kmp_global.g.g_done)) {
7130 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7133 __kmp_global.g.g_abort = 0;
7134 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7137#if KMP_USE_ADAPTIVE_LOCKS
7138#if KMP_DEBUG_ADAPTIVE_LOCKS
7139 __kmp_init_speculative_stats();
7142#if KMP_STATS_ENABLED
7145 __kmp_init_lock(&__kmp_global_lock);
7146 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7147 __kmp_init_lock(&__kmp_debug_lock);
7148 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7149 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7150 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7151 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7152 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7153 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7154 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7155 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7156 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7157 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7158 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7159 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7160 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7161 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7162 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7164 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7166 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7170 __kmp_runtime_initialize();
7172#if KMP_MIC_SUPPORTED
7173 __kmp_check_mic_type();
7180 __kmp_abort_delay = 0;
7184 __kmp_dflt_team_nth_ub = __kmp_xproc;
7185 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7186 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7188 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7189 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7191 __kmp_max_nth = __kmp_sys_max_nth;
7192 __kmp_cg_max_nth = __kmp_sys_max_nth;
7193 __kmp_teams_max_nth = __kmp_xproc;
7194 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7195 __kmp_teams_max_nth = __kmp_sys_max_nth;
7200 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7202 __kmp_monitor_wakeups =
7203 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7204 __kmp_bt_intervals =
7205 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7208 __kmp_library = library_throughput;
7210 __kmp_static = kmp_sch_static_balanced;
7217#if KMP_FAST_REDUCTION_BARRIER
7218#define kmp_reduction_barrier_gather_bb ((int)1)
7219#define kmp_reduction_barrier_release_bb ((int)1)
7220#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7221#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7223 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7224 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7225 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7226 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7227 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7228#if KMP_FAST_REDUCTION_BARRIER
7229 if (i == bs_reduction_barrier) {
7231 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7232 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7233 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7234 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7238#if KMP_FAST_REDUCTION_BARRIER
7239#undef kmp_reduction_barrier_release_pat
7240#undef kmp_reduction_barrier_gather_pat
7241#undef kmp_reduction_barrier_release_bb
7242#undef kmp_reduction_barrier_gather_bb
7244#if KMP_MIC_SUPPORTED
7245 if (__kmp_mic_type == mic2) {
7247 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7248 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7250 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7251 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7253#if KMP_FAST_REDUCTION_BARRIER
7254 if (__kmp_mic_type == mic2) {
7255 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7256 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7263 __kmp_env_checks = TRUE;
7265 __kmp_env_checks = FALSE;
7269 __kmp_foreign_tp = TRUE;
7271 __kmp_global.g.g_dynamic = FALSE;
7272 __kmp_global.g.g_dynamic_mode = dynamic_default;
7274 __kmp_init_nesting_mode();
7276 __kmp_env_initialize(NULL);
7278#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7279 __kmp_user_level_mwait_init();
7283 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7284 if (__kmp_str_match_true(val)) {
7285 kmp_str_buf_t buffer;
7286 __kmp_str_buf_init(&buffer);
7287 __kmp_i18n_dump_catalog(&buffer);
7288 __kmp_printf(
"%s", buffer.str);
7289 __kmp_str_buf_free(&buffer);
7291 __kmp_env_free(&val);
7294 __kmp_threads_capacity =
7295 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7297 __kmp_tp_capacity = __kmp_default_tp_capacity(
7298 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7303 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7304 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7305 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7306 __kmp_thread_pool = NULL;
7307 __kmp_thread_pool_insert_pt = NULL;
7308 __kmp_team_pool = NULL;
  /* allocate the __kmp_threads and __kmp_root arrays in one block */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7322 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7324 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7329 gtid = __kmp_register_root(TRUE);
7330 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7331 KMP_ASSERT(KMP_UBER_GTID(gtid));
7332 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7336 __kmp_common_initialize();
7340 __kmp_register_atfork();
#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  {
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif
7357#if KMP_HANDLE_SIGNALS
7363 __kmp_install_signals(FALSE);
7366 __kmp_install_signals(TRUE);
7371 __kmp_init_counter++;
7373 __kmp_init_serial = TRUE;
7375 if (__kmp_version) {
7376 __kmp_print_version_1();
7379 if (__kmp_settings) {
7383 if (__kmp_display_env || __kmp_display_env_verbose) {
7384 __kmp_env_print_2();
7393 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;
7413 if (!__kmp_init_serial) {
7414 __kmp_do_serial_initialize();
7417 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7419 if (UNLIKELY(!__kmp_need_register_serial)) {
7422 __kmp_register_library_startup();
7427 prev_dflt_team_nth = __kmp_dflt_team_nth;
7429#if KMP_AFFINITY_SUPPORTED
7432 __kmp_affinity_initialize(__kmp_affinity);
7436 KMP_ASSERT(__kmp_xproc > 0);
7437 if (__kmp_avail_proc == 0) {
7438 __kmp_avail_proc = __kmp_xproc;
7444 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7445 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7450 if (__kmp_dflt_team_nth == 0) {
7451#ifdef KMP_DFLT_NTH_CORES
7453 __kmp_dflt_team_nth = __kmp_ncores;
7454 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7455 "__kmp_ncores (%d)\n",
7456 __kmp_dflt_team_nth));
7459 __kmp_dflt_team_nth = __kmp_avail_proc;
7460 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7461 "__kmp_avail_proc(%d)\n",
7462 __kmp_dflt_team_nth));
7466 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7467 __kmp_dflt_team_nth = KMP_MIN_NTH;
7469 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7470 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7473 if (__kmp_nesting_mode > 0)
7474 __kmp_set_nesting_mode_threads();
7478 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7480 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7485 for (i = 0; i < __kmp_threads_capacity; i++) {
7486 kmp_info_t *thread = __kmp_threads[i];
7489 if (thread->th.th_current_task->td_icvs.nproc != 0)
7492 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7497 KA_TRACE(20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth));
7500#ifdef KMP_ADJUST_BLOCKTIME
7502 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7503 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7504 if (__kmp_nth > __kmp_avail_proc) {
7505 __kmp_zero_bt = TRUE;
7511 TCW_SYNC_4(__kmp_init_middle, TRUE);
7513 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7516void __kmp_middle_initialize(void) {
7517 if (__kmp_init_middle) {
7520 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7521 if (__kmp_init_middle) {
7522 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7525 __kmp_do_middle_initialize();
7526 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7529void __kmp_parallel_initialize(void) {
7530 int gtid = __kmp_entry_gtid();
7533 if (TCR_4(__kmp_init_parallel))
7535 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7536 if (TCR_4(__kmp_init_parallel)) {
7537 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7542 if (TCR_4(__kmp_global.g.g_done)) {
7545 KA_TRACE(10, ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7546 __kmp_infinite_loop();
7552 if (!__kmp_init_middle) {
7553 __kmp_do_middle_initialize();
7555 __kmp_assign_root_init_mask();
7556 __kmp_resume_if_hard_paused();
7559 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7560 KMP_ASSERT(KMP_UBER_GTID(gtid));
7562#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7565 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7566 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7567 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7571#if KMP_HANDLE_SIGNALS
7573 __kmp_install_signals(TRUE);
7577 __kmp_suspend_initialize();
7579#if defined(USE_LOAD_BALANCE)
7580 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7581 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7584 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7585 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7589 if (__kmp_version) {
7590 __kmp_print_version_2();
7594 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7597 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7599 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7602void __kmp_hidden_helper_initialize() {
7603 if (TCR_4(__kmp_init_hidden_helper))
7607 if (!TCR_4(__kmp_init_parallel))
7608 __kmp_parallel_initialize();
7612 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7613 if (TCR_4(__kmp_init_hidden_helper)) {
7614 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7618#if KMP_AFFINITY_SUPPORTED
7622 if (!__kmp_hh_affinity.flags.initialized)
7623 __kmp_affinity_initialize(__kmp_hh_affinity);
7627 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7631 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7634 __kmp_do_initialize_hidden_helper_threads();
7637 __kmp_hidden_helper_threads_initz_wait();
7640 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7642 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7647void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7649 kmp_disp_t *dispatch;
7654 this_thr->th.th_local.this_construct = 0;
7656 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7658 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7659 KMP_DEBUG_ASSERT(dispatch);
7660 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7664 dispatch->th_disp_index = 0;
7665 dispatch->th_doacross_buf_idx = 0;
7666 if (__kmp_env_consistency_check)
7667 __kmp_push_parallel(gtid, team->t.t_ident);
7672void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7674 if (__kmp_env_consistency_check)
7675 __kmp_pop_parallel(gtid, team->t.t_ident);
7677 __kmp_finish_implicit_task(this_thr);
7680int __kmp_invoke_task_func(int gtid) {
7682 int tid = __kmp_tid_from_gtid(gtid);
7683 kmp_info_t *this_thr = __kmp_threads[gtid];
7684 kmp_team_t *team = this_thr->th.th_team;
7686 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7688 if (__itt_stack_caller_create_ptr) {
7690 if (team->t.t_stack_id != NULL) {
7691 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7693 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7694 __kmp_itt_stack_callee_enter(
7695 (__itt_caller)team->t.t_parent->t.t_stack_id);
7699#if INCLUDE_SSC_MARKS
7700 SSC_MARK_INVOKING();
7705 void **exit_frame_p;
7706 ompt_data_t *my_task_data;
7707 ompt_data_t *my_parallel_data;
7710 if (ompt_enabled.enabled) {
7711 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7712 .ompt_task_info.frame.exit_frame.ptr);
7714 exit_frame_p = &dummy;
7718 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7719 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7720 if (ompt_enabled.ompt_callback_implicit_task) {
7721 ompt_team_size = team->t.t_nproc;
7722 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7723 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7724 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7725 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7729#if KMP_STATS_ENABLED
7731 if (previous_state == stats_state_e::TEAMS_REGION) {
7732 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7734 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7736 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7739 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7740 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7747 *exit_frame_p = NULL;
7748 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
7751#if KMP_STATS_ENABLED
7752 if (previous_state == stats_state_e::TEAMS_REGION) {
7753 KMP_SET_THREAD_STATE(previous_state);
7755 KMP_POP_PARTITIONED_TIMER();
7759 if (__itt_stack_caller_create_ptr) {
7761 if (team->t.t_stack_id != NULL) {
7762 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7764 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7765 __kmp_itt_stack_callee_leave(
7766 (__itt_caller)team->t.t_parent->t.t_stack_id);
7770 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7775void __kmp_teams_master(int gtid) {
7777 kmp_info_t *thr = __kmp_threads[gtid];
7778 kmp_team_t *team = thr->th.th_team;
7779 ident_t *loc = team->t.t_ident;
7780 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7781 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7782 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7783 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7784 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7787 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7790 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7791 tmp->cg_nthreads = 1;
7792 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init cg_nthreads to 1\n", thr, tmp));
7795 tmp->up = thr->th.th_cg_roots;
7796 thr->th.th_cg_roots = tmp;
7800#if INCLUDE_SSC_MARKS
7803 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7804 (microtask_t)thr->th.th_teams_microtask,
7805 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7806#if INCLUDE_SSC_MARKS
7810 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7811 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7814 __kmp_join_call(loc, gtid
7823int __kmp_invoke_teams_master(int gtid) {
7824 kmp_info_t *this_thr = __kmp_threads[gtid];
7825 kmp_team_t *team = this_thr->th.th_team;
7827 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7828 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7829 (void *)__kmp_teams_master);
7831 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7833 int tid = __kmp_tid_from_gtid(gtid);
7834 ompt_data_t *task_data =
7835 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7836 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7837 if (ompt_enabled.ompt_callback_implicit_task) {
7838 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7839 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7841 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7844 __kmp_teams_master(gtid);
7846 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
7848 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7856void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7857 kmp_info_t *thr = __kmp_threads[gtid];
7859 if (num_threads > 0)
7860 thr->th.th_set_nproc = num_threads;
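// Illustrative call path (sketch, not code from this file): for
//   #pragma omp parallel num_threads(4)
// the compiler emits __kmpc_push_num_threads(loc, gtid, 4), which forwards here
// so the request is recorded in th_set_nproc before __kmpc_fork_call() creates
// the team.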
7863void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
7864 int *num_threads_list) {
7865 kmp_info_t *thr = __kmp_threads[gtid];
7867 KMP_DEBUG_ASSERT(list_length > 1);
7869 if (num_threads_list[0] > 0)
7870 thr->th.th_set_nproc = num_threads_list[0];
7871 thr->th.th_set_nested_nth =
7872 (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
7873 for (kmp_uint32 i = 0; i < list_length; ++i)
7874 thr->th.th_set_nested_nth[i] = num_threads_list[i];
7875 thr->th.th_set_nested_nth_sz = list_length;
7878void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev, const char *msg) {
7880 kmp_info_t *thr = __kmp_threads[gtid];
7881 thr->th.th_nt_strict = true;
7882 thr->th.th_nt_loc = loc;
7884 if (sev == severity_warning)
7885 thr->th.th_nt_sev = sev;
7887 thr->th.th_nt_sev = severity_fatal;
if (msg)
7890 thr->th.th_nt_msg = msg;
else
7892 thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
7893 "strict num_threads clause.";
7896static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads) {
7898 KMP_DEBUG_ASSERT(thr);
7900 if (!TCR_4(__kmp_init_middle))
7901 __kmp_middle_initialize();
7902 __kmp_assign_root_init_mask();
7903 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7904 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7906 if (num_threads == 0) {
7907 if (__kmp_teams_thread_limit > 0) {
7908 num_threads = __kmp_teams_thread_limit;
7910 num_threads = __kmp_avail_proc / num_teams;
7915 if (num_threads > __kmp_dflt_team_nth) {
7916 num_threads = __kmp_dflt_team_nth;
7918 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7919 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7921 if (num_teams * num_threads > __kmp_teams_max_nth) {
7922 num_threads = __kmp_teams_max_nth / num_teams;
7924 if (num_threads == 0) {
7928 if (num_threads < 0) {
7929 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7935 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7937 if (num_threads > __kmp_dflt_team_nth) {
7938 num_threads = __kmp_dflt_team_nth;
7940 if (num_teams * num_threads > __kmp_teams_max_nth) {
7941 int new_threads = __kmp_teams_max_nth / num_teams;
7942 if (new_threads == 0) {
7945 if (new_threads != num_threads) {
7946 if (!__kmp_reserve_warn) {
7947 __kmp_reserve_warn = 1;
7948 __kmp_msg(kmp_ms_warning,
7949 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7950 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7953 num_threads = new_threads;
7956 thr->th.th_teams_size.nth = num_threads;
7961void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads) {
7963 kmp_info_t *thr = __kmp_threads[gtid];
7964 if (num_teams < 0) {
7967 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7971 if (num_teams == 0) {
7972 if (__kmp_nteams > 0) {
7973 num_teams = __kmp_nteams;
7978 if (num_teams > __kmp_teams_max_nth) {
7979 if (!__kmp_reserve_warn) {
7980 __kmp_reserve_warn = 1;
7981 __kmp_msg(kmp_ms_warning,
7982 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7983 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7985 num_teams = __kmp_teams_max_nth;
7989 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7991 __kmp_push_thread_limit(thr, num_teams, num_threads);
7996void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7997 int num_teams_ub, int num_threads) {
7998 kmp_info_t *thr = __kmp_threads[gtid];
7999 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
8000 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
8001 KMP_DEBUG_ASSERT(num_threads >= 0);
8003 if (num_teams_lb > num_teams_ub) {
8004 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
8005 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
8010 if (num_teams_lb == 0 && num_teams_ub > 0)
8011 num_teams_lb = num_teams_ub;
8013 if (num_teams_lb == 0 && num_teams_ub == 0) {
8014 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
8015 if (num_teams > __kmp_teams_max_nth) {
8016 if (!__kmp_reserve_warn) {
8017 __kmp_reserve_warn = 1;
8018 __kmp_msg(kmp_ms_warning,
8019 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
8020 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
8022 num_teams = __kmp_teams_max_nth;
8024 } else if (num_teams_lb == num_teams_ub) {
8025 num_teams = num_teams_ub;
8027 if (num_threads <= 0) {
8028 if (num_teams_ub > __kmp_teams_max_nth) {
8029 num_teams = num_teams_lb;
8031 num_teams = num_teams_ub;
8034 num_teams = (num_threads > __kmp_teams_max_nth)
8036 : __kmp_teams_max_nth / num_threads;
8037 if (num_teams < num_teams_lb) {
8038 num_teams = num_teams_lb;
8039 } else if (num_teams > num_teams_ub) {
8040 num_teams = num_teams_ub;
8046 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8048 __kmp_push_thread_limit(thr, num_teams, num_threads);
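// Source-level sketch (assumes an OpenMP 5.1 compiler): the bounded form of the
// clause is what supplies num_teams_lb/num_teams_ub above, e.g.
//   #pragma omp teams num_teams(2 : 8) thread_limit(4)
// requests a league of 2..8 teams with at most 4 threads each; when no
// num_threads/thread_limit information is given, num_threads arrives as 0 and
// __kmp_push_thread_limit() derives a per-team limit from __kmp_avail_proc.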
8052void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
8053 kmp_info_t *thr = __kmp_threads[gtid];
8054 thr->th.th_set_proc_bind = proc_bind;
8059void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
8060 kmp_info_t *this_thr = __kmp_threads[gtid];
8066 KMP_DEBUG_ASSERT(team);
8067 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8068 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8071 team->t.t_construct = 0;
8072 team->t.t_ordered.dt.t_value = 0;
8076 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8077 if (team->t.t_max_nproc > 1) {
8079 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
8080 team->t.t_disp_buffer[i].buffer_index = i;
8081 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8084 team->t.t_disp_buffer[0].buffer_index = 0;
8085 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8089 KMP_ASSERT(this_thr->th.th_team == team);
8092 for (f = 0; f < team->t.t_nproc; f++) {
8093 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8094 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8099 __kmp_fork_barrier(gtid, 0);
8102void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8103 kmp_info_t *this_thr = __kmp_threads[gtid];
8105 KMP_DEBUG_ASSERT(team);
8106 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8107 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8113 if (__kmp_threads[gtid] &&
8114 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8115 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8116 __kmp_threads[gtid]);
8117 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8118 "team->t.t_nproc=%d\n",
8119 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
team->t.t_nproc);
8121 __kmp_print_structure();
8123 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8124 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8127 __kmp_join_barrier(gtid);
8129 ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
8130 if (ompt_enabled.enabled &&
8131 (ompt_state == ompt_state_wait_barrier_teams ||
8132 ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
8133 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8134 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8135 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8137 void *codeptr = NULL;
8138 if (KMP_MASTER_TID(ds_tid) &&
8139 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8140 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8141 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8143 ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
8144 if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
8145 sync_kind = ompt_sync_region_barrier_teams;
8146 if (ompt_enabled.ompt_callback_sync_region_wait) {
8147 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8148 sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8150 if (ompt_enabled.ompt_callback_sync_region) {
8151 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8152 sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8155 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8156 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8157 ompt_scope_end, NULL, task_data, 0, ds_tid,
8158 ompt_task_implicit);
8164 KMP_ASSERT(this_thr->th.th_team == team);
8169#ifdef USE_LOAD_BALANCE
8173static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8176 kmp_team_t *hot_team;
8178 if (root->r.r_active) {
8181 hot_team = root->r.r_hot_team;
8182 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8183 return hot_team->t.t_nproc - 1;
8188 for (i = 1; i < hot_team->t.t_nproc; i++) {
8189 if (hot_team->t.t_threads[i]->th.th_active) {
8198static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8201 int hot_team_active;
8202 int team_curr_active;
8205 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, set_nproc));
8207 KMP_DEBUG_ASSERT(root);
8208 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8209 ->th.th_current_task->td_icvs.dynamic == TRUE);
8210 KMP_DEBUG_ASSERT(set_nproc > 1);
8212 if (set_nproc == 1) {
8213 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8222 pool_active = __kmp_thread_pool_active_nth;
8223 hot_team_active = __kmp_active_hot_team_nproc(root);
8224 team_curr_active = pool_active + hot_team_active + 1;
8227 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8228 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8229 "hot team active = %d\n",
8230 system_active, pool_active, hot_team_active));
8232 if (system_active < 0) {
8236 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8237 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8240 retval = __kmp_avail_proc - __kmp_nth +
8241 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8242 if (retval > set_nproc) {
8245 if (retval < KMP_MIN_NTH) {
8246 retval = KMP_MIN_NTH;
8249 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval));
8257 if (system_active < team_curr_active) {
8258 system_active = team_curr_active;
8260 retval = __kmp_avail_proc - system_active + team_curr_active;
8261 if (retval > set_nproc) {
8264 if (retval < KMP_MIN_NTH) {
8265 retval = KMP_MIN_NTH;
8268 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8277void __kmp_cleanup(void) {
8280 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8282 if (TCR_4(__kmp_init_parallel)) {
8283#if KMP_HANDLE_SIGNALS
8284 __kmp_remove_signals();
8286 TCW_4(__kmp_init_parallel, FALSE);
8289 if (TCR_4(__kmp_init_middle)) {
8290#if KMP_AFFINITY_SUPPORTED
8291 __kmp_affinity_uninitialize();
8293 __kmp_cleanup_hierarchy();
8294 TCW_4(__kmp_init_middle, FALSE);
8297 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8299 if (__kmp_init_serial) {
8300 __kmp_runtime_destroy();
8301 __kmp_init_serial = FALSE;
8304 __kmp_cleanup_threadprivate_caches();
8306 for (f = 0; f < __kmp_threads_capacity; f++) {
8307 if (__kmp_root[f] != NULL) {
8308 __kmp_free(__kmp_root[f]);
8309 __kmp_root[f] = NULL;
8312 __kmp_free(__kmp_threads);
8315 __kmp_threads = NULL;
8317 __kmp_threads_capacity = 0;
8320 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8322 kmp_old_threads_list_t *next = ptr->next;
8323 __kmp_free(ptr->threads);
8328#if KMP_USE_DYNAMIC_LOCK
8329 __kmp_cleanup_indirect_user_locks();
8331 __kmp_cleanup_user_locks();
8335 __kmp_free(ompd_env_block);
8336 ompd_env_block = NULL;
8337 ompd_env_block_size = 0;
8341#if KMP_AFFINITY_SUPPORTED
8342 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8343 __kmp_cpuinfo_file = NULL;
8346#if KMP_USE_ADAPTIVE_LOCKS
8347#if KMP_DEBUG_ADAPTIVE_LOCKS
8348 __kmp_print_speculative_stats();
8351 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8352 __kmp_nested_nth.nth = NULL;
8353 __kmp_nested_nth.size = 0;
8354 __kmp_nested_nth.used = 0;
8356 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8357 __kmp_nested_proc_bind.bind_types = NULL;
8358 __kmp_nested_proc_bind.size = 0;
8359 __kmp_nested_proc_bind.used = 0;
8360 if (__kmp_affinity_format) {
8361 KMP_INTERNAL_FREE(__kmp_affinity_format);
8362 __kmp_affinity_format = NULL;
8365 __kmp_i18n_catclose();
8367#if KMP_USE_HIER_SCHED
8368 __kmp_hier_scheds.deallocate();
8371#if KMP_STATS_ENABLED
8375 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8380int __kmp_ignore_mppbeg(void) {
char *env;
8383 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8384 if (__kmp_str_match_false(env))
8391int __kmp_ignore_mppend(void) {
char *env;
8394 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8395 if (__kmp_str_match_false(env))
8402void __kmp_internal_begin(void) {
8408 gtid = __kmp_entry_gtid();
8409 root = __kmp_threads[gtid]->th.th_root;
8410 KMP_ASSERT(KMP_UBER_GTID(gtid));
8412 if (root->r.r_begin)
8414 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8415 if (root->r.r_begin) {
8416 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8420 root->r.r_begin = TRUE;
8422 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8427void __kmp_user_set_library(enum library_type arg) {
8434 gtid = __kmp_entry_gtid();
8435 thread = __kmp_threads[gtid];
8437 root = thread->th.th_root;
8439 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8441 if (root->r.r_in_parallel) {
8443 KMP_WARNING(SetLibraryIncorrectCall);
8448 case library_serial:
8449 thread->th.th_set_nproc = 0;
8450 set__nproc(thread, 1);
8452 case library_turnaround:
8453 thread->th.th_set_nproc = 0;
8454 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8455 : __kmp_dflt_team_nth_ub);
8457 case library_throughput:
8458 thread->th.th_set_nproc = 0;
8459 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8460 : __kmp_dflt_team_nth_ub);
8463 KMP_FATAL(UnknownLibraryType, arg);
8466 __kmp_aux_set_library(arg);
8469void __kmp_aux_set_stacksize(size_t arg) {
8470 if (!__kmp_init_serial)
8471 __kmp_serial_initialize();
8474 if (arg & (0x1000 - 1)) {
8475 arg &= ~(0x1000 - 1);
8480 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8483 if (!TCR_4(__kmp_init_parallel)) {
8486 if (value < __kmp_sys_min_stksize)
8487 value = __kmp_sys_min_stksize;
8488 else if (value > KMP_MAX_STKSIZE)
8489 value = KMP_MAX_STKSIZE;
8491 __kmp_stksize = value;
8493 __kmp_env_stksize = TRUE;
8496 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8501void __kmp_aux_set_library(enum library_type arg) {
8502 __kmp_library = arg;
8504 switch (__kmp_library) {
8505 case library_serial: {
8506 KMP_INFORM(LibraryIsSerial);
8508 case library_turnaround:
8509 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8510 __kmp_use_yield = 2;
8512 case library_throughput:
8513 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8514 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8517 KMP_FATAL(UnknownLibraryType, arg);
8523static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8524 kmp_info_t *thr = __kmp_entry_thread();
8525 teams_serialized = 0;
8526 if (thr->th.th_teams_microtask) {
8527 kmp_team_t *team = thr->th.th_team;
8528 int tlevel = thr->th.th_teams_level;
8529 int ii = team->t.t_level;
8530 teams_serialized = team->t.t_serialized;
8531 int level = tlevel + 1;
8532 KMP_DEBUG_ASSERT(ii >= tlevel);
8533 while (ii > level) {
8534 for (teams_serialized = team->t.t_serialized;
8535 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8537 if (team->t.t_serialized && (!teams_serialized)) {
8538 team = team->t.t_parent;
8542 team = team->t.t_parent;
8551int __kmp_aux_get_team_num() {
8553 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8555 if (serialized > 1) {
8558 return team->t.t_master_tid;
8564int __kmp_aux_get_num_teams() {
8566 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8568 if (serialized > 1) {
8571 return team->t.t_parent->t.t_nproc;
8610typedef struct kmp_affinity_format_field_t {
char short_name; // single-character field specifier, e.g. 'L', 'n', 'A'
8612 const char *long_name; // spelled-out name, e.g. "nesting_level", "thread_num"
char field_format; // printf conversion used for the field ('d' or 's')
8615} kmp_affinity_format_field_t;
8617static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8618#if KMP_AFFINITY_SUPPORTED
8619 {'A', "thread_affinity", 's'},
#endif
8621 {'t', "team_num", 'd'},
8622 {'T', "num_teams", 'd'},
8623 {'L', "nesting_level", 'd'},
8624 {'n', "thread_num", 'd'},
8625 {'N', "num_threads", 'd'},
8626 {'a', "ancestor_tnum", 'd'},
8628 {'P', "process_id", 'd'},
8629 {'i', "native_thread_id", 'd'}};
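// Example (illustrative): with OMP_DISPLAY_AFFINITY=TRUE and an affinity format
// such as "L=%L tid=%0.4n aff=%A", the table above maps %L to the nesting level,
// %n to the thread number (zero-padded to width 4) and %A to the thread affinity
// string; long names work as well, e.g. "%{thread_num}".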
8632static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
const char **ptr,
8634 kmp_str_buf_t *field_buffer) {
8635 int rc, format_index, field_value;
8636 const char *width_left, *width_right;
8637 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8638 static const int FORMAT_SIZE = 20;
8639 char format[FORMAT_SIZE] = {0};
8640 char absolute_short_name = 0;
8642 KMP_DEBUG_ASSERT(gtid >= 0);
8643 KMP_DEBUG_ASSERT(th);
8644 KMP_DEBUG_ASSERT(**ptr == '%');
8645 KMP_DEBUG_ASSERT(field_buffer);
8647 __kmp_str_buf_clear(field_buffer);
8654 __kmp_str_buf_cat(field_buffer, "%", 1);
8665 right_justify = false;
8667 right_justify = true;
8671 width_left = width_right = NULL;
8672 if (**ptr >= '0' && **ptr <= '9') {
8680 format[format_index++] = '%';
8682 format[format_index++] = '-';
8684 format[format_index++] = '0';
8685 if (width_left && width_right) {
8689 while (i < 8 && width_left < width_right) {
8690 format[format_index++] = *width_left;
8698 found_valid_name = false;
8699 parse_long_name = (**ptr == '{');
8700 if (parse_long_name)
8702 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8703 sizeof(__kmp_affinity_format_table[0]);
++i) {
8705 char short_name = __kmp_affinity_format_table[i].short_name;
8706 const char *long_name = __kmp_affinity_format_table[i].long_name;
8707 char field_format = __kmp_affinity_format_table[i].field_format;
8708 if (parse_long_name) {
8709 size_t length = KMP_STRLEN(long_name);
8710 if (strncmp(*ptr, long_name, length) == 0) {
8711 found_valid_name = true;
8714 } else if (**ptr == short_name) {
8715 found_valid_name = true;
8718 if (found_valid_name) {
8719 format[format_index++] = field_format;
8720 format[format_index++] = '\0';
8721 absolute_short_name = short_name;
8725 if (parse_long_name) {
8727 absolute_short_name = 0;
8735 switch (absolute_short_name) {
8737 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8740 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8743 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8746 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8749 static const int BUFFER_SIZE = 256;
8750 char buf[BUFFER_SIZE];
8751 __kmp_expand_host_name(buf, BUFFER_SIZE);
8752 rc = __kmp_str_buf_print(field_buffer, format, buf);
8755 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8758 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8761 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8765 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8766 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8768#if KMP_AFFINITY_SUPPORTED
8771 __kmp_str_buf_init(&buf);
8772 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8773 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8774 __kmp_str_buf_free(&buf);
8780 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8782 if (parse_long_name) {
8791 KMP_ASSERT(format_index <= FORMAT_SIZE);
8801size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8802 kmp_str_buf_t *buffer) {
8803 const char *parse_ptr;
8805 const kmp_info_t *th;
8806 kmp_str_buf_t field;
8808 KMP_DEBUG_ASSERT(buffer);
8809 KMP_DEBUG_ASSERT(gtid >= 0);
8811 __kmp_str_buf_init(&field);
8812 __kmp_str_buf_clear(buffer);
8814 th = __kmp_threads[gtid];
8820 if (parse_ptr == NULL || *parse_ptr == '\0') {
8821 parse_ptr = __kmp_affinity_format;
8823 KMP_DEBUG_ASSERT(parse_ptr);
8825 while (*parse_ptr != '\0') {
8827 if (*parse_ptr == '%') {
8829 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8830 __kmp_str_buf_catbuf(buffer, &field);
8834 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8839 __kmp_str_buf_free(&field);
8844void __kmp_aux_display_affinity(int gtid, const char *format) {
kmp_str_buf_t buf;
8846 __kmp_str_buf_init(&buf);
8847 __kmp_aux_capture_affinity(gtid, format, &buf);
8848 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8849 __kmp_str_buf_free(&buf);
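// Usage sketch (user code, not part of this file): the OpenMP 5.0 affinity
// display routines are thin wrappers over the helpers above, e.g.
//   #pragma omp parallel
//   omp_display_affinity(NULL); // NULL/empty format falls back to affinity-format-var
// which ends up in __kmp_aux_display_affinity() with the caller's gtid.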
8853void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8854 int blocktime = arg;
8860 __kmp_save_internal_controls(thread);
8863 if (blocktime < KMP_MIN_BLOCKTIME)
8864 blocktime = KMP_MIN_BLOCKTIME;
8865 else if (blocktime > KMP_MAX_BLOCKTIME)
8866 blocktime = KMP_MAX_BLOCKTIME;
8868 set__blocktime_team(thread->th.th_team, tid, blocktime);
8869 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8873 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8875 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8876 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8882 set__bt_set_team(thread->th.th_team, tid, bt_set);
8883 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8885 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8886 "bt_intervals=%d, monitor_updates=%d\n",
8887 __kmp_gtid_from_tid(tid, thread->th.th_team),
8888 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8889 __kmp_monitor_wakeups));
8891 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8892 __kmp_gtid_from_tid(tid, thread->th.th_team),
8893 thread->th.th_team->t.t_id, tid, blocktime));
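// Example (illustrative): kmp_set_blocktime(0), or KMP_BLOCKTIME=0 in the
// environment, reaches this routine and makes workers go to sleep immediately at
// the end of a parallel region instead of spin-waiting; out-of-range values are
// clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and stored in both the
// current team and the serial team, as done above.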
8897void __kmp_aux_set_defaults(char const *str, size_t len) {
8898 if (!__kmp_init_serial) {
8899 __kmp_serial_initialize();
8901 __kmp_env_initialize(str);
8903 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8911PACKED_REDUCTION_METHOD_T
8912__kmp_determine_reduction_method(
8913 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8914 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8915 kmp_critical_name *lck) {
8926 PACKED_REDUCTION_METHOD_T retval;
8930 KMP_DEBUG_ASSERT(lck);
8932#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
(loc && \
8934 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8935#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8937 retval = critical_reduce_block;
8940 team_size = __kmp_get_team_num_threads(global_tid);
8941 if (team_size == 1) {
8943 retval = empty_reduce_block;
8947 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8949#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8950 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8951 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8953#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8954 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8955 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8957 int teamsize_cutoff = 4;
8959#if KMP_MIC_SUPPORTED
8960 if (__kmp_mic_type != non_mic) {
8961 teamsize_cutoff = 8;
8964 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8965 if (tree_available) {
8966 if (team_size <= teamsize_cutoff) {
8967 if (atomic_available) {
8968 retval = atomic_reduce_block;
8971 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8973 }
else if (atomic_available) {
8974 retval = atomic_reduce_block;
8977#error "Unknown or unsupported OS"
8982#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8983 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
8985#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8986 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8987 KMP_OS_WASI || KMP_OS_AIX
8991 if (atomic_available) {
8992 if (num_vars <= 2) {
8993 retval = atomic_reduce_block;
8999 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
9000 if (atomic_available && (num_vars <= 3)) {
9001 retval = atomic_reduce_block;
9002 }
else if (tree_available) {
9003 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
9004 (reduce_size < (2000 * sizeof(kmp_real64)))) {
9005 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
9010#error "Unknown or unsupported OS"
9014#error "Unknown or unsupported architecture"
9022 if (__kmp_force_reduction_method != reduction_method_not_defined &&
9025 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
9027 int atomic_available, tree_available;
9029 switch ((forced_retval = __kmp_force_reduction_method)) {
9030 case critical_reduce_block:
9034 case atomic_reduce_block:
9035 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
9036 if (!atomic_available) {
9037 KMP_WARNING(RedMethodNotSupported, "atomic");
9038 forced_retval = critical_reduce_block;
9042 case tree_reduce_block:
9043 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
9044 if (!tree_available) {
9045 KMP_WARNING(RedMethodNotSupported, "tree");
9046 forced_retval = critical_reduce_block;
9048#if KMP_FAST_REDUCTION_BARRIER
9049 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9058 retval = forced_retval;
9061 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
9063#undef FAST_REDUCTION_TREE_METHOD_GENERATED
9064#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
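// Selection summary (recap of the logic above): a single-thread team gets
// empty_reduce_block; otherwise the choice between atomic_reduce_block, a tree
// reduction and the critical-section fallback depends on whether the compiler
// provided an atomic reduction (KMP_IDENT_ATOMIC_REDUCE in loc->flags), whether
// reduce_data/reduce_func allow the tree path, and on per-architecture/OS
// cutoffs such as team_size <= teamsize_cutoff. __kmp_force_reduction_method
// (the KMP_FORCE_REDUCTION setting) can still override the automatic choice.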
9069kmp_int32 __kmp_get_reduce_method(void) {
9070 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
9075void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
9079void __kmp_hard_pause() {
9080 __kmp_pause_status = kmp_hard_paused;
9081 __kmp_internal_end_thread(-1);
9085void __kmp_resume_if_soft_paused() {
9086 if (__kmp_pause_status == kmp_soft_paused) {
9087 __kmp_pause_status = kmp_not_paused;
9089 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
9090 kmp_info_t *thread = __kmp_threads[gtid];
9092 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9094 if (fl.is_sleeping())
9096 else if (__kmp_try_suspend_mx(thread)) {
9097 __kmp_unlock_suspend_mx(thread);
9100 if (fl.is_sleeping()) {
9103 } else if (__kmp_try_suspend_mx(thread)) {
9104 __kmp_unlock_suspend_mx(thread);
9116int __kmp_pause_resource(kmp_pause_status_t level) {
9117 if (level == kmp_not_paused) {
9118 if (__kmp_pause_status == kmp_not_paused) {
9122 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9123 __kmp_pause_status == kmp_hard_paused);
9124 __kmp_pause_status = kmp_not_paused;
9127 } else if (level == kmp_soft_paused) {
9128 if (__kmp_pause_status != kmp_not_paused) {
9135 } else if (level == kmp_hard_paused) {
9136 if (__kmp_pause_status != kmp_not_paused) {
9149void __kmp_omp_display_env(int verbose) {
9150 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9151 if (__kmp_init_serial == 0)
9152 __kmp_do_serial_initialize();
9153 __kmp_display_env_impl(!verbose, verbose);
9154 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
9158void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads) {
9160 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9162 kmp_info_t **other_threads = team->t.t_threads;
9166 for (int f = 1; f < old_nthreads; ++f) {
9167 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9169 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9175 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9176 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9180 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9182 team->t.t_threads[f]->th.th_used_in_team.store(2);
9183 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9186 team->t.b->go_release();
9192 int count = old_nthreads - 1;
9194 count = old_nthreads - 1;
9195 for (int f = 1; f < old_nthreads; ++f) {
9196 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9197 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9198 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9199 void *, other_threads[f]->th.th_sleep_loc);
9200 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9203 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9209 team->t.b->update_num_threads(new_nthreads);
9210 team->t.b->go_reset();
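// Resize handshake (recap of the code above): th_used_in_team acts as a small
// state machine for the distributed barrier - 1 means the thread is in the team,
// 2 asks it to drop out (it resets itself to 0), 3 marks a thread in the middle
// of being added, and 0 means it is out. The barrier object is only resized
// (update_num_threads / go_reset) once every old thread has been observed back at 0.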
9213void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9215 KMP_DEBUG_ASSERT(team);
9221 for (int f = 1; f < new_nthreads; ++f) {
9222 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9223 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9225 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9226 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9227 (kmp_flag_32<false, false> *)NULL);
9233 int count = new_nthreads - 1;
9235 count = new_nthreads - 1;
9236 for (int f = 1; f < new_nthreads; ++f) {
9237 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9245kmp_info_t **__kmp_hidden_helper_threads;
9246kmp_info_t *__kmp_hidden_helper_main_thread;
9247std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9249kmp_int32 __kmp_hidden_helper_threads_num = 8;
9250kmp_int32 __kmp_enable_hidden_helper = TRUE;
9252kmp_int32 __kmp_hidden_helper_threads_num = 0;
9253kmp_int32 __kmp_enable_hidden_helper = FALSE;
9257std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9259void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9264 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9265 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9266 __kmp_hidden_helper_threads_num)
9272 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9273 __kmp_hidden_helper_initz_release();
9274 __kmp_hidden_helper_main_thread_wait();
9276 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9277 __kmp_hidden_helper_worker_thread_signal();
9283void __kmp_hidden_helper_threads_initz_routine() {
9285 const int gtid = __kmp_register_root(TRUE);
9286 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9287 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9288 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9289 __kmp_hidden_helper_threads_num;
9291 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9296 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9298 __kmp_hidden_helper_threads_deinitz_release();
9318void __kmp_init_nesting_mode() {
9319 int levels = KMP_HW_LAST;
9320 __kmp_nesting_mode_nlevels = levels;
9321 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9322 for (int i = 0; i < levels; ++i)
9323 __kmp_nesting_nth_level[i] = 0;
9324 if (__kmp_nested_nth.size < levels) {
9325 __kmp_nested_nth.nth =
9326 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9327 __kmp_nested_nth.size = levels;
9332void __kmp_set_nesting_mode_threads() {
9333 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9335 if (__kmp_nesting_mode == 1)
9336 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9337 else if (__kmp_nesting_mode > 1)
9338 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9340 if (__kmp_topology) {
9342 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9343 loc < __kmp_nesting_mode_nlevels;
9344 loc++, hw_level++) {
9345 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9346 if (__kmp_nesting_nth_level[loc] == 1)
9350 if (__kmp_nesting_mode > 1 && loc > 1) {
9351 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9352 int num_cores = __kmp_topology->get_count(core_level);
9353 int upper_levels = 1;
9354 for (int level = 0; level < loc - 1; ++level)
9355 upper_levels *= __kmp_nesting_nth_level[level];
9356 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9357 __kmp_nesting_nth_level[loc - 1] =
9358 num_cores / __kmp_nesting_nth_level[loc - 2];
9360 __kmp_nesting_mode_nlevels = loc;
9361 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9363 if (__kmp_avail_proc >= 4) {
9364 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9365 __kmp_nesting_nth_level[1] = 2;
9366 __kmp_nesting_mode_nlevels = 2;
9368 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9369 __kmp_nesting_mode_nlevels = 1;
9371 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9373 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9374 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9376 set__nproc(thread, __kmp_nesting_nth_level[0]);
9377 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9378 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9379 if (get__max_active_levels(thread) > 1) {
9381 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9383 if (__kmp_nesting_mode == 1)
9384 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
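// Nesting-mode notes (recap): with topology information each nesting level is
// sized from the machine hierarchy via __kmp_topology->get_ratio(), collapsing
// levels of size 1; without a topology the fallback is two levels
// (__kmp_avail_proc/2 threads times 2) when at least 4 processors are available,
// otherwise a single level of __kmp_avail_proc threads.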
9389#if !KMP_STATS_ENABLED
9390void __kmp_reset_stats() {}
9393int __kmp_omp_debug_struct_info = FALSE;
9394int __kmp_debugging = FALSE;
9396#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9397void __kmp_itt_fini_ittlib() {}
9398void __kmp_itt_init_ittlib() {}