mirror of https://gitlab.freedesktop.org/mesa/mesa
92 lines
2.6 KiB
C
92 lines
2.6 KiB
C
/*
|
|
* Copyright 2023 Advanced Micro Devices, Inc.
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "thread_sched.h"
|
|
#include "u_cpu_detect.h"
|
|
#include "u_debug.h"
|
|
|
|
DEBUG_GET_ONCE_BOOL_OPTION(pin_threads, "mesa_pin_threads", false)
|
|
|
|
bool
|
|
util_thread_scheduler_enabled(void)
|
|
{
|
|
#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
|
|
return util_get_cpu_caps()->num_L3_caches > 1 ||
|
|
debug_get_option_pin_threads();
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
void
|
|
util_thread_scheduler_init_state(unsigned *state)
|
|
{
|
|
*state = UINT32_MAX;
|
|
|
|
util_thread_sched_apply_policy(thrd_current(), UTIL_THREAD_APP_CALLER, 0,
|
|
NULL); /* keep as NULL */
|
|
}
|
|
|
|
/**
|
|
* Apply the optimal thread scheduling policy for the given thread.
|
|
*
|
|
* "name" determines which thread the policy is being applied to.
|
|
*
|
|
* "app_thread_cpu" is the CPU where the application thread currently
|
|
* resides.
|
|
*
|
|
* "sched_state" is a per-gl_context state that this function uses to track
|
|
* what happened in previous invocations.
|
|
*/
|
|
bool
|
|
util_thread_sched_apply_policy(thrd_t thread, enum util_thread_name name,
|
|
unsigned app_thread_cpu, unsigned *sched_state)
|
|
{
|
|
#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
|
|
if (debug_get_option_pin_threads()) {
|
|
/* Pin threads to a specific CPU. This is done only once. *sched_state
|
|
* is true if this is the first time we are doing it.
|
|
*/
|
|
if (sched_state && !*sched_state)
|
|
return false;
|
|
|
|
/* Each thread is assigned to a different CPU. */
|
|
unsigned mask = BITFIELD_BIT(name);
|
|
if (sched_state)
|
|
*sched_state = 0;
|
|
return util_set_thread_affinity(thread, &mask, NULL, 32);
|
|
}
|
|
|
|
/* Don't do anything for the app thread with the L3 chasing policy. */
|
|
if (name == UTIL_THREAD_APP_CALLER)
|
|
return false;
|
|
|
|
/* Move Mesa threads to the L3 core complex where the app thread
|
|
* resides. We call this "L3 chasing".
|
|
*
|
|
* This improves multithreading performance by up to 33% on Ryzen 3900X.
|
|
*/
|
|
const struct util_cpu_caps_t *caps = util_get_cpu_caps();
|
|
int L3_cache = caps->cpu_to_L3[app_thread_cpu];
|
|
|
|
/* Don't do anything if the app thread hasn't moved to a different
|
|
* core complex. (*sched_state contains the last set L3 index)
|
|
*/
|
|
if (L3_cache == U_CPU_INVALID_L3 ||
|
|
(sched_state && L3_cache == *sched_state))
|
|
return false;
|
|
|
|
/* Apply the policy. */
|
|
if (sched_state)
|
|
*sched_state = L3_cache;
|
|
|
|
return util_set_thread_affinity(thread, caps->L3_affinity_mask[L3_cache],
|
|
NULL, caps->num_cpu_mask_bits);
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|