radeonsi: use a compiler queue with a low priority for optimized shaders
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in: parent 89b6c93ae3, commit 86cc809726
@ -742,11 +742,16 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
|
|||
return;
|
||||
|
||||
util_queue_destroy(&sscreen->shader_compiler_queue);
|
||||
util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
|
||||
if (sscreen->tm[i])
|
||||
LLVMDisposeTargetMachine(sscreen->tm[i]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sscreen->tm_low_priority); i++)
|
||||
if (sscreen->tm_low_priority[i])
|
||||
LLVMDisposeTargetMachine(sscreen->tm_low_priority[i]);
|
||||
|
||||
/* Free shader parts. */
|
||||
for (i = 0; i < ARRAY_SIZE(parts); i++) {
|
||||
while (parts[i]) {
|
||||
|
@ -860,7 +865,7 @@ static void si_test_vmfault(struct si_screen *sscreen)
|
|||
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
||||
{
|
||||
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
|
||||
unsigned num_cpus, num_compiler_threads, i;
|
||||
unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
|
||||
|
||||
if (!sscreen) {
|
||||
return NULL;
|
||||
|
@ -885,9 +890,11 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
|||
/* Only enable as many threads as we have target machines, but at most
|
||||
* the number of CPUs - 1 if there is more than one.
|
||||
*/
|
||||
num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
num_cpus = MAX2(1, num_cpus - 1);
|
||||
num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
|
||||
num_threads = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
num_threads = MAX2(1, num_threads - 1);
|
||||
num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
|
||||
num_compiler_threads_lowprio =
|
||||
MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
|
||||
|
||||
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
|
||||
32, num_compiler_threads, 0)) {
|
||||
|
@ -896,6 +903,20 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* The queue must be large enough so that adding optimized shaders
|
||||
* doesn't stall draw calls when the queue is full. Especially varying
|
||||
* packing generates a very high volume of optimized shader compilation
|
||||
* jobs.
|
||||
*/
|
||||
if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
|
||||
"si_shader_low",
|
||||
1024, num_compiler_threads,
|
||||
UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
|
||||
si_destroy_shader_cache(sscreen);
|
||||
FREE(sscreen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
si_handle_env_var_force_family(sscreen);
|
||||
|
||||
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
|
||||
|
@ -959,6 +980,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
|||
|
||||
for (i = 0; i < num_compiler_threads; i++)
|
||||
sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
|
||||
for (i = 0; i < num_compiler_threads_lowprio; i++)
|
||||
sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen);
|
||||
|
||||
/* Create the auxiliary context. This must be done last. */
|
||||
sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
|
||||
|
|
|
@ -110,6 +110,9 @@ struct si_screen {
|
|||
/* Shader compiler queue for multithreaded compilation. */
|
||||
struct util_queue shader_compiler_queue;
|
||||
LLVMTargetMachineRef tm[4]; /* used by the queue only */
|
||||
|
||||
struct util_queue shader_compiler_queue_low_priority;
|
||||
LLVMTargetMachineRef tm_low_priority[4];
|
||||
};
|
||||
|
||||
struct si_blend_color {
|
||||
|
|
|
@ -1450,8 +1450,8 @@ static void si_build_shader_variant(void *job, int thread_index)
|
|||
int r;
|
||||
|
||||
if (thread_index >= 0) {
|
||||
assert(thread_index < ARRAY_SIZE(sscreen->tm));
|
||||
tm = sscreen->tm[thread_index];
|
||||
assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority));
|
||||
tm = sscreen->tm_low_priority[thread_index];
|
||||
if (!debug->async)
|
||||
debug = NULL;
|
||||
} else {
|
||||
|
@ -1679,7 +1679,7 @@ again:
|
|||
!is_pure_monolithic &&
|
||||
thread_index < 0) {
|
||||
/* Compile it asynchronously. */
|
||||
util_queue_add_job(&sscreen->shader_compiler_queue,
|
||||
util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
|
||||
shader, &shader->optimized_ready,
|
||||
si_build_shader_variant, NULL);
|
||||
|
||||
|
@ -2258,7 +2258,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
|
|||
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
|
||||
{
|
||||
if (shader->is_optimized) {
|
||||
util_queue_drop_job(&sctx->screen->shader_compiler_queue,
|
||||
util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
|
||||
&shader->optimized_ready);
|
||||
util_queue_fence_destroy(&shader->optimized_ready);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue