iris: Enable threaded shader compilation
There are a few minor things that could still be improved: 1. Eliminate (or reduce) the dynamic allocation of the iris_threaded_compile_job. 2. For apps like shader-db, improve the case where nr_threads=0. Right now this adds thread switching and mutex overhead. 3. Other performance improvements? iris_uncompiled_shader::variants has some special properties that make it ripe for replacement with a lockless list. Without gathering some data, it's hard to guess what impact that could have. v2: Fix whitespace and formatting issues. Noticed by Ken. s/threaded_compile_job/iris_threaded_compile_job/g. Suggested by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11229>
This commit is contained in:
parent
9011cc7405
commit
42c34e1ac8
|
@ -44,6 +44,9 @@ iris_set_debug_callback(struct pipe_context *ctx,
|
|||
const struct pipe_debug_callback *cb)
|
||||
{
|
||||
struct iris_context *ice = (struct iris_context *)ctx;
|
||||
struct iris_screen *screen = (struct iris_screen *)ctx->screen;
|
||||
|
||||
util_queue_finish(&screen->shader_compiler_queue);
|
||||
|
||||
if (cb)
|
||||
ice->dbg = *cb;
|
||||
|
|
|
@ -412,6 +412,9 @@ struct iris_uncompiled_shader {
|
|||
|
||||
/** Lock for the variants list */
|
||||
simple_mtx_t lock;
|
||||
|
||||
/** For parallel shader compiles */
|
||||
struct util_queue_fence ready;
|
||||
};
|
||||
|
||||
enum iris_surface_group {
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "util/u_atomic.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/u_async_debug.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_serialize.h"
|
||||
|
@ -54,6 +55,14 @@
|
|||
.base.tex.compressed_multisample_layout_mask = ~0, \
|
||||
.base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
|
||||
|
||||
/**
 * Argument bundle for one shader compile submitted to the screen's
 * shader_compiler_queue.
 *
 * iris_compile_shader() receives a pointer to this struct as its opaque job
 * argument and unpacks every field before dispatching on the shader stage.
 *
 *  - screen:   screen the compile runs against.
 *  - uploader: upload manager forwarded to the per-stage compile function.
 *  - dbg:      debug callback forwarded to the per-stage compile function;
 *              iris_schedule_compile() points it at an async-debug wrapper
 *              when the caller supplies a callback, otherwise it keeps
 *              whatever value the allocation left in it.
 *  - ish:      uncompiled shader whose NIR selects the stage to compile.
 *  - shader:   compiled-shader variant forwarded to the per-stage compile
 *              function.
 *
 * Jobs are released with free() by iris_threaded_compile_job_delete(), so
 * they must come from the malloc family.
 */
struct iris_threaded_compile_job {
|
||||
struct iris_screen *screen;
|
||||
struct u_upload_mgr *uploader;
|
||||
struct pipe_debug_callback *dbg;
|
||||
struct iris_uncompiled_shader *ish;
|
||||
struct iris_compiled_shader *shader;
|
||||
};
|
||||
|
||||
static unsigned
|
||||
get_new_program_id(struct iris_screen *screen)
|
||||
{
|
||||
|
@ -1174,6 +1183,42 @@ find_or_add_variant(const struct iris_screen *screen,
|
|||
return variant;
|
||||
}
|
||||
|
||||
static void
|
||||
iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
|
||||
UNUSED int thread_index)
|
||||
{
|
||||
free(_job);
|
||||
}
|
||||
|
||||
static void
|
||||
iris_schedule_compile(struct iris_screen *screen,
|
||||
struct util_queue_fence *ready_fence,
|
||||
struct pipe_debug_callback *dbg,
|
||||
struct iris_threaded_compile_job *job,
|
||||
util_queue_execute_func execute)
|
||||
|
||||
{
|
||||
util_queue_fence_init(ready_fence);
|
||||
|
||||
struct util_async_debug_callback async_debug;
|
||||
|
||||
if (dbg) {
|
||||
u_async_debug_init(&async_debug);
|
||||
job->dbg = &async_debug.base;
|
||||
}
|
||||
|
||||
util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
|
||||
iris_threaded_compile_job_delete, 0);
|
||||
|
||||
if (screen->driconf.sync_compile || dbg)
|
||||
util_queue_fence_wait(ready_fence);
|
||||
|
||||
if (dbg) {
|
||||
u_async_debug_drain(&async_debug, dbg);
|
||||
u_async_debug_cleanup(&async_debug);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compile a vertex shader, and upload the assembly.
|
||||
*/
|
||||
|
@ -2457,12 +2502,17 @@ iris_create_compute_state(struct pipe_context *ctx,
|
|||
}
|
||||
|
||||
static void
|
||||
iris_compile_shader(struct iris_screen *screen,
|
||||
struct u_upload_mgr *uploader,
|
||||
struct pipe_debug_callback *dbg,
|
||||
struct iris_uncompiled_shader *ish,
|
||||
struct iris_compiled_shader *shader)
|
||||
iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
|
||||
{
|
||||
const struct iris_threaded_compile_job *job =
|
||||
(struct iris_threaded_compile_job *) _job;
|
||||
|
||||
struct iris_screen *screen = job->screen;
|
||||
struct u_upload_mgr *uploader = job->uploader;
|
||||
struct pipe_debug_callback *dbg = job->dbg;
|
||||
struct iris_uncompiled_shader *ish = job->ish;
|
||||
struct iris_compiled_shader *shader = job->shader;
|
||||
|
||||
switch (ish->nir->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
iris_compile_vs(screen, uploader, dbg, ish, shader);
|
||||
|
@ -2615,7 +2665,17 @@ iris_create_shader_state(struct pipe_context *ctx,
|
|||
|
||||
if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
|
||||
&key, key_size)) {
|
||||
iris_compile_shader(screen, uploader, &ice->dbg, ish, shader);
|
||||
assert(!util_queue_fence_is_signalled(&shader->ready));
|
||||
|
||||
struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));
|
||||
|
||||
job->screen = screen;
|
||||
job->uploader = uploader;
|
||||
job->ish = ish;
|
||||
job->shader = shader;
|
||||
|
||||
iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
|
||||
iris_compile_shader);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2643,6 +2703,7 @@ iris_destroy_shader_state(struct pipe_context *ctx, void *state)
|
|||
}
|
||||
|
||||
simple_mtx_destroy(&ish->lock);
|
||||
util_queue_fence_destroy(&ish->ready);
|
||||
|
||||
ralloc_free(ish->nir);
|
||||
free(ish);
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_transfer_helper.h"
|
||||
|
@ -609,6 +610,7 @@ void
|
|||
iris_screen_destroy(struct iris_screen *screen)
|
||||
{
|
||||
iris_destroy_screen_measure(screen);
|
||||
util_queue_destroy(&screen->shader_compiler_queue);
|
||||
glsl_type_singleton_decref();
|
||||
iris_bo_unreference(screen->workaround_bo);
|
||||
u_transfer_helper_destroy(screen->base.transfer_helper);
|
||||
|
@ -649,6 +651,38 @@ iris_get_disk_shader_cache(struct pipe_screen *pscreen)
|
|||
return screen->disk_cache;
|
||||
}
|
||||
|
||||
static void
|
||||
iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
|
||||
unsigned max_threads)
|
||||
{
|
||||
struct iris_screen *screen = (struct iris_screen *) pscreen;
|
||||
util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
|
||||
}
|
||||
|
||||
static bool
|
||||
iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
|
||||
void *v_shader,
|
||||
enum pipe_shader_type p_stage)
|
||||
{
|
||||
struct iris_screen *screen = (struct iris_screen *) pscreen;
|
||||
|
||||
/* Threaded compilation is only used for the precompile. If precompile is
|
||||
* disabled, threaded compilation is "done."
|
||||
*/
|
||||
if (!screen->precompile)
|
||||
return true;
|
||||
|
||||
struct iris_uncompiled_shader *ish = v_shader;
|
||||
|
||||
/* When precompile is enabled, the first entry is the precompile variant.
|
||||
* Check the ready fence of the precompile variant.
|
||||
*/
|
||||
struct iris_compiled_shader *first =
|
||||
list_first_entry(&ish->variants, struct iris_compiled_shader, link);
|
||||
|
||||
return util_queue_fence_is_signalled(&first->ready);
|
||||
}
|
||||
|
||||
static int
|
||||
iris_getparam(int fd, int param, int *value)
|
||||
{
|
||||
|
@ -869,10 +903,36 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
|||
pscreen->query_memory_info = iris_query_memory_info;
|
||||
pscreen->get_driver_query_group_info = iris_get_monitor_group_info;
|
||||
pscreen->get_driver_query_info = iris_get_monitor_info;
|
||||
pscreen->is_parallel_shader_compilation_finished = iris_is_parallel_shader_compilation_finished;
|
||||
pscreen->set_max_shader_compiler_threads = iris_set_max_shader_compiler_threads;
|
||||
|
||||
genX_call(&screen->devinfo, init_screen_state, screen);
|
||||
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
/* FINISHME: Big core vs little core (for CPUs that have both kinds of
|
||||
* cores) and, possibly, thread vs core should be considered here too.
|
||||
*/
|
||||
unsigned compiler_threads = 1;
|
||||
const struct util_cpu_caps_t *caps = util_get_cpu_caps();
|
||||
unsigned hw_threads = caps->nr_cpus;
|
||||
|
||||
if (hw_threads >= 12) {
|
||||
compiler_threads = hw_threads * 3 / 4;
|
||||
} else if (hw_threads >= 6) {
|
||||
compiler_threads = hw_threads - 2;
|
||||
} else if (hw_threads >= 2) {
|
||||
compiler_threads = hw_threads - 1;
|
||||
}
|
||||
|
||||
if (!util_queue_init(&screen->shader_compiler_queue,
|
||||
"sh", 64, compiler_threads,
|
||||
UTIL_QUEUE_INIT_RESIZE_IF_FULL |
|
||||
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY,
|
||||
NULL)) {
|
||||
iris_screen_destroy(screen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pscreen;
|
||||
}
|
||||
|
|
|
@ -220,6 +220,8 @@ struct iris_screen {
|
|||
struct iris_bo *workaround_bo;
|
||||
struct iris_address workaround_address;
|
||||
|
||||
struct util_queue shader_compiler_queue;
|
||||
|
||||
struct disk_cache *disk_cache;
|
||||
|
||||
struct intel_measure_device measure;
|
||||
|
|
Loading…
Reference in New Issue