From 25ffb8401638a07d774cfc68ab6afc7d27780dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 5 Sep 2018 23:13:56 -0400 Subject: [PATCH] radeonsi: pin the winsys thread to the requested L3 cache (v2) v2: rebase Reviewed-by: Brian Paul --- src/gallium/drivers/radeon/radeon_winsys.h | 8 ++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 15 +++++++++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 10 ++++++++++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 13 +++++++++++++ 4 files changed, 46 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 99a793f9028..bb732ab314b 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -257,6 +257,14 @@ struct radeon_winsys { void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info); + /** + * A hint for the winsys that it should pin its execution threads to + * a group of cores sharing a specific L3 cache if the CPU has multiple + * L3 caches. This is needed for good multithreading performance on + * AMD Zen CPUs. + */ + void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache); + /************************************************************************** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index c259c260550..a5088adcf24 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -346,6 +346,20 @@ static void si_set_log_context(struct pipe_context *ctx, u_log_add_auto_logger(log, si_auto_log_cs, sctx); } +static void si_set_context_param(struct pipe_context *ctx, + enum pipe_context_param param, + unsigned value) +{ + struct radeon_winsys *ws = ((struct si_context *)ctx)->ws; + + switch (param) { + case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE: + ws->pin_threads_to_L3_cache(ws, value); + break; + default:; + } +} + static struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags) { @@ -366,6 +380,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.emit_string_marker = si_emit_string_marker; sctx->b.set_debug_callback = si_set_debug_callback; sctx->b.set_log_context = si_set_log_context; + sctx->b.set_context_param = si_set_context_param; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index dcbc075e3c5..f32bbd9d086 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -30,6 +30,7 @@ #include "amdgpu_cs.h" #include "amdgpu_public.h" +#include "util/u_cpu_detect.h" #include "util/u_hash_table.h" #include "util/hash_table.h" #include "util/xmlconfig.h" @@ -235,6 +236,14 @@ static const char* amdgpu_get_chip_name(struct radeon_winsys *ws) return amdgpu_get_marketing_name(dev); } +static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws, + unsigned cache) +{ + struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; + + util_pin_thread_to_L3(ws->cs_queue.threads[0], cache, + util_cpu_caps.cores_per_L3); +} PUBLIC struct radeon_winsys * amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, @@ -314,6 +323,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, ws->base.query_value = amdgpu_query_value; ws->base.read_registers = amdgpu_read_registers; ws->base.get_chip_name = amdgpu_get_chip_name; + ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache; amdgpu_bo_init_functions(ws); amdgpu_cs_init_functions(ws); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index f8702e7c601..19472a50ce1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -29,6 +29,7 @@ #include "radeon_drm_cs.h" #include "radeon_drm_public.h" +#include "util/u_cpu_detect.h" #include "util/u_memory.h" #include "util/u_hash_table.h" @@ -797,6 +798,17 @@ static int handle_compare(void *key1, void *key2) return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); } +static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws, + unsigned cache) +{ + struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; + + if (util_queue_is_initialized(&rws->cs_queue)) { + util_pin_thread_to_L3(rws->cs_queue.threads[0], cache, + util_cpu_caps.cores_per_L3); + } +} + PUBLIC struct radeon_winsys * radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, radeon_screen_create_t screen_create) @@ -864,6 +876,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, ws->base.unref = radeon_winsys_unref; ws->base.destroy = radeon_winsys_destroy; ws->base.query_info = radeon_query_info; + ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache; ws->base.cs_request_feature = radeon_cs_request_feature; ws->base.query_value = radeon_query_value; ws->base.read_registers = radeon_read_registers;