radeonsi: pin the winsys thread to the requested L3 cache (v2)

v2: rebase

Reviewed-by: Brian Paul <brianp@vmware.com>
This commit is contained in:
Marek Olšák 2018-09-05 23:13:56 -04:00
parent 8016639f63
commit 25ffb84016
4 changed files with 46 additions and 0 deletions

View File

@ -257,6 +257,14 @@ struct radeon_winsys {
void (*query_info)(struct radeon_winsys *ws,
struct radeon_info *info);
/**
* A hint for the winsys that it should pin its execution threads to
* a group of cores sharing a specific L3 cache if the CPU has multiple
* L3 caches. This is needed for good multithreading performance on
* AMD Zen CPUs.
*/
void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);
/**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*

View File

@ -346,6 +346,20 @@ static void si_set_log_context(struct pipe_context *ctx,
u_log_add_auto_logger(log, si_auto_log_cs, sctx);
}
/**
 * Context-parameter hook installed as sctx->b.set_context_param.
 *
 * Only PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE is acted upon: the value
 * is handed to the winsys pin_threads_to_L3_cache callback as the cache
 * index. Any other parameter is silently ignored.
 */
static void si_set_context_param(struct pipe_context *ctx,
				 enum pipe_context_param param,
				 unsigned value)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct radeon_winsys *winsys = sctx->ws;

	if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE)
		winsys->pin_threads_to_L3_cache(winsys, value);

	/* All remaining parameters are intentionally no-ops here. */
}
static struct pipe_context *si_create_context(struct pipe_screen *screen,
unsigned flags)
{
@ -366,6 +380,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->b.emit_string_marker = si_emit_string_marker;
sctx->b.set_debug_callback = si_set_debug_callback;
sctx->b.set_log_context = si_set_log_context;
sctx->b.set_context_param = si_set_context_param;
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

View File

@ -30,6 +30,7 @@
#include "amdgpu_cs.h"
#include "amdgpu_public.h"
#include "util/u_cpu_detect.h"
#include "util/u_hash_table.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
@ -235,6 +236,14 @@ static const char* amdgpu_get_chip_name(struct radeon_winsys *ws)
return amdgpu_get_marketing_name(dev);
}
/**
 * radeon_winsys::pin_threads_to_L3_cache implementation for amdgpu.
 *
 * Passes the first thread of the winsys CS queue to util_pin_thread_to_L3()
 * together with the requested cache index and the cores-per-L3 count taken
 * from util_cpu_caps.
 */
static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
                                           unsigned cache)
{
   struct amdgpu_winsys *amd_ws = (struct amdgpu_winsys *)rws;

   util_pin_thread_to_L3(amd_ws->cs_queue.threads[0],
                         cache,
                         util_cpu_caps.cores_per_L3);
}
PUBLIC struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
@ -314,6 +323,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
ws->base.query_value = amdgpu_query_value;
ws->base.read_registers = amdgpu_read_registers;
ws->base.get_chip_name = amdgpu_get_chip_name;
ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);

View File

@ -29,6 +29,7 @@
#include "radeon_drm_cs.h"
#include "radeon_drm_public.h"
#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_hash_table.h"
@ -797,6 +798,17 @@ static int handle_compare(void *key1, void *key2)
return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
}
/**
 * radeon_winsys::pin_threads_to_L3_cache implementation for the radeon
 * DRM winsys.
 *
 * Does nothing when the CS queue has not been initialized. Otherwise the
 * queue's first thread is passed to util_pin_thread_to_L3() with the
 * requested cache index and the cores-per-L3 count from util_cpu_caps.
 */
static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
                                           unsigned cache)
{
   struct radeon_drm_winsys *drm_ws = (struct radeon_drm_winsys *)ws;

   /* Without an initialized queue there is no thread to pin. */
   if (!util_queue_is_initialized(&drm_ws->cs_queue))
      return;

   util_pin_thread_to_L3(drm_ws->cs_queue.threads[0],
                         cache,
                         util_cpu_caps.cores_per_L3);
}
PUBLIC struct radeon_winsys *
radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
radeon_screen_create_t screen_create)
@ -864,6 +876,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
ws->base.unref = radeon_winsys_unref;
ws->base.destroy = radeon_winsys_destroy;
ws->base.query_info = radeon_query_info;
ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
ws->base.cs_request_feature = radeon_cs_request_feature;
ws->base.query_value = radeon_query_value;
ws->base.read_registers = radeon_read_registers;