radv/gfx10: add Wave32 support for vertex, tessellation and geometry shaders
It can be enabled with RADV_PERFTEST=gewave32.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
parent
953bbacc23
commit
8a86908e9a
|
@ -66,6 +66,7 @@ enum {
|
||||||
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
|
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
|
||||||
RADV_PERFTEST_CS_WAVE_32 = 0x100,
|
RADV_PERFTEST_CS_WAVE_32 = 0x100,
|
||||||
RADV_PERFTEST_PS_WAVE_32 = 0x200,
|
RADV_PERFTEST_PS_WAVE_32 = 0x200,
|
||||||
|
RADV_PERFTEST_GE_WAVE_32 = 0x400,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
@ -386,6 +386,7 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||||
/* Determine the number of threads per wave for all stages. */
|
/* Determine the number of threads per wave for all stages. */
|
||||||
device->cs_wave_size = 64;
|
device->cs_wave_size = 64;
|
||||||
device->ps_wave_size = 64;
|
device->ps_wave_size = 64;
|
||||||
|
device->ge_wave_size = 64;
|
||||||
|
|
||||||
if (device->rad_info.chip_class >= GFX10) {
|
if (device->rad_info.chip_class >= GFX10) {
|
||||||
if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
|
if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
|
||||||
|
@ -394,6 +395,9 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||||
/* For pixel shaders, wave64 is recommended. */
|
/* For pixel shaders, wave64 is recommended. */
|
||||||
if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
|
if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
|
||||||
device->ps_wave_size = 32;
|
device->ps_wave_size = 32;
|
||||||
|
|
||||||
|
if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
|
||||||
|
device->ge_wave_size = 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
radv_physical_device_init_mem_types(device);
|
radv_physical_device_init_mem_types(device);
|
||||||
|
@ -509,6 +513,7 @@ static const struct debug_control radv_perftest_options[] = {
|
||||||
{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
|
{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
|
||||||
{"cswave32", RADV_PERFTEST_CS_WAVE_32},
|
{"cswave32", RADV_PERFTEST_CS_WAVE_32},
|
||||||
{"pswave32", RADV_PERFTEST_PS_WAVE_32},
|
{"pswave32", RADV_PERFTEST_PS_WAVE_32},
|
||||||
|
{"gewave32", RADV_PERFTEST_GE_WAVE_32},
|
||||||
{NULL, 0}
|
{NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -295,7 +295,7 @@ get_tcs_num_patches(struct radv_shader_context *ctx)
|
||||||
|
|
||||||
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
|
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
|
||||||
if (ctx->options->chip_class == GFX6) {
|
if (ctx->options->chip_class == GFX6) {
|
||||||
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
|
unsigned one_wave = ctx->options->ge_wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp);
|
||||||
num_patches = MIN2(num_patches, one_wave);
|
num_patches = MIN2(num_patches, one_wave);
|
||||||
}
|
}
|
||||||
return num_patches;
|
return num_patches;
|
||||||
|
@ -3038,7 +3038,8 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
||||||
LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4);
|
LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4);
|
||||||
vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
|
vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
|
||||||
LLVMBuildMul(ctx->ac.builder, wave_idx,
|
LLVMBuildMul(ctx->ac.builder, wave_idx,
|
||||||
LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
|
LLVMConstInt(ctx->ac.i32,
|
||||||
|
ctx->ac.wave_size, false), ""), "");
|
||||||
lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
|
lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
|
||||||
LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
|
LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
|
||||||
}
|
}
|
||||||
|
@ -3140,7 +3141,7 @@ static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx)
|
||||||
LLVMBuilderRef builder = ctx->ac.builder;
|
LLVMBuilderRef builder = ctx->ac.builder;
|
||||||
LLVMValueRef tmp;
|
LLVMValueRef tmp;
|
||||||
tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
|
tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
|
||||||
LLVMConstInt(ctx->ac.i32, 64, false), "");
|
LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
|
||||||
return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
|
return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4190,7 +4191,7 @@ ac_setup_rings(struct radv_shader_context *ctx)
|
||||||
*/
|
*/
|
||||||
LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
|
LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
|
||||||
uint64_t stream_offset = 0;
|
uint64_t stream_offset = 0;
|
||||||
unsigned num_records = 64;
|
unsigned num_records = ctx->ac.wave_size;
|
||||||
LLVMValueRef base_ring;
|
LLVMValueRef base_ring;
|
||||||
|
|
||||||
base_ring =
|
base_ring =
|
||||||
|
@ -4223,7 +4224,7 @@ ac_setup_rings(struct radv_shader_context *ctx)
|
||||||
ring = LLVMBuildInsertElement(ctx->ac.builder,
|
ring = LLVMBuildInsertElement(ctx->ac.builder,
|
||||||
ring, tmp, ctx->ac.i32_0, "");
|
ring, tmp, ctx->ac.i32_0, "");
|
||||||
|
|
||||||
stream_offset += stride * 64;
|
stream_offset += stride * ctx->ac.wave_size;
|
||||||
|
|
||||||
ring = LLVMBuildBitCast(ctx->ac.builder, ring,
|
ring = LLVMBuildBitCast(ctx->ac.builder, ring,
|
||||||
ctx->ac.v4i32, "");
|
ctx->ac.v4i32, "");
|
||||||
|
@ -4325,7 +4326,7 @@ radv_nir_shader_wave_size(struct nir_shader *const *shaders, int shader_count,
|
||||||
return options->cs_wave_size;
|
return options->cs_wave_size;
|
||||||
else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
|
else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
|
||||||
return options->ps_wave_size;
|
return options->ps_wave_size;
|
||||||
return 64;
|
return options->ge_wave_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
|
|
@ -1782,7 +1782,7 @@ calculate_ngg_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||||
|
|
||||||
/* Round up towards full wave sizes for better ALU utilization. */
|
/* Round up towards full wave sizes for better ALU utilization. */
|
||||||
if (!max_vert_out_per_gs_instance) {
|
if (!max_vert_out_per_gs_instance) {
|
||||||
const unsigned wavesize = 64;
|
const unsigned wavesize = pipeline->device->physical_device->ge_wave_size;
|
||||||
unsigned orig_max_esverts;
|
unsigned orig_max_esverts;
|
||||||
unsigned orig_max_gsprims;
|
unsigned orig_max_gsprims;
|
||||||
do {
|
do {
|
||||||
|
@ -4125,6 +4125,14 @@ radv_compute_vgt_shader_stages_en(const struct radv_pipeline *pipeline)
|
||||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
|
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
|
||||||
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||||
|
|
||||||
|
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||||
|
pipeline->device->physical_device->ge_wave_size == 32) {
|
||||||
|
/* legacy GS only supports Wave64 */
|
||||||
|
stages |= S_028B54_HS_W32_EN(1) |
|
||||||
|
S_028B54_GS_W32_EN(radv_pipeline_has_ngg(pipeline)) |
|
||||||
|
S_028B54_VS_W32_EN(1);
|
||||||
|
}
|
||||||
|
|
||||||
return stages;
|
return stages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -304,6 +304,7 @@ struct radv_physical_device {
|
||||||
/* Number of threads per wave. */
|
/* Number of threads per wave. */
|
||||||
uint8_t ps_wave_size;
|
uint8_t ps_wave_size;
|
||||||
uint8_t cs_wave_size;
|
uint8_t cs_wave_size;
|
||||||
|
uint8_t ge_wave_size;
|
||||||
|
|
||||||
/* This is the driver's on-disk cache used as a fallback as opposed to
|
/* This is the driver's on-disk cache used as a fallback as opposed to
|
||||||
* the pipeline cache defined by apps.
|
* the pipeline cache defined by apps.
|
||||||
|
|
|
@ -675,7 +675,7 @@ radv_get_shader_wave_size(const struct radv_physical_device *pdevice,
|
||||||
return pdevice->cs_wave_size;
|
return pdevice->cs_wave_size;
|
||||||
else if (stage == MESA_SHADER_FRAGMENT)
|
else if (stage == MESA_SHADER_FRAGMENT)
|
||||||
return pdevice->ps_wave_size;
|
return pdevice->ps_wave_size;
|
||||||
return 64;
|
return pdevice->ge_wave_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void radv_postprocess_config(const struct radv_physical_device *pdevice,
|
static void radv_postprocess_config(const struct radv_physical_device *pdevice,
|
||||||
|
@ -1144,6 +1144,7 @@ shader_variant_compile(struct radv_device *device,
|
||||||
options->address32_hi = device->physical_device->rad_info.address32_hi;
|
options->address32_hi = device->physical_device->rad_info.address32_hi;
|
||||||
options->cs_wave_size = device->physical_device->cs_wave_size;
|
options->cs_wave_size = device->physical_device->cs_wave_size;
|
||||||
options->ps_wave_size = device->physical_device->ps_wave_size;
|
options->ps_wave_size = device->physical_device->ps_wave_size;
|
||||||
|
options->ge_wave_size = device->physical_device->ge_wave_size;
|
||||||
|
|
||||||
if (options->supports_spill)
|
if (options->supports_spill)
|
||||||
tm_options |= AC_TM_SUPPORTS_SPILL;
|
tm_options |= AC_TM_SUPPORTS_SPILL;
|
||||||
|
|
|
@ -130,6 +130,7 @@ struct radv_nir_compiler_options {
|
||||||
uint32_t address32_hi;
|
uint32_t address32_hi;
|
||||||
uint8_t cs_wave_size;
|
uint8_t cs_wave_size;
|
||||||
uint8_t ps_wave_size;
|
uint8_t ps_wave_size;
|
||||||
|
uint8_t ge_wave_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum radv_ud_index {
|
enum radv_ud_index {
|
||||||
|
|
Loading…
Reference in New Issue