radv: allocate shaders in the 32-bit address space to skip PGM_HI

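This reduces the number of emitted registers: because shader buffers are
now allocated with RADEON_FLAG_32BIT, the PGM_HI registers are written once
from address32_hi in si_emit_compute()/si_emit_graphics(), and each pipeline
only has to emit the corresponding PGM_LO register.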

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12466>
This commit is contained in:
Samuel Pitoiset 2021-08-19 09:04:46 +02:00 committed by Marge Bot
parent 2dc90ca8a4
commit e0353296da
3 changed files with 29 additions and 22 deletions

View File

@ -4420,9 +4420,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipelin
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
uint32_t rsrc2 = shader->config.rsrc2;
radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
@ -4447,9 +4445,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
: pipeline->shaders[MESA_SHADER_VERTEX];
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
@ -4592,13 +4589,9 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipelin
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
} else {
radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
}
radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
@ -4793,13 +4786,9 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
@ -5576,9 +5565,7 @@ radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipelin
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
struct radv_device *device = pipeline->device;
radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B834_DATA(va >> 40));
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_emit(cs, shader->config.rsrc1);

View File

@ -1059,7 +1059,7 @@ radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant
slab->size = MAX2(256 * 1024, shader->code_size);
VkResult result = device->ws->buffer_create(
device->ws, slab->size, 256, RADEON_DOMAIN_VRAM,
RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT |
(device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
: RADEON_FLAG_READ_ONLY),
RADV_BO_PRIORITY_SHADER, 0, &slab->bo);

View File

@ -79,6 +79,9 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI,
S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8));
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
* renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
@ -291,6 +294,23 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
}
if (device->physical_device->rad_info.chip_class >= GFX10) {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
} else if (device->physical_device->rad_info.chip_class == GFX9) {
radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
S_00B414_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
S_00B214_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
} else {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
}
unsigned cu_mask_ps = 0xffffffff;
/* It's wasteful to enable all CUs for PS if shader arrays have a