From 16f2249a8042e48ca6dbad3687108d767a626df2 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Mon, 15 Apr 2024 22:32:06 -0500
Subject: [PATCH] nvk: Use the NAK helpers to fill QMDs

Part-of: 
---
 src/nouveau/vulkan/nvk_cmd_dispatch.c | 266 ++------------------------
 1 file changed, 20 insertions(+), 246 deletions(-)

diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c
index 6842f8b366b1a..1614cc40694de 100644
--- a/src/nouveau/vulkan/nvk_cmd_dispatch.c
+++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c
@@ -26,26 +26,6 @@
 #include "nvk_clc597.h"
 #include "nvk_clc6c0.h"
 
-#include "drf.h"
-#include "cla0c0qmd.h"
-#include "clc0c0qmd.h"
-#include "clc3c0qmd.h"
-#include "clc6c0qmd.h"
-
-#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
-#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
-#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
-#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
-#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
-#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
-#define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
-#define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)
-
-#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
-   NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
-#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
-   NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
-
 VkResult
 nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
 {
@@ -104,202 +84,6 @@ nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd)
    memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
 }
 
-static int
-gv100_sm_config_smem_size(uint32_t size)
-{
-   if (size > 64 * 1024) size = 96 * 1024;
-   else if (size > 32 * 1024) size = 64 * 1024;
-   else if (size > 16 * 1024) size = 32 * 1024;
-   else if (size > 8 * 1024) size = 16 * 1024;
-   else size = 8 * 1024;
-   return (size / 4096) + 1;
-}
-
-#define nvk_qmd_init_base(qmd, shader, class_id, version_major, version_minor) \
-do { \
-   QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
-               shader->info.cs.local_size[0]); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
-               shader->info.cs.local_size[1]); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
-               shader->info.cs.local_size[2]); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
-   QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
-               align(shader->info.slm_size, 0x10)); \
-   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
-               align(shader->info.cs.smem_size, 0x100)); \
-} while (0)
-
-static void
-nva0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
-{
-   nvk_qmd_init_base(qmd, shader, A0C0, 00, 06);
-
-   if (shader->info.cs.smem_size <= (16 << 10))
-      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
-   else if (shader->info.cs.smem_size <= (32 << 10))
-      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
-   else if (shader->info.cs.smem_size <= (48 << 10))
-      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
-   else
-      unreachable("Invalid shared memory size");
-
-   uint64_t addr = shader->hdr_addr;
-   assert(addr < 0xffffffff);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
-}
-
-static void
-nvc0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
-{
-   nvk_qmd_init_base(qmd, shader, C0C0, 02, 01);
-
-   uint64_t addr = shader->hdr_addr;
-   assert(addr < 0xffffffff);
-
-   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
-   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
-   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
-}
-
-static void
-nvc3c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
-{
-   nvk_qmd_init_base(qmd, shader, C3C0, 02, 02);
-
-   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
-   /* those are all QMD 2.2+ */
-   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
-                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
-   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
-                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
-   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
-                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
-
-   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
-
-   uint64_t addr = shader->hdr_addr;
-   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
-   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
-}
-
-static void
-nvc6c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
-{
-   nvk_qmd_init_base(qmd, shader, C6C0, 03, 00);
-
-   NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
-   /* those are all QMD 2.2+ */
-   NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
-                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
-   NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
-                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
-   NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
-                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
-
-   NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
-
-   uint64_t addr = shader->hdr_addr;
-   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
-   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
-}
-
-static void
-nvk_qmd_init(struct nvk_physical_device *pdev,
-             uint32_t *qmd, const struct nvk_shader *shader)
-{
-   if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
-      nvc6c0_qmd_init(qmd, shader);
-   else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
-      nvc3c0_qmd_init(qmd, shader);
-   else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
-      nvc0c0_qmd_init(qmd, shader);
-   else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
-      nva0c0_qmd_init(qmd, shader);
-   else
-      unreachable("Unknown GPU generation");
-}
-
-static void
-nva0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
-                             uint32_t x, uint32_t y, uint32_t z)
-{
-   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
-}
-
-static void
-nvc0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
-                             uint32_t x, uint32_t y, uint32_t z)
-{
-   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
-   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
-   /* this field is different from older QMD versions */
-   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
-}
-
-static void
-nvc6c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
-                             uint32_t x, uint32_t y, uint32_t z)
-{
-   NVC6C0_QMDV03_00_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
-   NVC6C0_QMDV03_00_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
-   /* this field is different from older QMD versions */
-   NVC6C0_QMDV03_00_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
-}
-
-static uint32_t
-qmd_dispatch_size_offset(const struct nv_device_info *devinfo)
-{
-   assert(devinfo->cls_compute >= VOLTA_COMPUTE_A);
-   uint32_t bit = DRF_LO(DRF_MW(NVC3C0_QMDV02_02_CTA_RASTER_WIDTH));
-   assert(bit % 32 == 0);
-   assert(DRF_LO(DRF_MW(NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT)) == bit + 32);
-   assert(DRF_LO(DRF_MW(NVC3C0_QMDV02_02_CTA_RASTER_DEPTH)) == bit + 64);
-   return bit / 8;
-}
-
-static inline void
-nva0c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
-                             uint32_t size, uint64_t address)
-{
-   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
-   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
-   NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
-}
-
-static inline void
-nvc0c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
-                             uint32_t size, uint64_t address)
-{
-   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
-   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
-   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
-                            DIV_ROUND_UP(size, 16));
-   NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
-}
-
-static inline void
-nvc6c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
-                             uint32_t size, uint64_t address)
-{
-   NVC6C0_QMDV03_00_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
-   NVC6C0_QMDV03_00_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
-   NVC6C0_QMDV03_00_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
-                            DIV_ROUND_UP(size, 16));
-   NVC6C0_QMDV03_00_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
-}
-
-
 void
 nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
                             struct nvk_shader *shader)
@@ -350,28 +134,18 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
    desc->root.root_desc_addr = root_desc_addr;
    memcpy(root_desc_map, &desc->root, sizeof(desc->root));
 
-   uint32_t qmd[64];
-   memset(qmd, 0, sizeof(qmd));
-   nvk_qmd_init(pdev, qmd, shader);
-
-   if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
-      nvc6c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
-                                   desc->root.cs.group_count[0],
-                                   desc->root.cs.group_count[1],
-                                   desc->root.cs.group_count[2]);
-   } else if (nvk_cmd_buffer_compute_cls(cmd) >= PASCAL_COMPUTE_A) {
-      nvc0c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
-                                   desc->root.cs.group_count[0],
-                                   desc->root.cs.group_count[1],
-                                   desc->root.cs.group_count[2]);
-   } else {
-      assert(nvk_cmd_buffer_compute_cls(cmd) >= KEPLER_COMPUTE_A);
-      nva0c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
-                                   desc->root.cs.group_count[0],
-                                   desc->root.cs.group_count[1],
-                                   desc->root.cs.group_count[2]);
-   }
+   struct nak_qmd_info qmd_info = {
+      .addr = shader->hdr_addr,
+      .smem_size = shader->info.cs.smem_size,
+      .smem_max = NVK_MAX_SHARED_SIZE,
+      .global_size = {
+         desc->root.cs.group_count[0],
+         desc->root.cs.group_count[1],
+         desc->root.cs.group_count[2],
+      },
+   };
 
+   assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs));
    for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
       const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];
 
@@ -392,17 +166,17 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
          ba.size = align(ba.size, min_cbuf_alignment);
         ba.size = MIN2(ba.size, NVK_MAX_CBUF_SIZE);
 
-         if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
-            nvc6c0_cp_launch_desc_set_cb(qmd, c, ba.size, ba.base_addr);
-         } else if (nvk_cmd_buffer_compute_cls(cmd) >= PASCAL_COMPUTE_A) {
-            nvc0c0_cp_launch_desc_set_cb(qmd, c, ba.size, ba.base_addr);
-         } else {
-            assert(nvk_cmd_buffer_compute_cls(cmd) >= KEPLER_COMPUTE_A);
-            nva0c0_cp_launch_desc_set_cb(qmd, c, ba.size, ba.base_addr);
-         }
+         qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
+            .index = c,
+            .addr = ba.base_addr,
+            .size = ba.size,
+         };
       }
    }
 
+   uint32_t qmd[64];
+   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
+
    uint64_t qmd_addr;
   result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
    if (unlikely(result != VK_SUCCESS)) {
@@ -536,7 +310,7 @@ nvk_mme_dispatch_indirect(struct mme_builder *b)
 
    mme_tu104_read_fifoed(b, dispatch_addr, mme_imm(3));
 
-   uint32_t qmd_size_offset = qmd_dispatch_size_offset(b->devinfo);
+   uint32_t qmd_size_offset = nak_qmd_dispatch_size_offset(b->devinfo);
    uint32_t root_desc_size_offset =
       offsetof(struct nvk_root_descriptor_table, cs.group_count);
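
Note (not part of the commit; annotation for readers of this archive): the sketch
below restates the new code path so it can be read outside the diff context. It only
reuses names that appear in the hunks above (struct nak_qmd_info, struct nak_qmd_cbuf,
nak_fill_qmd, NVK_MAX_SHARED_SIZE, struct nvk_physical_device, struct nvk_shader); the
wrapper function and its parameters are invented purely for illustration. In the driver
this logic sits inline in nvk_flush_compute_state().

/* Illustrative sketch only -- mirrors the code added by the patch above. */
static void
example_fill_qmd(struct nvk_physical_device *pdev,
                 const struct nvk_shader *shader,
                 const uint32_t group_count[3],
                 uint64_t cbuf_addr, uint32_t cbuf_size)
{
   /* Describe the dispatch in a generation-independent form. */
   struct nak_qmd_info qmd_info = {
      .addr = shader->hdr_addr,               /* shader header GPU address */
      .smem_size = shader->info.cs.smem_size, /* shared memory actually used */
      .smem_max = NVK_MAX_SHARED_SIZE,        /* shared memory upper bound */
      .global_size = {
         group_count[0],
         group_count[1],
         group_count[2],
      },
   };

   /* One nak_qmd_cbuf entry per bound constant buffer. */
   qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
      .index = 0,
      .addr = cbuf_addr,
      .size = cbuf_size,
   };

   /* NAK encodes the QMD for whatever compute class pdev->info reports,
    * replacing the Kepler/Pascal/Volta/Ampere selection that the patch
    * deletes from nvk_cmd_dispatch.c. */
   uint32_t qmd[64];
   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
}

For indirect dispatch, nak_qmd_dispatch_size_offset() (also adopted above) presumably
keeps the contract of the deleted qmd_dispatch_size_offset(): it returns the byte offset
of the three dispatch-size words inside the QMD so the MME macro can patch them at
execution time.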