mirror of https://gitlab.freedesktop.org/mesa/mesa
nvk: Use the NAK helpers to fill QMDs
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28756>
This commit is contained in:
parent
9ace83603d
commit
16f2249a80
|
@ -26,26 +26,6 @@
|
|||
#include "nvk_clc597.h"
|
||||
#include "nvk_clc6c0.h"
|
||||
|
||||
#include "drf.h"
|
||||
#include "cla0c0qmd.h"
|
||||
#include "clc0c0qmd.h"
|
||||
#include "clc3c0qmd.h"
|
||||
#include "clc6c0qmd.h"
|
||||
|
||||
/* Per-class QMD field accessors.  Each NVIDIA compute class carries its own
 * QMD (Queue Meta Data) layout version; the QMDV tag selects the matching
 * field definitions from the cl*c0qmd.h headers:
 *
 *    NVA0C0 (Kepler)  -> QMD 0.6
 *    NVC0C0 (Pascal)  -> QMD 2.1
 *    NVC3C0 (Volta)   -> QMD 2.2
 *    NVC6C0 (Ampere)  -> QMD 3.0
 *
 * VAL_SET writes an integer value, DEF_SET a named enum define.
 */
#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
#define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)
|
||||
|
||||
/* Generic QMD setters used by nvk_qmd_init_base: token-paste the class id
 * and the QMD major/minor version to form the NV<class>_QMDV<maj>_<min>
 * field name expected by NVDEF_MW_SET/NVVAL_MW_SET.
 */
#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
   NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
   NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
|
||||
|
||||
VkResult
|
||||
nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
|
||||
{
|
||||
|
@ -104,202 +84,6 @@ nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd)
|
|||
memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
|
||||
}
|
||||
|
||||
/* Encode a shared memory size for the Volta+ *_SM_CONFIG_SHARED_MEM_SIZE
 * QMD fields: round the byte count up to the next supported SM shared
 * memory configuration (8/16/32/64/96 KiB), then express it in 4 KiB
 * units biased by one.
 */
static int
gv100_sm_config_smem_size(uint32_t size)
{
   static const uint32_t configs[] = {
      8 * 1024, 16 * 1024, 32 * 1024, 64 * 1024,
   };

   /* Anything above 64 KiB gets the largest (96 KiB) configuration */
   uint32_t rounded = 96 * 1024;
   for (unsigned i = 0; i < sizeof(configs) / sizeof(configs[0]); i++) {
      if (size <= configs[i]) {
         rounded = configs[i];
         break;
      }
   }

   return (rounded / 4096) + 1;
}
|
||||
|
||||
/* Fill the QMD fields common to every layout version: API call limit,
 * barrier count, workgroup (CTA) dimensions, the QMD version stamp itself,
 * sampler indexing mode, and local/shared memory sizes.  class_id and
 * version_major/minor route each store through QMD_DEF_SET/QMD_VAL_SET to
 * the right per-class field definitions.
 *
 * This is a macro (not a function) because the field names only exist as
 * token-pasted identifiers per class/version.
 */
#define nvk_qmd_init_base(qmd, shader, class_id, version_major, version_minor) \
do { \
   QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
               shader->info.cs.local_size[0]); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
               shader->info.cs.local_size[1]); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
               shader->info.cs.local_size[2]); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
   QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
               align(shader->info.slm_size, 0x10)); \
   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
               align(shader->info.cs.smem_size, 0x100)); \
} while (0)
|
||||
|
||||
static void
|
||||
nva0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
|
||||
{
|
||||
nvk_qmd_init_base(qmd, shader, A0C0, 00, 06);
|
||||
|
||||
if (shader->info.cs.smem_size <= (16 << 10))
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
|
||||
else if (shader->info.cs.smem_size <= (32 << 10))
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
|
||||
else if (shader->info.cs.smem_size <= (48 << 10))
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
|
||||
else
|
||||
unreachable("Invalid shared memory size");
|
||||
|
||||
uint64_t addr = shader->hdr_addr;
|
||||
assert(addr < 0xffffffff);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
|
||||
{
|
||||
nvk_qmd_init_base(qmd, shader, C0C0, 02, 01);
|
||||
|
||||
uint64_t addr = shader->hdr_addr;
|
||||
assert(addr < 0xffffffff);
|
||||
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc3c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
|
||||
{
|
||||
nvk_qmd_init_base(qmd, shader, C3C0, 02, 02);
|
||||
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
|
||||
/* those are all QMD 2.2+ */
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(shader->info.cs.smem_size));
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(shader->info.cs.smem_size));
|
||||
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
|
||||
|
||||
uint64_t addr = shader->hdr_addr;
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc6c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
|
||||
{
|
||||
nvk_qmd_init_base(qmd, shader, C6C0, 03, 00);
|
||||
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
|
||||
/* those are all QMD 2.2+ */
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(shader->info.cs.smem_size));
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(shader->info.cs.smem_size));
|
||||
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
|
||||
|
||||
uint64_t addr = shader->hdr_addr;
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
|
||||
NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
|
||||
}
|
||||
|
||||
static void
|
||||
nvk_qmd_init(struct nvk_physical_device *pdev,
|
||||
uint32_t *qmd, const struct nvk_shader *shader)
|
||||
{
|
||||
if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
|
||||
nvc6c0_qmd_init(qmd, shader);
|
||||
else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
|
||||
nvc3c0_qmd_init(qmd, shader);
|
||||
else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
|
||||
nvc0c0_qmd_init(qmd, shader);
|
||||
else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
|
||||
nva0c0_qmd_init(qmd, shader);
|
||||
else
|
||||
unreachable("Unknown GPU generation");
|
||||
}
|
||||
|
||||
/* Write the workgroup-grid (CTA raster) dimensions of a dispatch into a
 * Kepler (QMD 0.6) QMD.
 */
static void
nva0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
                             uint32_t x, uint32_t y, uint32_t z)
{
   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
}
|
||||
|
||||
/* Write the workgroup-grid (CTA raster) dimensions of a dispatch into a
 * Pascal (QMD 2.1) QMD.
 */
static void
nvc0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
                             uint32_t x, uint32_t y, uint32_t z)
{
   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
   /* this field is different from older QMD versions */
   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
}
|
||||
|
||||
/* Write the workgroup-grid (CTA raster) dimensions of a dispatch into an
 * Ampere (QMD 3.0) QMD.
 */
static void
nvc6c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
                             uint32_t x, uint32_t y, uint32_t z)
{
   NVC6C0_QMDV03_00_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
   NVC6C0_QMDV03_00_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
   /* this field is different from older QMD versions */
   NVC6C0_QMDV03_00_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
}
|
||||
|
||||
static uint32_t
|
||||
qmd_dispatch_size_offset(const struct nv_device_info *devinfo)
|
||||
{
|
||||
assert(devinfo->cls_compute >= VOLTA_COMPUTE_A);
|
||||
uint32_t bit = DRF_LO(DRF_MW(NVC3C0_QMDV02_02_CTA_RASTER_WIDTH));
|
||||
assert(bit % 32 == 0);
|
||||
assert(DRF_LO(DRF_MW(NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT)) == bit + 32);
|
||||
assert(DRF_LO(DRF_MW(NVC3C0_QMDV02_02_CTA_RASTER_DEPTH)) == bit + 64);
|
||||
return bit / 8;
|
||||
}
|
||||
|
||||
/* Bind constant buffer `index` (address + size, marked valid) in a Kepler
 * (QMD 0.6) QMD.
 *
 * NOTE(review): the full 64-bit address is passed to the ADDR_LOWER set;
 * presumably the field macro masks it down to the low bits — confirm
 * against cla0c0qmd.h.
 */
static inline void
nva0c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
                             uint32_t size, uint64_t address)
{
   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
   NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
}
|
||||
|
||||
/* Bind constant buffer `index` (address + size, marked valid) in a Pascal
 * (QMD 2.1) QMD.  Unlike QMD 0.6, the size field is stored in 16-byte
 * units, hence the DIV_ROUND_UP.
 */
static inline void
nvc0c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
                             uint32_t size, uint64_t address)
{
   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
                            DIV_ROUND_UP(size, 16));
   NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
}
|
||||
|
||||
/* Bind constant buffer `index` (address + size, marked valid) in an Ampere
 * (QMD 3.0) QMD.  The size field is stored in 16-byte units, hence the
 * DIV_ROUND_UP.
 */
static inline void
nvc6c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
                             uint32_t size, uint64_t address)
{
   NVC6C0_QMDV03_00_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
   NVC6C0_QMDV03_00_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
   NVC6C0_QMDV03_00_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
                            DIV_ROUND_UP(size, 16));
   NVC6C0_QMDV03_00_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
}
|
||||
|
||||
|
||||
void
|
||||
nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
|
||||
struct nvk_shader *shader)
|
||||
|
@ -350,28 +134,18 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
desc->root.root_desc_addr = root_desc_addr;
|
||||
memcpy(root_desc_map, &desc->root, sizeof(desc->root));
|
||||
|
||||
uint32_t qmd[64];
|
||||
memset(qmd, 0, sizeof(qmd));
|
||||
nvk_qmd_init(pdev, qmd, shader);
|
||||
|
||||
if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
|
||||
nvc6c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
|
||||
desc->root.cs.group_count[0],
|
||||
desc->root.cs.group_count[1],
|
||||
desc->root.cs.group_count[2]);
|
||||
} else if (nvk_cmd_buffer_compute_cls(cmd) >= PASCAL_COMPUTE_A) {
|
||||
nvc0c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
|
||||
desc->root.cs.group_count[0],
|
||||
desc->root.cs.group_count[1],
|
||||
desc->root.cs.group_count[2]);
|
||||
} else {
|
||||
assert(nvk_cmd_buffer_compute_cls(cmd) >= KEPLER_COMPUTE_A);
|
||||
nva0c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
|
||||
desc->root.cs.group_count[0],
|
||||
desc->root.cs.group_count[1],
|
||||
desc->root.cs.group_count[2]);
|
||||
}
|
||||
struct nak_qmd_info qmd_info = {
|
||||
.addr = shader->hdr_addr,
|
||||
.smem_size = shader->info.cs.smem_size,
|
||||
.smem_max = NVK_MAX_SHARED_SIZE,
|
||||
.global_size = {
|
||||
desc->root.cs.group_count[0],
|
||||
desc->root.cs.group_count[1],
|
||||
desc->root.cs.group_count[2],
|
||||
},
|
||||
};
|
||||
|
||||
assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs));
|
||||
for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
|
||||
const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];
|
||||
|
||||
|
@ -392,17 +166,17 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
ba.size = align(ba.size, min_cbuf_alignment);
|
||||
ba.size = MIN2(ba.size, NVK_MAX_CBUF_SIZE);
|
||||
|
||||
if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
|
||||
nvc6c0_cp_launch_desc_set_cb(qmd, c, ba.size, ba.base_addr);
|
||||
} else if (nvk_cmd_buffer_compute_cls(cmd) >= PASCAL_COMPUTE_A) {
|
||||
nvc0c0_cp_launch_desc_set_cb(qmd, c, ba.size, ba.base_addr);
|
||||
} else {
|
||||
assert(nvk_cmd_buffer_compute_cls(cmd) >= KEPLER_COMPUTE_A);
|
||||
nva0c0_cp_launch_desc_set_cb(qmd, c, ba.size, ba.base_addr);
|
||||
}
|
||||
qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
|
||||
.index = c,
|
||||
.addr = ba.base_addr,
|
||||
.size = ba.size,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t qmd[64];
|
||||
nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
|
||||
|
||||
uint64_t qmd_addr;
|
||||
result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
|
@ -536,7 +310,7 @@ nvk_mme_dispatch_indirect(struct mme_builder *b)
|
|||
|
||||
mme_tu104_read_fifoed(b, dispatch_addr, mme_imm(3));
|
||||
|
||||
uint32_t qmd_size_offset = qmd_dispatch_size_offset(b->devinfo);
|
||||
uint32_t qmd_size_offset = nak_qmd_dispatch_size_offset(b->devinfo);
|
||||
uint32_t root_desc_size_offset =
|
||||
offsetof(struct nvk_root_descriptor_table, cs.group_count);
|
||||
|
||||
|
|
Loading…
Reference in New Issue