amd: change chip_class naming to "enum amd_gfx_level gfx_level"
This aligns the naming with PAL. Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16469>
This commit is contained in:
parent
6dcf7f651f
commit
39800f0fa3
|
@ -92,7 +92,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
|
|||
break;
|
||||
case R_0286E8_SPI_TMPRING_SIZE:
|
||||
case R_00B860_COMPUTE_TMPRING_SIZE:
|
||||
if (info->chip_class >= GFX11)
|
||||
if (info->gfx_level >= GFX11)
|
||||
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256;
|
||||
else
|
||||
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 1024;
|
||||
|
@ -126,7 +126,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
|
|||
*
|
||||
* For shader-db stats, set num_vgprs that the hw actually uses.
|
||||
*/
|
||||
if (info->chip_class == GFX10_3) {
|
||||
if (info->gfx_level == GFX10_3) {
|
||||
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
|
||||
}
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ struct ac_ib_parser {
|
|||
unsigned num_dw;
|
||||
const int *trace_ids;
|
||||
unsigned trace_id_count;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
ac_debug_addr_callback addr_callback;
|
||||
void *addr_callback_data;
|
||||
|
||||
|
@ -109,12 +109,12 @@ static void print_named_value(FILE *file, const char *name, uint32_t value, int
|
|||
print_value(file, value, bits);
|
||||
}
|
||||
|
||||
static const struct si_reg *find_register(enum chip_class chip_class, unsigned offset)
|
||||
static const struct si_reg *find_register(enum amd_gfx_level gfx_level, unsigned offset)
|
||||
{
|
||||
const struct si_reg *table;
|
||||
unsigned table_size;
|
||||
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX11:
|
||||
table = gfx11_reg_table;
|
||||
table_size = ARRAY_SIZE(gfx11_reg_table);
|
||||
|
@ -154,17 +154,17 @@ static const struct si_reg *find_register(enum chip_class chip_class, unsigned o
|
|||
return NULL;
|
||||
}
|
||||
|
||||
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset)
|
||||
const char *ac_get_register_name(enum amd_gfx_level gfx_level, unsigned offset)
|
||||
{
|
||||
const struct si_reg *reg = find_register(chip_class, offset);
|
||||
const struct si_reg *reg = find_register(gfx_level, offset);
|
||||
|
||||
return reg ? sid_strings + reg->name_offset : "(no name)";
|
||||
}
|
||||
|
||||
void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
|
||||
void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, unsigned offset, uint32_t value,
|
||||
uint32_t field_mask)
|
||||
{
|
||||
const struct si_reg *reg = find_register(chip_class, offset);
|
||||
const struct si_reg *reg = find_register(gfx_level, offset);
|
||||
|
||||
if (reg) {
|
||||
const char *reg_name = sid_strings + reg->name_offset;
|
||||
|
@ -252,7 +252,7 @@ static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset
|
|||
}
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
ac_dump_reg(f, ib->chip_class, reg + i * 4, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, reg + i * 4, ac_ib_get(ib), ~0);
|
||||
}
|
||||
|
||||
static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
||||
|
@ -297,30 +297,30 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
|
||||
break;
|
||||
case PKT3_ACQUIRE_MEM:
|
||||
ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
|
||||
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
|
||||
if (ib->chip_class >= GFX10)
|
||||
ac_dump_reg(f, ib->chip_class, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
|
||||
if (ib->gfx_level >= GFX10)
|
||||
ac_dump_reg(f, ib->gfx_level, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_SURFACE_SYNC:
|
||||
if (ib->chip_class >= GFX7) {
|
||||
ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
|
||||
if (ib->gfx_level >= GFX7) {
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
|
||||
} else {
|
||||
ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
|
||||
}
|
||||
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
|
||||
break;
|
||||
case PKT3_EVENT_WRITE: {
|
||||
uint32_t event_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
|
||||
ac_dump_reg(f, ib->gfx_level, R_028A90_VGT_EVENT_INITIATOR, event_dw,
|
||||
S_028A90_EVENT_TYPE(~0));
|
||||
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
|
||||
print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
|
||||
|
@ -332,7 +332,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
}
|
||||
case PKT3_EVENT_WRITE_EOP: {
|
||||
uint32_t event_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
|
||||
ac_dump_reg(f, ib->gfx_level, R_028A90_VGT_EVENT_INITIATOR, event_dw,
|
||||
S_028A90_EVENT_TYPE(~0));
|
||||
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
|
||||
print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
|
||||
|
@ -352,10 +352,10 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
}
|
||||
case PKT3_RELEASE_MEM: {
|
||||
uint32_t event_dw = ac_ib_get(ib);
|
||||
if (ib->chip_class >= GFX10) {
|
||||
ac_dump_reg(f, ib->chip_class, R_490_RELEASE_MEM_OP, event_dw, ~0u);
|
||||
if (ib->gfx_level >= GFX10) {
|
||||
ac_dump_reg(f, ib->gfx_level, R_490_RELEASE_MEM_OP, event_dw, ~0u);
|
||||
} else {
|
||||
ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
|
||||
ac_dump_reg(f, ib->gfx_level, R_028A90_VGT_EVENT_INITIATOR, event_dw,
|
||||
S_028A90_EVENT_TYPE(~0));
|
||||
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
|
||||
print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
|
||||
|
@ -387,52 +387,52 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
|
||||
break;
|
||||
case PKT3_DRAW_INDEX_AUTO:
|
||||
ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_DRAW_INDEX_2:
|
||||
ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_INDEX_TYPE:
|
||||
ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_NUM_INSTANCES:
|
||||
ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_WRITE_DATA:
|
||||
ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_370_CONTROL, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
/* The payload is written automatically */
|
||||
break;
|
||||
case PKT3_CP_DMA:
|
||||
ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_DMA_DATA:
|
||||
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_INDIRECT_BUFFER_SI:
|
||||
case PKT3_INDIRECT_BUFFER_CONST:
|
||||
case PKT3_INDIRECT_BUFFER_CIK: {
|
||||
uint32_t base_lo_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
|
||||
uint32_t base_hi_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
|
||||
uint32_t control_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->chip_class, R_3F2_IB_CONTROL, control_dw, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, R_3F2_IB_CONTROL, control_dw, ~0);
|
||||
|
||||
if (!ib->addr_callback)
|
||||
break;
|
||||
|
@ -590,7 +590,7 @@ static void format_ib_output(FILE *f, char *out)
|
|||
* \param f file
|
||||
* \param ib_ptr IB
|
||||
* \param num_dw size of the IB
|
||||
* \param chip_class chip class
|
||||
* \param gfx_level gfx level
|
||||
* \param trace_ids the last trace IDs that are known to have been reached
|
||||
* and executed by the CP, typically read from a buffer
|
||||
* \param trace_id_count The number of entries in the trace_ids array.
|
||||
|
@ -599,7 +599,7 @@ static void format_ib_output(FILE *f, char *out)
|
|||
* \param addr_callback_data user data for addr_callback
|
||||
*/
|
||||
void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_ids,
|
||||
unsigned trace_id_count, enum chip_class chip_class,
|
||||
unsigned trace_id_count, enum amd_gfx_level gfx_level,
|
||||
ac_debug_addr_callback addr_callback, void *addr_callback_data)
|
||||
{
|
||||
struct ac_ib_parser ib = {0};
|
||||
|
@ -607,7 +607,7 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_i
|
|||
ib.num_dw = num_dw;
|
||||
ib.trace_ids = trace_ids;
|
||||
ib.trace_id_count = trace_id_count;
|
||||
ib.chip_class = chip_class;
|
||||
ib.gfx_level = gfx_level;
|
||||
ib.addr_callback = addr_callback;
|
||||
ib.addr_callback_data = addr_callback_data;
|
||||
|
||||
|
@ -637,7 +637,7 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_i
|
|||
* \param f file
|
||||
* \param ib IB
|
||||
* \param num_dw size of the IB
|
||||
* \param chip_class chip class
|
||||
* \param gfx_level gfx level
|
||||
* \param trace_ids the last trace IDs that are known to have been reached
|
||||
* and executed by the CP, typically read from a buffer
|
||||
* \param trace_id_count The number of entries in the trace_ids array.
|
||||
|
@ -646,12 +646,12 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_i
|
|||
* \param addr_callback_data user data for addr_callback
|
||||
*/
|
||||
void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
|
||||
const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
|
||||
const char *name, enum amd_gfx_level gfx_level, ac_debug_addr_callback addr_callback,
|
||||
void *addr_callback_data)
|
||||
{
|
||||
fprintf(f, "------------------ %s begin ------------------\n", name);
|
||||
|
||||
ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count, chip_class, addr_callback,
|
||||
ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count, gfx_level, addr_callback,
|
||||
addr_callback_data);
|
||||
|
||||
fprintf(f, "------------------- %s end -------------------\n\n", name);
|
||||
|
@ -660,11 +660,11 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsign
|
|||
/**
|
||||
* Parse dmesg and return TRUE if a VM fault has been detected.
|
||||
*
|
||||
* \param chip_class chip class
|
||||
* \param gfx_level gfx level
|
||||
* \param old_dmesg_timestamp previous dmesg timestamp parsed at init time
|
||||
* \param out_addr detected VM fault addr
|
||||
*/
|
||||
bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
|
||||
bool ac_vm_fault_occured(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
|
||||
uint64_t *out_addr)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
@ -722,7 +722,7 @@ bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timesta
|
|||
|
||||
const char *header_line, *addr_line_prefix, *addr_line_format;
|
||||
|
||||
if (chip_class >= GFX9) {
|
||||
if (gfx_level >= GFX9) {
|
||||
/* Match this:
|
||||
* ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
|
||||
* ..: at page 0x0000000219f8f000 from 27
|
||||
|
@ -802,7 +802,7 @@ static int compare_wave(const void *p1, const void *p2)
|
|||
}
|
||||
|
||||
/* Return wave information. "waves" should be a large enough array. */
|
||||
unsigned ac_get_wave_info(enum chip_class chip_class,
|
||||
unsigned ac_get_wave_info(enum amd_gfx_level gfx_level,
|
||||
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP])
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
@ -811,7 +811,7 @@ unsigned ac_get_wave_info(enum chip_class chip_class,
|
|||
char line[2000], cmd[128];
|
||||
unsigned num_waves = 0;
|
||||
|
||||
sprintf(cmd, "umr -O halt_waves -wa %s", chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
|
||||
sprintf(cmd, "umr -O halt_waves -wa %s", gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
|
||||
|
||||
FILE *p = popen(cmd, "r");
|
||||
if (!p)
|
||||
|
|
|
@ -56,20 +56,20 @@ struct ac_wave_info {
|
|||
|
||||
typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
|
||||
|
||||
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset);
|
||||
void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
|
||||
const char *ac_get_register_name(enum amd_gfx_level gfx_level, unsigned offset);
|
||||
void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, unsigned offset, uint32_t value,
|
||||
uint32_t field_mask);
|
||||
void ac_parse_ib_chunk(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
|
||||
unsigned trace_id_count, enum chip_class chip_class,
|
||||
unsigned trace_id_count, enum amd_gfx_level gfx_level,
|
||||
ac_debug_addr_callback addr_callback, void *addr_callback_data);
|
||||
void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
|
||||
const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
|
||||
const char *name, enum amd_gfx_level gfx_level, ac_debug_addr_callback addr_callback,
|
||||
void *addr_callback_data);
|
||||
|
||||
bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
|
||||
bool ac_vm_fault_occured(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
|
||||
uint64_t *out_addr);
|
||||
|
||||
unsigned ac_get_wave_info(enum chip_class chip_class,
|
||||
unsigned ac_get_wave_info(enum amd_gfx_level gfx_level,
|
||||
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -318,7 +318,7 @@ has_tmz_support(amdgpu_device_handle dev,
|
|||
return false;
|
||||
|
||||
/* Find out ourselves if TMZ is enabled */
|
||||
if (info->chip_class < GFX9)
|
||||
if (info->gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
if (info->drm_minor < 36)
|
||||
|
@ -758,32 +758,32 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->lowercase_name[i] = tolower(info->name[i]);
|
||||
|
||||
if (info->family >= CHIP_GFX1100)
|
||||
info->chip_class = GFX11;
|
||||
info->gfx_level = GFX11;
|
||||
else if (info->family >= CHIP_SIENNA_CICHLID)
|
||||
info->chip_class = GFX10_3;
|
||||
info->gfx_level = GFX10_3;
|
||||
else if (info->family >= CHIP_NAVI10)
|
||||
info->chip_class = GFX10;
|
||||
info->gfx_level = GFX10;
|
||||
else if (info->family >= CHIP_VEGA10)
|
||||
info->chip_class = GFX9;
|
||||
info->gfx_level = GFX9;
|
||||
else if (info->family >= CHIP_TONGA)
|
||||
info->chip_class = GFX8;
|
||||
info->gfx_level = GFX8;
|
||||
else if (info->family >= CHIP_BONAIRE)
|
||||
info->chip_class = GFX7;
|
||||
info->gfx_level = GFX7;
|
||||
else if (info->family >= CHIP_TAHITI)
|
||||
info->chip_class = GFX6;
|
||||
info->gfx_level = GFX6;
|
||||
else {
|
||||
fprintf(stderr, "amdgpu: Unknown family.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Fix incorrect IP versions reported by the kernel. */
|
||||
if (info->chip_class == GFX10_3)
|
||||
if (info->gfx_level == GFX10_3)
|
||||
info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3;
|
||||
else if (info->chip_class == GFX10)
|
||||
else if (info->gfx_level == GFX10)
|
||||
info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 1;
|
||||
|
||||
info->smart_access_memory = info->all_vram_visible &&
|
||||
info->chip_class >= GFX10_3 &&
|
||||
info->gfx_level >= GFX10_3 &&
|
||||
util_get_cpu_caps()->family >= CPU_AMD_ZEN3 &&
|
||||
util_get_cpu_caps()->family < CPU_AMD_LAST;
|
||||
|
||||
|
@ -809,7 +809,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->vram_bit_width = amdinfo->vram_bit_width;
|
||||
|
||||
/* Set which chips have uncached device memory. */
|
||||
info->has_l2_uncached = info->chip_class >= GFX9;
|
||||
info->has_l2_uncached = info->gfx_level >= GFX9;
|
||||
|
||||
/* Set hardware information. */
|
||||
/* convert the shader/memory clocks from KHz to MHz */
|
||||
|
@ -836,17 +836,17 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->si_TA_CS_BC_BASE_ADDR_allowed = true;
|
||||
info->has_bo_metadata = true;
|
||||
info->has_gpu_reset_status_query = true;
|
||||
info->has_eqaa_surface_allocator = info->chip_class < GFX11;
|
||||
info->has_eqaa_surface_allocator = info->gfx_level < GFX11;
|
||||
info->has_format_bc1_through_bc7 = true;
|
||||
/* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */
|
||||
info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 || info->drm_minor >= 2;
|
||||
info->kernel_flushes_tc_l2_after_ib = info->gfx_level != GFX8 || info->drm_minor >= 2;
|
||||
info->has_indirect_compute_dispatch = true;
|
||||
/* GFX6 doesn't support unaligned loads. */
|
||||
info->has_unaligned_shader_loads = info->chip_class != GFX6;
|
||||
info->has_unaligned_shader_loads = info->gfx_level != GFX6;
|
||||
/* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once
|
||||
* these faults are mitigated in software.
|
||||
*/
|
||||
info->has_sparse_vm_mappings = info->chip_class >= GFX7 && info->drm_minor >= 13;
|
||||
info->has_sparse_vm_mappings = info->gfx_level >= GFX7 && info->drm_minor >= 13;
|
||||
info->has_2d_tiling = true;
|
||||
info->has_read_registers_query = true;
|
||||
info->has_scheduled_fence_dependency = info->drm_minor >= 28;
|
||||
|
@ -866,7 +866,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
|
||||
info->clock_crystal_freq = 1;
|
||||
}
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
info->tcc_cache_line_size = 128;
|
||||
|
||||
if (info->drm_minor >= 35) {
|
||||
|
@ -922,7 +922,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->mc_arb_ramcfg = amdinfo->mc_arb_ramcfg;
|
||||
info->gb_addr_config = amdinfo->gb_addr_cfg;
|
||||
if (info->chip_class >= GFX9) {
|
||||
if (info->gfx_level >= GFX9) {
|
||||
info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
|
||||
info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo->gb_addr_cfg);
|
||||
} else {
|
||||
|
@ -936,12 +936,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
*
|
||||
* LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
|
||||
*/
|
||||
info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||
/* lds_encode_granularity is the block size used for encoding registers.
|
||||
* lds_alloc_granularity is what the hardware will align the LDS size to.
|
||||
*/
|
||||
info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||
info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||
|
||||
/* This is "align_mask" copied from the kernel, maximums of all IP versions. */
|
||||
info->ib_pad_dw_mask[AMD_IP_GFX] = 0xff;
|
||||
|
@ -958,15 +958,15 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
* on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
|
||||
* SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.
|
||||
*/
|
||||
info->has_clear_state = info->chip_class >= GFX7;
|
||||
info->has_clear_state = info->gfx_level >= GFX7;
|
||||
|
||||
info->has_distributed_tess =
|
||||
info->chip_class >= GFX10 || (info->chip_class >= GFX8 && info->max_se >= 2);
|
||||
info->gfx_level >= GFX10 || (info->gfx_level >= GFX8 && info->max_se >= 2);
|
||||
|
||||
info->has_dcc_constant_encode =
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->chip_class >= GFX10;
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10;
|
||||
|
||||
info->has_rbplus = info->family == CHIP_STONEY || info->chip_class >= GFX9;
|
||||
info->has_rbplus = info->family == CHIP_STONEY || info->gfx_level >= GFX9;
|
||||
|
||||
/* Some chips have RB+ registers, but don't support RB+. Those must
|
||||
* always disable it.
|
||||
|
@ -974,13 +974,13 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->rbplus_allowed =
|
||||
info->has_rbplus &&
|
||||
(info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->chip_class >= GFX10_3);
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3);
|
||||
|
||||
info->has_out_of_order_rast =
|
||||
info->chip_class >= GFX8 && info->chip_class <= GFX9 && info->max_se >= 2;
|
||||
info->gfx_level >= GFX8 && info->gfx_level <= GFX9 && info->max_se >= 2;
|
||||
|
||||
/* Whether chips support double rate packed math instructions. */
|
||||
info->has_packed_math_16bit = info->chip_class >= GFX9;
|
||||
info->has_packed_math_16bit = info->gfx_level >= GFX9;
|
||||
|
||||
/* Whether chips support dot product instructions. A subset of these support a smaller
|
||||
* instruction encoding which accumulates with the destination.
|
||||
|
@ -991,13 +991,13 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
/* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
|
||||
info->has_load_ctx_reg_pkt =
|
||||
info->chip_class >= GFX9 || (info->chip_class >= GFX8 && info->me_fw_feature >= 41);
|
||||
info->gfx_level >= GFX9 || (info->gfx_level >= GFX8 && info->me_fw_feature >= 41);
|
||||
|
||||
info->cpdma_prefetch_writes_memory = info->chip_class <= GFX8;
|
||||
info->cpdma_prefetch_writes_memory = info->gfx_level <= GFX8;
|
||||
|
||||
info->has_gfx9_scissor_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
info->has_tc_compat_zrange_bug = info->chip_class >= GFX8 && info->chip_class <= GFX9;
|
||||
info->has_tc_compat_zrange_bug = info->gfx_level >= GFX8 && info->gfx_level <= GFX9;
|
||||
|
||||
info->has_msaa_sample_loc_bug =
|
||||
(info->family >= CHIP_POLARIS10 && info->family <= CHIP_POLARIS12) ||
|
||||
|
@ -1006,7 +1006,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->has_ls_vgpr_init_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
|
||||
|
||||
/* Drawing from 0-sized index buffers causes hangs on gfx10. */
|
||||
info->has_zero_index_buffer_bug = info->chip_class == GFX10;
|
||||
info->has_zero_index_buffer_bug = info->gfx_level == GFX10;
|
||||
|
||||
/* Whether chips are affected by the image load/sample/gather hw bug when
|
||||
* DCC is enabled (ie. WRITE_COMPRESS_ENABLE should be 0).
|
||||
|
@ -1018,10 +1018,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
/* DB has a bug when ITERATE_256 is set to 1 that can cause a hang. The
|
||||
* workaround is to set DECOMPRESS_ON_Z_PLANES to 2 for 4X MSAA D/S images.
|
||||
*/
|
||||
info->has_two_planes_iterate256_bug = info->chip_class == GFX10;
|
||||
info->has_two_planes_iterate256_bug = info->gfx_level == GFX10;
|
||||
|
||||
/* GFX10+Sienna: NGG->legacy transitions require VGT_FLUSH. */
|
||||
info->has_vgt_flush_ngg_legacy_bug = info->chip_class == GFX10 ||
|
||||
info->has_vgt_flush_ngg_legacy_bug = info->gfx_level == GFX10 ||
|
||||
info->family == CHIP_SIENNA_CICHLID;
|
||||
|
||||
/* HW bug workaround when CS threadgroups > 256 threads and async compute
|
||||
|
@ -1032,23 +1032,23 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
*
|
||||
* FIXME: RADV doesn't limit the number of threads for async compute.
|
||||
*/
|
||||
info->has_cs_regalloc_hang_bug = info->chip_class == GFX6 ||
|
||||
info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
|
||||
info->family == CHIP_BONAIRE ||
|
||||
info->family == CHIP_KABINI;
|
||||
|
||||
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
|
||||
* feature version wasn't bumped.
|
||||
*/
|
||||
info->has_32bit_predication = (info->chip_class >= GFX10 &&
|
||||
info->has_32bit_predication = (info->gfx_level >= GFX10 &&
|
||||
info->me_fw_feature >= 32) ||
|
||||
(info->chip_class == GFX9 &&
|
||||
(info->gfx_level == GFX9 &&
|
||||
info->me_fw_feature >= 52);
|
||||
|
||||
/* Get the number of good compute units. */
|
||||
info->num_good_compute_units = 0;
|
||||
for (i = 0; i < info->max_se; i++) {
|
||||
for (j = 0; j < info->max_sa_per_se; j++) {
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
assert(info->max_sa_per_se <= 2);
|
||||
info->cu_mask[i][j] = amdinfo->cu_bitmap[i % 4][(i / 4) * 2 + j];
|
||||
} else if (info->family == CHIP_ARCTURUS) {
|
||||
|
@ -1073,7 +1073,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
}
|
||||
|
||||
/* Derive the number of enabled SEs from the CU mask. */
|
||||
if (info->chip_class >= GFX10_3 && info->max_se > 1) {
|
||||
if (info->gfx_level >= GFX10_3 && info->max_se > 1) {
|
||||
info->num_se = 0;
|
||||
|
||||
for (unsigned se = 0; se < info->max_se; se++) {
|
||||
|
@ -1092,7 +1092,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
/* On GFX10, only whole WGPs (in units of 2 CUs) can be disabled,
|
||||
* and max - min <= 2.
|
||||
*/
|
||||
unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1;
|
||||
unsigned cu_group = info->gfx_level >= GFX10 ? 2 : 1;
|
||||
info->max_good_cu_per_sa =
|
||||
DIV_ROUND_UP(info->num_good_compute_units, (info->num_se * info->max_sa_per_se * cu_group)) *
|
||||
cu_group;
|
||||
|
@ -1108,16 +1108,16 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->pte_fragment_size = alignment_info.size_local;
|
||||
info->gart_page_size = alignment_info.size_remote;
|
||||
|
||||
if (info->chip_class == GFX6)
|
||||
if (info->gfx_level == GFX6)
|
||||
info->gfx_ib_pad_with_type2 = true;
|
||||
|
||||
/* GFX10 and maybe GFX9 need this alignment for cache coherency. */
|
||||
if (info->chip_class >= GFX9)
|
||||
if (info->gfx_level >= GFX9)
|
||||
info->ib_alignment = MAX2(info->ib_alignment, info->tcc_cache_line_size);
|
||||
|
||||
if ((info->drm_minor >= 31 && (info->family == CHIP_RAVEN || info->family == CHIP_RAVEN2 ||
|
||||
info->family == CHIP_RENOIR)) ||
|
||||
info->chip_class >= GFX10_3) {
|
||||
info->gfx_level >= GFX10_3) {
|
||||
if (info->max_render_backends == 1)
|
||||
info->use_display_dcc_unaligned = true;
|
||||
else
|
||||
|
@ -1126,10 +1126,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->has_stable_pstate = info->drm_minor >= 45;
|
||||
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
info->pc_lines = 1024;
|
||||
info->pbb_max_alloc_count = 255; /* minimum is 2, maximum is 256 */
|
||||
} else if (info->chip_class >= GFX9 && info->has_graphics) {
|
||||
} else if (info->gfx_level >= GFX9 && info->has_graphics) {
|
||||
unsigned pc_lines = 0;
|
||||
|
||||
switch (info->family) {
|
||||
|
@ -1163,27 +1163,27 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->pc_lines = pc_lines;
|
||||
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
info->pbb_max_alloc_count = pc_lines / 3;
|
||||
} else {
|
||||
info->pbb_max_alloc_count = MIN2(128, pc_lines / (4 * info->max_se));
|
||||
}
|
||||
}
|
||||
|
||||
if (info->chip_class >= GFX10_3)
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
info->max_wave64_per_simd = 16;
|
||||
else if (info->chip_class == GFX10)
|
||||
else if (info->gfx_level == GFX10)
|
||||
info->max_wave64_per_simd = 20;
|
||||
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
||||
info->max_wave64_per_simd = 8;
|
||||
else
|
||||
info->max_wave64_per_simd = 10;
|
||||
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd;
|
||||
info->min_sgpr_alloc = 128;
|
||||
info->sgpr_alloc_granularity = 128;
|
||||
} else if (info->chip_class >= GFX8) {
|
||||
} else if (info->gfx_level >= GFX8) {
|
||||
info->num_physical_sgprs_per_simd = 800;
|
||||
info->min_sgpr_alloc = 16;
|
||||
info->sgpr_alloc_granularity = 16;
|
||||
|
@ -1194,9 +1194,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
}
|
||||
|
||||
info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS;
|
||||
info->never_stop_sq_perf_counters = info->chip_class == GFX10 ||
|
||||
info->chip_class == GFX10_3;
|
||||
info->never_send_perfcounter_stop = info->chip_class == GFX11;
|
||||
info->never_stop_sq_perf_counters = info->gfx_level == GFX10 ||
|
||||
info->gfx_level == GFX10_3;
|
||||
info->never_send_perfcounter_stop = info->gfx_level == GFX11;
|
||||
info->has_sqtt_rb_harvest_bug = (info->family == CHIP_DIMGREY_CAVEFISH ||
|
||||
info->family == CHIP_BEIGE_GOBY ||
|
||||
info->family == CHIP_YELLOW_CARP ||
|
||||
|
@ -1205,7 +1205,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->max_render_backends;
|
||||
|
||||
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
|
||||
info->has_sqtt_auto_flush_mode_bug = info->chip_class == GFX10_3;
|
||||
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
|
||||
|
||||
info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104;
|
||||
|
||||
|
@ -1219,8 +1219,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->wave64_vgpr_alloc_granularity = 4;
|
||||
}
|
||||
|
||||
info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
|
||||
info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
|
||||
info->num_physical_wave64_vgprs_per_simd = info->gfx_level >= GFX10 ? 512 : 256;
|
||||
info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
|
||||
/* The maximum number of scratch waves. The number is only a function of the number of CUs.
|
||||
* It should be large enough to hold at least 1 threadgroup. Use the minimum per-SA CU count.
|
||||
|
@ -1249,7 +1249,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
exit(1);
|
||||
}
|
||||
|
||||
ac_parse_ib(stdout, ib, size / 4, NULL, 0, "IB", info->chip_class, NULL, NULL);
|
||||
ac_parse_ib(stdout, ib, size / 4, NULL, 0, "IB", info->gfx_level, NULL, NULL);
|
||||
free(ib);
|
||||
exit(0);
|
||||
}
|
||||
|
@ -1298,7 +1298,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " pci_id = 0x%x\n", info->pci_id);
|
||||
fprintf(f, " pci_rev_id = 0x%x\n", info->pci_rev_id);
|
||||
fprintf(f, " family = %i\n", info->family);
|
||||
fprintf(f, " chip_class = %i\n", info->chip_class);
|
||||
fprintf(f, " gfx_level = %i\n", info->gfx_level);
|
||||
fprintf(f, " family_id = %i\n", info->family_id);
|
||||
fprintf(f, " chip_external_rev = %i\n", info->chip_external_rev);
|
||||
fprintf(f, " clock_crystal_freq = %i KHz\n", info->clock_crystal_freq);
|
||||
|
@ -1459,15 +1459,15 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count);
|
||||
|
||||
fprintf(f, "GB_ADDR_CONFIG: 0x%08x\n", info->gb_addr_config);
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
fprintf(f, " num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
|
||||
fprintf(f, " pipe_interleave_size = %u\n",
|
||||
256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config));
|
||||
fprintf(f, " max_compressed_frags = %u\n",
|
||||
1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config));
|
||||
if (info->chip_class >= GFX10_3)
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
fprintf(f, " num_pkrs = %u\n", 1 << G_0098F8_NUM_PKRS(info->gb_addr_config));
|
||||
} else if (info->chip_class == GFX9) {
|
||||
} else if (info->gfx_level == GFX9) {
|
||||
fprintf(f, " num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
|
||||
fprintf(f, " pipe_interleave_size = %u\n",
|
||||
256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config));
|
||||
|
@ -1505,9 +1505,9 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
|||
}
|
||||
}
|
||||
|
||||
int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family)
|
||||
int ac_get_gs_table_depth(enum amd_gfx_level gfx_level, enum radeon_family family)
|
||||
{
|
||||
if (chip_class >= GFX9)
|
||||
if (gfx_level >= GFX9)
|
||||
return -1;
|
||||
|
||||
switch (family) {
|
||||
|
@ -1646,7 +1646,7 @@ void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
|
|||
assert(sh_per_se == 1 || sh_per_se == 2);
|
||||
assert(rb_per_pkr == 1 || rb_per_pkr == 2);
|
||||
|
||||
if (info->chip_class >= GFX7) {
|
||||
if (info->gfx_level >= GFX7) {
|
||||
unsigned raster_config_1 = *cik_raster_config_1_p;
|
||||
if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || (!se_mask[2] && !se_mask[3]))) {
|
||||
raster_config_1 &= C_028354_SE_PAIR_MAP;
|
||||
|
@ -1728,11 +1728,11 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves
|
|||
{
|
||||
unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
|
||||
|
||||
if (info->chip_class >= GFX7) {
|
||||
if (info->gfx_level >= GFX7) {
|
||||
unsigned num_cu_per_se = info->num_good_compute_units / info->num_se;
|
||||
|
||||
/* Gfx9 should set the limit to max instead of 0 to fix high priority compute. */
|
||||
if (info->chip_class == GFX9 && !max_waves_per_sh) {
|
||||
if (info->gfx_level == GFX9 && !max_waves_per_sh) {
|
||||
max_waves_per_sh = info->max_good_cu_per_sa * info->num_simd_per_compute_unit *
|
||||
info->max_wave64_per_simd;
|
||||
}
|
||||
|
@ -1760,7 +1760,7 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves
|
|||
void ac_get_hs_info(struct radeon_info *info,
|
||||
struct ac_hs_info *hs)
|
||||
{
|
||||
bool double_offchip_buffers = info->chip_class >= GFX7 &&
|
||||
bool double_offchip_buffers = info->gfx_level >= GFX7 &&
|
||||
info->family != CHIP_CARRIZO &&
|
||||
info->family != CHIP_STONEY;
|
||||
unsigned max_offchip_buffers_per_se;
|
||||
|
@ -1783,9 +1783,9 @@ void ac_get_hs_info(struct radeon_info *info,
|
|||
*
|
||||
* Follow AMDVLK here.
|
||||
*/
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
max_offchip_buffers_per_se = 256; /* TODO: we could decrease this to reduce memory/cache usage */
|
||||
} else if (info->chip_class >= GFX10) {
|
||||
} else if (info->gfx_level >= GFX10) {
|
||||
max_offchip_buffers_per_se = 128;
|
||||
} else if (info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20) {
|
||||
/* Only certain chips can use the maximum value. */
|
||||
|
@ -1807,7 +1807,7 @@ void ac_get_hs_info(struct radeon_info *info,
|
|||
offchip_granularity = V_03093C_X_8K_DWORDS;
|
||||
}
|
||||
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX6:
|
||||
max_offchip_buffers = MIN2(max_offchip_buffers, 126);
|
||||
break;
|
||||
|
@ -1824,15 +1824,15 @@ void ac_get_hs_info(struct radeon_info *info,
|
|||
|
||||
hs->max_offchip_buffers = max_offchip_buffers;
|
||||
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
/* OFFCHIP_BUFFERING is per SE. */
|
||||
hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers_per_se - 1) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
|
||||
} else if (info->chip_class >= GFX10_3) {
|
||||
} else if (info->gfx_level >= GFX10_3) {
|
||||
hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
|
||||
} else if (info->chip_class >= GFX7) {
|
||||
if (info->chip_class >= GFX8)
|
||||
} else if (info->gfx_level >= GFX7) {
|
||||
if (info->gfx_level >= GFX8)
|
||||
--max_offchip_buffers;
|
||||
hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
|
||||
|
|
|
@ -63,7 +63,7 @@ struct radeon_info {
|
|||
uint32_t pci_id;
|
||||
uint32_t pci_rev_id;
|
||||
enum radeon_family family;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint32_t family_id;
|
||||
uint32_t chip_external_rev;
|
||||
uint32_t clock_crystal_freq;
|
||||
|
@ -253,7 +253,7 @@ void ac_compute_driver_uuid(char *uuid, size_t size);
|
|||
|
||||
void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
|
||||
void ac_print_gpu_info(struct radeon_info *info, FILE *f);
|
||||
int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family);
|
||||
int ac_get_gs_table_depth(enum amd_gfx_level gfx_level, enum radeon_family family);
|
||||
void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p,
|
||||
uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p);
|
||||
void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
|
||||
|
|
|
@ -37,7 +37,7 @@ ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_
|
|||
|
||||
bool
|
||||
ac_nir_lower_indirect_derefs(nir_shader *shader,
|
||||
enum chip_class chip_class)
|
||||
enum amd_gfx_level gfx_level)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
|
@ -49,7 +49,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
|
|||
glsl_get_natural_size_align_bytes);
|
||||
|
||||
/* LLVM doesn't support VGPR indexing on GFX9. */
|
||||
bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
|
||||
bool llvm_has_working_vgpr_indexing = gfx_level != GFX9;
|
||||
|
||||
/* TODO: Indirect indexing of GS inputs is unimplemented.
|
||||
*
|
||||
|
|
|
@ -72,7 +72,7 @@ ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
|
|||
|
||||
void
|
||||
ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
enum amd_gfx_level gfx_level,
|
||||
bool tes_reads_tessfactors,
|
||||
uint64_t tes_inputs_read,
|
||||
uint64_t tes_patch_inputs_read,
|
||||
|
@ -88,17 +88,17 @@ ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
|
|||
|
||||
void
|
||||
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
enum amd_gfx_level gfx_level,
|
||||
unsigned num_reserved_es_outputs);
|
||||
|
||||
void
|
||||
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
enum amd_gfx_level gfx_level,
|
||||
unsigned num_reserved_es_outputs);
|
||||
|
||||
bool
|
||||
ac_nir_lower_indirect_derefs(nir_shader *shader,
|
||||
enum chip_class chip_class);
|
||||
enum amd_gfx_level gfx_level);
|
||||
|
||||
void
|
||||
ac_nir_lower_ngg_nogs(nir_shader *shader,
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
typedef struct {
|
||||
/* Which hardware generation we're dealing with */
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
|
||||
/* Number of ES outputs for which memory should be reserved.
|
||||
* When compacted, this should be the number of linked ES outputs.
|
||||
|
@ -127,7 +127,7 @@ lower_es_output_store(nir_builder *b,
|
|||
b->cursor = nir_before_instr(instr);
|
||||
nir_ssa_def *io_off = nir_build_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u);
|
||||
|
||||
if (st->chip_class <= GFX8) {
|
||||
if (st->gfx_level <= GFX8) {
|
||||
/* GFX6-8: ES is a separate HW stage, data is passed from ES to GS in VRAM. */
|
||||
nir_ssa_def *ring = nir_build_load_ring_esgs_amd(b);
|
||||
nir_ssa_def *es2gs_off = nir_build_load_ring_es2gs_offset_amd(b);
|
||||
|
@ -193,11 +193,11 @@ gs_per_vertex_input_offset(nir_builder *b,
|
|||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_src *vertex_src = nir_get_io_arrayed_index_src(instr);
|
||||
nir_ssa_def *vertex_offset = st->chip_class >= GFX9
|
||||
nir_ssa_def *vertex_offset = st->gfx_level >= GFX9
|
||||
? gs_per_vertex_input_vertex_offset_gfx9(b, vertex_src)
|
||||
: gs_per_vertex_input_vertex_offset_gfx6(b, vertex_src);
|
||||
|
||||
unsigned base_stride = st->chip_class >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
|
||||
unsigned base_stride = st->gfx_level >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
|
||||
nir_ssa_def *io_off = nir_build_calc_io_offset(b, instr, nir_imm_int(b, base_stride * 4u), base_stride);
|
||||
nir_ssa_def *off = nir_iadd(b, io_off, vertex_offset);
|
||||
return nir_imul_imm(b, off, 4u);
|
||||
|
@ -212,7 +212,7 @@ lower_gs_per_vertex_input_load(nir_builder *b,
|
|||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
|
||||
|
||||
if (st->chip_class >= GFX9)
|
||||
if (st->gfx_level >= GFX9)
|
||||
return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off,
|
||||
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
|
||||
|
||||
|
@ -230,11 +230,11 @@ filter_load_per_vertex_input(const nir_instr *instr, UNUSED const void *state)
|
|||
|
||||
void
|
||||
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
enum amd_gfx_level gfx_level,
|
||||
unsigned num_reserved_es_outputs)
|
||||
{
|
||||
lower_esgs_io_state state = {
|
||||
.chip_class = chip_class,
|
||||
.gfx_level = gfx_level,
|
||||
.num_reserved_es_outputs = num_reserved_es_outputs,
|
||||
};
|
||||
|
||||
|
@ -246,11 +246,11 @@ ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
|
|||
|
||||
void
|
||||
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
enum amd_gfx_level gfx_level,
|
||||
unsigned num_reserved_es_outputs)
|
||||
{
|
||||
lower_esgs_io_state state = {
|
||||
.chip_class = chip_class,
|
||||
.gfx_level = gfx_level,
|
||||
.num_reserved_es_outputs = num_reserved_es_outputs,
|
||||
};
|
||||
|
||||
|
|
|
@ -121,7 +121,7 @@
|
|||
|
||||
typedef struct {
|
||||
/* Which hardware generation we're dealing with */
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
|
||||
/* True if merged VS+TCS (on GFX9+) has the same number
|
||||
* of input and output patch size.
|
||||
|
@ -545,7 +545,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
|
|||
nir_ssa_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u);
|
||||
unsigned tess_factors_const_offset = 0;
|
||||
|
||||
if (st->chip_class <= GFX8) {
|
||||
if (st->gfx_level <= GFX8) {
|
||||
/* Store the dynamic HS control word. */
|
||||
nir_if *rel_patch_id_zero = nir_push_if(b, nir_ieq_imm(b, rel_patch_id, 0));
|
||||
nir_ssa_def *ctrlw = nir_imm_int(b, 0x80000000u);
|
||||
|
@ -671,7 +671,7 @@ ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
|
|||
|
||||
void
|
||||
ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
enum amd_gfx_level gfx_level,
|
||||
bool tes_reads_tessfactors,
|
||||
uint64_t tes_inputs_read,
|
||||
uint64_t tes_patch_inputs_read,
|
||||
|
@ -683,7 +683,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
|||
assert(shader->info.stage == MESA_SHADER_TESS_CTRL);
|
||||
|
||||
lower_tess_io_state state = {
|
||||
.chip_class = chip_class,
|
||||
.gfx_level = gfx_level,
|
||||
.tes_reads_tessfactors = tes_reads_tessfactors,
|
||||
.tes_inputs_read = tes_inputs_read,
|
||||
.tes_patch_inputs_read = tes_patch_inputs_read,
|
||||
|
|
|
@ -1156,7 +1156,7 @@ bool ac_init_perfcounters(const struct radeon_info *info,
|
|||
const struct ac_pc_block_gfxdescr *blocks;
|
||||
unsigned num_blocks;
|
||||
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX7:
|
||||
blocks = groups_CIK;
|
||||
num_blocks = ARRAY_SIZE(groups_CIK);
|
||||
|
|
|
@ -362,9 +362,9 @@ struct sqtt_file_chunk_asic_info {
|
|||
static_assert(sizeof(struct sqtt_file_chunk_asic_info) == 720,
|
||||
"sqtt_file_chunk_asic_info doesn't match RGP spec");
|
||||
|
||||
static enum sqtt_gfxip_level ac_chip_class_to_sqtt_gfxip_level(enum chip_class chip_class)
|
||||
static enum sqtt_gfxip_level ac_gfx_level_to_sqtt_gfxip_level(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX8:
|
||||
return SQTT_GFXIP_LEVEL_GFXIP_8;
|
||||
case GFX9:
|
||||
|
@ -374,7 +374,7 @@ static enum sqtt_gfxip_level ac_chip_class_to_sqtt_gfxip_level(enum chip_class c
|
|||
case GFX10_3:
|
||||
return SQTT_GFXIP_LEVEL_GFXIP_10_3;
|
||||
default:
|
||||
unreachable("Invalid chip class");
|
||||
unreachable("Invalid gfx level");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -431,7 +431,7 @@ static uint32_t ac_memory_ops_per_clock(uint32_t vram_type)
|
|||
static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
||||
struct sqtt_file_chunk_asic_info *chunk)
|
||||
{
|
||||
bool has_wave32 = rad_info->chip_class >= GFX10;
|
||||
bool has_wave32 = rad_info->gfx_level >= GFX10;
|
||||
|
||||
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_ASIC_INFO;
|
||||
chunk->header.chunk_id.index = 0;
|
||||
|
@ -444,11 +444,11 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
/* All chips older than GFX9 are affected by the "SPI not
|
||||
* differentiating pkr_id for newwave commands" bug.
|
||||
*/
|
||||
if (rad_info->chip_class < GFX9)
|
||||
if (rad_info->gfx_level < GFX9)
|
||||
chunk->flags |= SQTT_FILE_CHUNK_ASIC_INFO_FLAG_SC_PACKER_NUMBERING;
|
||||
|
||||
/* Only GFX9+ support PS1 events. */
|
||||
if (rad_info->chip_class >= GFX9)
|
||||
if (rad_info->gfx_level >= GFX9)
|
||||
chunk->flags |= SQTT_FILE_CHUNK_ASIC_INFO_FLAG_PS1_EVENT_TOKENS_ENABLED;
|
||||
|
||||
chunk->trace_shader_core_clock = rad_info->max_shader_clock * 1000000;
|
||||
|
@ -478,7 +478,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
chunk->hardware_contexts = 8;
|
||||
chunk->gpu_type =
|
||||
rad_info->has_dedicated_vram ? SQTT_GPU_TYPE_DISCRETE : SQTT_GPU_TYPE_INTEGRATED;
|
||||
chunk->gfxip_level = ac_chip_class_to_sqtt_gfxip_level(rad_info->chip_class);
|
||||
chunk->gfxip_level = ac_gfx_level_to_sqtt_gfxip_level(rad_info->gfx_level);
|
||||
chunk->gpu_index = 0;
|
||||
|
||||
chunk->max_number_of_dedicated_cus = 0;
|
||||
|
@ -491,7 +491,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
chunk->l2_cache_size = rad_info->l2_cache_size;
|
||||
chunk->l1_cache_size = rad_info->l1_cache_size;
|
||||
chunk->lds_size = rad_info->lds_size_per_workgroup;
|
||||
if (rad_info->chip_class >= GFX10) {
|
||||
if (rad_info->gfx_level >= GFX10) {
|
||||
/* RGP expects the LDS size in CU mode. */
|
||||
chunk->lds_size /= 2;
|
||||
}
|
||||
|
@ -501,7 +501,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
|
|||
chunk->alu_per_clock = 0.0;
|
||||
chunk->texture_per_clock = 0.0;
|
||||
chunk->prims_per_clock = rad_info->max_se;
|
||||
if (rad_info->chip_class == GFX10)
|
||||
if (rad_info->gfx_level == GFX10)
|
||||
chunk->prims_per_clock *= 2;
|
||||
chunk->pixels_per_clock = 0.0;
|
||||
|
||||
|
@ -722,9 +722,9 @@ struct sqtt_file_chunk_sqtt_desc {
|
|||
static_assert(sizeof(struct sqtt_file_chunk_sqtt_desc) == 32,
|
||||
"sqtt_file_chunk_sqtt_desc doesn't match RGP spec");
|
||||
|
||||
static enum sqtt_version ac_chip_class_to_sqtt_version(enum chip_class chip_class)
|
||||
static enum sqtt_version ac_gfx_level_to_sqtt_version(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX8:
|
||||
return SQTT_VERSION_2_2;
|
||||
case GFX9:
|
||||
|
@ -734,7 +734,7 @@ static enum sqtt_version ac_chip_class_to_sqtt_version(enum chip_class chip_clas
|
|||
case GFX10_3:
|
||||
return SQTT_VERSION_2_4;
|
||||
default:
|
||||
unreachable("Invalid chip class");
|
||||
unreachable("Invalid gfx level");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -749,7 +749,7 @@ static void ac_sqtt_fill_sqtt_desc(struct radeon_info *info,
|
|||
chunk->header.size_in_bytes = sizeof(*chunk);
|
||||
|
||||
chunk->sqtt_version =
|
||||
ac_chip_class_to_sqtt_version(info->chip_class);
|
||||
ac_gfx_level_to_sqtt_version(info->gfx_level);
|
||||
chunk->shader_engine_index = shader_engine_index;
|
||||
chunk->v1.instrumentation_spec_version = 1;
|
||||
chunk->v1.instrumentation_api_version = 0;
|
||||
|
@ -877,9 +877,9 @@ enum elf_gfxip_level
|
|||
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
|
||||
};
|
||||
|
||||
static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chip_class)
|
||||
static enum elf_gfxip_level ac_gfx_level_to_elf_gfxip_level(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX8:
|
||||
return EF_AMDGPU_MACH_AMDGCN_GFX801;
|
||||
case GFX9:
|
||||
|
@ -889,7 +889,7 @@ static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chi
|
|||
case GFX10_3:
|
||||
return EF_AMDGPU_MACH_AMDGCN_GFX1030;
|
||||
default:
|
||||
unreachable("Invalid chip class");
|
||||
unreachable("Invalid gfx level");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1049,7 +1049,7 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
|
|||
struct sqtt_file_chunk_code_object_database code_object;
|
||||
struct sqtt_code_object_database_record code_object_record;
|
||||
uint32_t elf_size_calc = 0;
|
||||
uint32_t flags = ac_chip_class_to_elf_gfxip_level(rad_info->chip_class);
|
||||
uint32_t flags = ac_gfx_level_to_elf_gfxip_level(rad_info->gfx_level);
|
||||
|
||||
fseek(output, sizeof(struct sqtt_file_chunk_code_object_database), SEEK_CUR);
|
||||
file_offset += sizeof(struct sqtt_file_chunk_code_object_database);
|
||||
|
|
|
@ -257,7 +257,7 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
memset(binary, 0, sizeof(*binary));
|
||||
memcpy(&binary->options, &i.options, sizeof(binary->options));
|
||||
binary->wave_size = i.wave_size;
|
||||
binary->chip_class = i.info->chip_class;
|
||||
binary->gfx_level = i.info->gfx_level;
|
||||
binary->num_parts = i.num_parts;
|
||||
binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
|
||||
if (!binary->parts)
|
||||
|
@ -297,7 +297,7 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
|
||||
unsigned max_lds_size = 64 * 1024;
|
||||
|
||||
if (i.info->chip_class == GFX6 ||
|
||||
if (i.info->gfx_level == GFX6 ||
|
||||
(i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
|
||||
max_lds_size = 32 * 1024;
|
||||
|
||||
|
@ -456,11 +456,11 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
|
||||
if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
|
||||
prefetch_distance = 16;
|
||||
else if (i.info->chip_class >= GFX10)
|
||||
else if (i.info->gfx_level >= GFX10)
|
||||
prefetch_distance = 3;
|
||||
|
||||
if (prefetch_distance) {
|
||||
if (i.info->chip_class >= GFX11)
|
||||
if (i.info->gfx_level >= GFX11)
|
||||
binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 128);
|
||||
else
|
||||
binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);
|
||||
|
@ -577,7 +577,7 @@ static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_id
|
|||
|
||||
/* TODO: resolve from other parts */
|
||||
|
||||
if (u->get_external_symbol(u->binary->chip_class, u->cb_data, name, value))
|
||||
if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value))
|
||||
return true;
|
||||
|
||||
report_errorf("symbol %s: unknown", name);
|
||||
|
|
|
@ -57,7 +57,7 @@ struct ac_rtld_options {
|
|||
/* Lightweight wrapper around underlying ELF objects. */
|
||||
struct ac_rtld_binary {
|
||||
struct ac_rtld_options options;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
unsigned wave_size;
|
||||
|
||||
/* Required buffer sizes, currently read/executable only. */
|
||||
|
@ -84,7 +84,7 @@ struct ac_rtld_binary {
|
|||
* \param value to be filled in by the callback
|
||||
* \return whether the symbol was found successfully
|
||||
*/
|
||||
typedef bool (*ac_rtld_get_external_symbol_cb)(enum chip_class chip_class, void *cb_data,
|
||||
typedef bool (*ac_rtld_get_external_symbol_cb)(enum amd_gfx_level gfx_level, void *cb_data,
|
||||
const char *symbol, uint64_t *value);
|
||||
|
||||
/**
|
||||
|
|
|
@ -85,11 +85,11 @@ unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format)
|
|||
* Calculate the appropriate setting of VGT_GS_MODE when \p shader is a
|
||||
* geometry shader.
|
||||
*/
|
||||
uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
|
||||
uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum amd_gfx_level gfx_level)
|
||||
{
|
||||
unsigned cut_mode;
|
||||
|
||||
assert (chip_class < GFX11);
|
||||
assert (gfx_level < GFX11);
|
||||
|
||||
if (gs_max_vert_out <= 128) {
|
||||
cut_mode = V_028A40_GS_CUT_128;
|
||||
|
@ -103,20 +103,20 @@ uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
|
|||
}
|
||||
|
||||
return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | S_028A40_CUT_MODE(cut_mode) |
|
||||
S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) |
|
||||
S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
|
||||
S_028A40_ES_WRITE_OPTIMIZE(gfx_level <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) |
|
||||
S_028A40_ONCHIP(gfx_level >= GFX9 ? 1 : 0);
|
||||
}
|
||||
|
||||
/// Translate a (dfmt, nfmt) pair into a chip-appropriate combined format
|
||||
/// value for LLVM8+ tbuffer intrinsics.
|
||||
unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt)
|
||||
unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsigned nfmt)
|
||||
{
|
||||
// Some games try to access vertex buffers without a valid format.
|
||||
// This is a game bug, but we should still handle it gracefully.
|
||||
if (dfmt == V_008F0C_GFX10_FORMAT_INVALID)
|
||||
return V_008F0C_GFX10_FORMAT_INVALID;
|
||||
|
||||
if (chip_class >= GFX11) {
|
||||
if (gfx_level >= GFX11) {
|
||||
switch (dfmt) {
|
||||
default:
|
||||
unreachable("bad dfmt");
|
||||
|
@ -311,7 +311,7 @@ unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsign
|
|||
return V_008F0C_GFX11_FORMAT_10_11_11_FLOAT;
|
||||
}
|
||||
}
|
||||
} else if (chip_class >= GFX10) {
|
||||
} else if (gfx_level >= GFX10) {
|
||||
unsigned format;
|
||||
switch (dfmt) {
|
||||
default:
|
||||
|
@ -417,12 +417,12 @@ const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
|
|||
return &data_format_table[dfmt];
|
||||
}
|
||||
|
||||
enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
|
||||
enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
|
||||
bool is_array)
|
||||
{
|
||||
switch (dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
if (chip_class == GFX9)
|
||||
if (gfx_level == GFX9)
|
||||
return is_array ? ac_image_2darray : ac_image_2d;
|
||||
return is_array ? ac_image_1darray : ac_image_1d;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
|
@ -444,15 +444,15 @@ enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampl
|
|||
}
|
||||
}
|
||||
|
||||
enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
|
||||
enum ac_image_dim ac_get_image_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim sdim,
|
||||
bool is_array)
|
||||
{
|
||||
enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);
|
||||
enum ac_image_dim dim = ac_get_sampler_dim(gfx_level, sdim, is_array);
|
||||
|
||||
/* Match the resource type set in the descriptor. */
|
||||
if (dim == ac_image_cube || (chip_class <= GFX8 && dim == ac_image_3d))
|
||||
if (dim == ac_image_cube || (gfx_level <= GFX8 && dim == ac_image_3d))
|
||||
dim = ac_image_2darray;
|
||||
else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
|
||||
else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && gfx_level == GFX9) {
|
||||
/* When a single layer of a 3D texture is bound, the shader
|
||||
* will refer to a 2D target, but the descriptor has a 3D type.
|
||||
* Since the HW ignores BASE_ARRAY in this case, we need to
|
||||
|
@ -670,7 +670,7 @@ void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_cu
|
|||
if (ngg && info->family == CHIP_NAVI14)
|
||||
return;
|
||||
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
/* For Wave32, the hw will launch twice the number of late alloc waves, so 1 == 2x wave32.
|
||||
* These limits are estimated because they are all safe but they vary in performance.
|
||||
*/
|
||||
|
@ -680,7 +680,7 @@ void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_cu
|
|||
*late_alloc_wave64 = info->min_good_cu_per_sa * 4;
|
||||
|
||||
/* Limit LATE_ALLOC_GS to prevent a hang (hw bug) on gfx10. */
|
||||
if (info->chip_class == GFX10 && ngg)
|
||||
if (info->gfx_level == GFX10 && ngg)
|
||||
*late_alloc_wave64 = MIN2(*late_alloc_wave64, 64);
|
||||
|
||||
/* Gfx10: CU2 & CU3 must be disabled to prevent a hw deadlock.
|
||||
|
@ -688,7 +688,7 @@ void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_cu
|
|||
*
|
||||
* The deadlock is caused by late alloc, which usually increases performance.
|
||||
*/
|
||||
*cu_mask &= info->chip_class == GFX10 ? ~BITFIELD_RANGE(2, 2) :
|
||||
*cu_mask &= info->gfx_level == GFX10 ? ~BITFIELD_RANGE(2, 2) :
|
||||
~BITFIELD_RANGE(1, 1);
|
||||
} else {
|
||||
if (info->min_good_cu_per_sa <= 4) {
|
||||
|
@ -724,7 +724,7 @@ unsigned ac_compute_cs_workgroup_size(uint16_t sizes[3], bool variable, unsigned
|
|||
return sizes[0] * sizes[1] * sizes[2];
|
||||
}
|
||||
|
||||
unsigned ac_compute_lshs_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
|
||||
unsigned ac_compute_lshs_workgroup_size(enum amd_gfx_level gfx_level, gl_shader_stage stage,
|
||||
unsigned tess_num_patches,
|
||||
unsigned tess_patch_in_vtx,
|
||||
unsigned tess_patch_out_vtx)
|
||||
|
@ -733,7 +733,7 @@ unsigned ac_compute_lshs_workgroup_size(enum chip_class chip_class, gl_shader_st
|
|||
* These two HW stages are merged on GFX9+.
|
||||
*/
|
||||
|
||||
bool merged_shaders = chip_class >= GFX9;
|
||||
bool merged_shaders = gfx_level >= GFX9;
|
||||
unsigned ls_workgroup_size = tess_num_patches * tess_patch_in_vtx;
|
||||
unsigned hs_workgroup_size = tess_num_patches * tess_patch_out_vtx;
|
||||
|
||||
|
@ -747,7 +747,7 @@ unsigned ac_compute_lshs_workgroup_size(enum chip_class chip_class, gl_shader_st
|
|||
unreachable("invalid LSHS shader stage");
|
||||
}
|
||||
|
||||
unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wave_size,
|
||||
unsigned ac_compute_esgs_workgroup_size(enum amd_gfx_level gfx_level, unsigned wave_size,
|
||||
unsigned es_verts, unsigned gs_inst_prims)
|
||||
{
|
||||
/* ESGS may operate in workgroups if on-chip GS (LDS rings) are enabled.
|
||||
|
@ -757,7 +757,7 @@ unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wav
|
|||
* GFX9+ (merged): implemented in Mesa.
|
||||
*/
|
||||
|
||||
if (chip_class <= GFX8)
|
||||
if (gfx_level <= GFX8)
|
||||
return wave_size;
|
||||
|
||||
unsigned workgroup_size = MAX2(es_verts, gs_inst_prims);
|
||||
|
@ -821,7 +821,7 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
|
|||
*
|
||||
* Shaders with SCRATCH_EN=0 don't allocate scratch space.
|
||||
*/
|
||||
const unsigned size_shift = info->chip_class >= GFX11 ? 8 : 10;
|
||||
const unsigned size_shift = info->gfx_level >= GFX11 ? 8 : 10;
|
||||
const unsigned min_size_per_wave = BITFIELD_BIT(size_shift);
|
||||
|
||||
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
|
||||
|
@ -837,7 +837,7 @@ void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
|
|||
*max_seen_bytes_per_wave = MAX2(*max_seen_bytes_per_wave, bytes_per_wave);
|
||||
|
||||
unsigned max_scratch_waves = info->max_scratch_waves;
|
||||
if (info->chip_class >= GFX11 && !compute)
|
||||
if (info->gfx_level >= GFX11 && !compute)
|
||||
max_scratch_waves /= info->num_se; /* WAVES is per SE for SPI_TMPRING_SIZE. */
|
||||
|
||||
/* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */
|
||||
|
|
|
@ -94,16 +94,16 @@ unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool wri
|
|||
|
||||
unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);
|
||||
|
||||
uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
|
||||
uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum amd_gfx_level gfx_level);
|
||||
|
||||
unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt);
|
||||
unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsigned nfmt);
|
||||
|
||||
const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);
|
||||
|
||||
enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
|
||||
enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
|
||||
bool is_array);
|
||||
|
||||
enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
|
||||
enum ac_image_dim ac_get_image_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim sdim,
|
||||
bool is_array);
|
||||
|
||||
unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
|
||||
|
@ -119,12 +119,12 @@ void ac_compute_late_alloc(const struct radeon_info *info, bool ngg, bool ngg_cu
|
|||
|
||||
unsigned ac_compute_cs_workgroup_size(uint16_t sizes[3], bool variable, unsigned max);
|
||||
|
||||
unsigned ac_compute_lshs_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
|
||||
unsigned ac_compute_lshs_workgroup_size(enum amd_gfx_level gfx_level, gl_shader_stage stage,
|
||||
unsigned tess_num_patches,
|
||||
unsigned tess_patch_in_vtx,
|
||||
unsigned tess_patch_out_vtx);
|
||||
|
||||
unsigned ac_compute_esgs_workgroup_size(enum chip_class chip_class, unsigned wave_size,
|
||||
unsigned ac_compute_esgs_workgroup_size(enum amd_gfx_level gfx_level, unsigned wave_size,
|
||||
unsigned es_verts, unsigned gs_inst_prims);
|
||||
|
||||
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
|
||||
|
|
|
@ -1240,7 +1240,7 @@ static const struct ac_reg_range Gfx11NonShadowedRanges[] =
|
|||
},
|
||||
};
|
||||
|
||||
void ac_get_reg_ranges(enum chip_class chip_class, enum radeon_family family,
|
||||
void ac_get_reg_ranges(enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
enum ac_reg_range_type type, unsigned *num_ranges,
|
||||
const struct ac_reg_range **ranges)
|
||||
{
|
||||
|
@ -1255,51 +1255,51 @@ void ac_get_reg_ranges(enum chip_class chip_class, enum radeon_family family,
|
|||
|
||||
switch (type) {
|
||||
case SI_REG_RANGE_UCONFIG:
|
||||
if (chip_class == GFX11)
|
||||
if (gfx_level == GFX11)
|
||||
RETURN(Gfx11UserConfigShadowRange);
|
||||
else if (chip_class == GFX10_3)
|
||||
else if (gfx_level == GFX10_3)
|
||||
RETURN(Gfx103UserConfigShadowRange);
|
||||
else if (chip_class == GFX10)
|
||||
else if (gfx_level == GFX10)
|
||||
RETURN(Nv10UserConfigShadowRange);
|
||||
else if (chip_class == GFX9)
|
||||
else if (gfx_level == GFX9)
|
||||
RETURN(Gfx9UserConfigShadowRange);
|
||||
break;
|
||||
case SI_REG_RANGE_CONTEXT:
|
||||
if (chip_class == GFX11)
|
||||
if (gfx_level == GFX11)
|
||||
RETURN(Gfx11ContextShadowRange);
|
||||
else if (chip_class == GFX10_3)
|
||||
else if (gfx_level == GFX10_3)
|
||||
RETURN(Gfx103ContextShadowRange);
|
||||
else if (chip_class == GFX10)
|
||||
else if (gfx_level == GFX10)
|
||||
RETURN(Nv10ContextShadowRange);
|
||||
else if (chip_class == GFX9)
|
||||
else if (gfx_level == GFX9)
|
||||
RETURN(Gfx9ContextShadowRange);
|
||||
break;
|
||||
case SI_REG_RANGE_SH:
|
||||
if (chip_class == GFX11)
|
||||
if (gfx_level == GFX11)
|
||||
RETURN(Gfx11ShShadowRange);
|
||||
else if (chip_class == GFX10_3 || chip_class == GFX10)
|
||||
else if (gfx_level == GFX10_3 || gfx_level == GFX10)
|
||||
RETURN(Gfx10ShShadowRange);
|
||||
else if (family == CHIP_RAVEN2 || family == CHIP_RENOIR)
|
||||
RETURN(Gfx9ShShadowRangeRaven2);
|
||||
else if (chip_class == GFX9)
|
||||
else if (gfx_level == GFX9)
|
||||
RETURN(Gfx9ShShadowRange);
|
||||
break;
|
||||
case SI_REG_RANGE_CS_SH:
|
||||
if (chip_class == GFX11)
|
||||
if (gfx_level == GFX11)
|
||||
RETURN(Gfx11CsShShadowRange);
|
||||
else if (chip_class == GFX10_3 || chip_class == GFX10)
|
||||
else if (gfx_level == GFX10_3 || gfx_level == GFX10)
|
||||
RETURN(Gfx10CsShShadowRange);
|
||||
else if (family == CHIP_RAVEN2 || family == CHIP_RENOIR)
|
||||
RETURN(Gfx9CsShShadowRangeRaven2);
|
||||
else if (chip_class == GFX9)
|
||||
else if (gfx_level == GFX9)
|
||||
RETURN(Gfx9CsShShadowRange);
|
||||
break;
|
||||
case SI_REG_RANGE_NON_SHADOWED:
|
||||
if (chip_class == GFX11)
|
||||
if (gfx_level == GFX11)
|
||||
RETURN(Gfx11NonShadowedRanges);
|
||||
else if (chip_class == GFX10_3)
|
||||
else if (gfx_level == GFX10_3)
|
||||
RETURN(Gfx103NonShadowedRanges);
|
||||
else if (chip_class == GFX10)
|
||||
else if (gfx_level == GFX10)
|
||||
RETURN(Navi10NonShadowedRanges);
|
||||
else
|
||||
assert(0);
|
||||
|
@ -4031,13 +4031,13 @@ void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf
|
|||
unsigned reg_offset = R_02835C_PA_SC_TILE_STEERING_OVERRIDE;
|
||||
uint32_t reg_value = info->pa_sc_tile_steering_override;
|
||||
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
gfx11_emulate_clear_state(cs, 1, ®_offset, ®_value, set_context_reg_seq_array);
|
||||
} else if (info->chip_class == GFX10_3) {
|
||||
} else if (info->gfx_level == GFX10_3) {
|
||||
gfx103_emulate_clear_state(cs, 1, ®_offset, ®_value, set_context_reg_seq_array);
|
||||
} else if (info->chip_class == GFX10) {
|
||||
} else if (info->gfx_level == GFX10) {
|
||||
gfx10_emulate_clear_state(cs, 1, ®_offset, ®_value, set_context_reg_seq_array);
|
||||
} else if (info->chip_class == GFX9) {
|
||||
} else if (info->gfx_level == GFX9) {
|
||||
gfx9_emulate_clear_state(cs, set_context_reg_seq_array);
|
||||
} else {
|
||||
unreachable("unimplemented");
|
||||
|
@ -4047,7 +4047,7 @@ void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf
|
|||
/* Debug helper to find if any registers are missing in the tables above.
|
||||
* Call this in the driver whenever you set a register.
|
||||
*/
|
||||
void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family family,
|
||||
void ac_check_shadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
unsigned reg_offset, unsigned count)
|
||||
{
|
||||
bool found = false;
|
||||
|
@ -4057,7 +4057,7 @@ void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family famil
|
|||
const struct ac_reg_range *ranges;
|
||||
unsigned num_ranges;
|
||||
|
||||
ac_get_reg_ranges(chip_class, family, type, &num_ranges, &ranges);
|
||||
ac_get_reg_ranges(gfx_level, family, type, &num_ranges, &ranges);
|
||||
|
||||
for (unsigned i = 0; i < num_ranges; i++) {
|
||||
unsigned end_reg_offset = reg_offset + count * 4;
|
||||
|
@ -4080,10 +4080,10 @@ void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family famil
|
|||
if (!found || !shadowed) {
|
||||
printf("register %s: ", !found ? "not found" : "not shadowed");
|
||||
if (count > 1) {
|
||||
printf("%s .. %s\n", ac_get_register_name(chip_class, reg_offset),
|
||||
ac_get_register_name(chip_class, reg_offset + (count - 1) * 4));
|
||||
printf("%s .. %s\n", ac_get_register_name(gfx_level, reg_offset),
|
||||
ac_get_register_name(gfx_level, reg_offset + (count - 1) * 4));
|
||||
} else {
|
||||
printf("%s\n", ac_get_register_name(chip_class, reg_offset));
|
||||
printf("%s\n", ac_get_register_name(gfx_level, reg_offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4102,13 +4102,13 @@ void ac_print_shadowed_regs(const struct radeon_info *info)
|
|||
const struct ac_reg_range *ranges;
|
||||
unsigned num_ranges;
|
||||
|
||||
ac_get_reg_ranges(info->chip_class, info->family, type, &num_ranges, &ranges);
|
||||
ac_get_reg_ranges(info->gfx_level, info->family, type, &num_ranges, &ranges);
|
||||
|
||||
for (unsigned i = 0; i < num_ranges; i++) {
|
||||
for (unsigned j = 0; j < ranges[i].size / 4; j++) {
|
||||
unsigned offset = ranges[i].offset + j * 4;
|
||||
|
||||
const char *name = ac_get_register_name(info->chip_class, offset);
|
||||
const char *name = ac_get_register_name(info->gfx_level, offset);
|
||||
unsigned value = -1;
|
||||
|
||||
#ifndef _WIN32
|
||||
|
|
|
@ -54,12 +54,12 @@ extern "C" {
|
|||
typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
|
||||
const uint32_t *values);
|
||||
|
||||
void ac_get_reg_ranges(enum chip_class chip_class, enum radeon_family family,
|
||||
void ac_get_reg_ranges(enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
enum ac_reg_range_type type, unsigned *num_ranges,
|
||||
const struct ac_reg_range **ranges);
|
||||
void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
|
||||
set_context_reg_seq_array_fn set_context_reg_seq_array);
|
||||
void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family family,
|
||||
void ac_check_shadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
unsigned reg_offset, unsigned count);
|
||||
void ac_print_shadowed_regs(const struct radeon_info *info);
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ ac_is_thread_trace_complete(struct radeon_info *rad_info,
|
|||
const struct ac_thread_trace_data *data,
|
||||
const struct ac_thread_trace_info *info)
|
||||
{
|
||||
if (rad_info->chip_class >= GFX10) {
|
||||
if (rad_info->gfx_level >= GFX10) {
|
||||
/* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the number of
|
||||
* dropped bytes per SE via THREAD_TRACE_DROPPED_CNTR. Though, this
|
||||
* doesn't seem reliable because it might still report non-zero even if
|
||||
|
@ -90,7 +90,7 @@ uint32_t
|
|||
ac_get_expected_buffer_size(struct radeon_info *rad_info,
|
||||
const struct ac_thread_trace_info *info)
|
||||
{
|
||||
if (rad_info->chip_class >= GFX10) {
|
||||
if (rad_info->gfx_level >= GFX10) {
|
||||
uint32_t dropped_cntr_per_se = info->gfx10_dropped_cntr / rad_info->max_se;
|
||||
return ((info->cur_offset * 32) + dropped_cntr_per_se) / 1024;
|
||||
}
|
||||
|
|
|
@ -122,11 +122,11 @@ bool ac_modifier_supports_dcc_image_stores(uint64_t modifier)
|
|||
}
|
||||
|
||||
|
||||
bool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
|
||||
bool ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level,
|
||||
const struct radeon_surf *surf)
|
||||
{
|
||||
/* DCC image stores is only available for GFX10+. */
|
||||
if (chip_class < GFX10)
|
||||
if (gfx_level < GFX10)
|
||||
return false;
|
||||
|
||||
/* DCC image stores support the following settings:
|
||||
|
@ -151,7 +151,7 @@ bool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
|
|||
return (!surf->u.gfx9.color.dcc.independent_64B_blocks &&
|
||||
surf->u.gfx9.color.dcc.independent_128B_blocks &&
|
||||
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) ||
|
||||
(chip_class >= GFX10_3 && /* gfx10.3 */
|
||||
(gfx_level >= GFX10_3 && /* gfx10.3 */
|
||||
surf->u.gfx9.color.dcc.independent_64B_blocks &&
|
||||
surf->u.gfx9.color.dcc.independent_128B_blocks &&
|
||||
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
|
||||
|
@ -199,18 +199,18 @@ bool ac_is_modifier_supported(const struct radeon_info *info,
|
|||
util_format_get_blocksizebits(format) > 64)
|
||||
return false;
|
||||
|
||||
if (info->chip_class < GFX9)
|
||||
if (info->gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
if(modifier == DRM_FORMAT_MOD_LINEAR)
|
||||
return true;
|
||||
|
||||
/* GFX8 may need a different modifier for each plane */
|
||||
if (info->chip_class < GFX9 && util_format_get_num_planes(format) > 1)
|
||||
if (info->gfx_level < GFX9 && util_format_get_num_planes(format) > 1)
|
||||
return false;
|
||||
|
||||
uint32_t allowed_swizzles = 0xFFFFFFFF;
|
||||
switch(info->chip_class) {
|
||||
switch(info->gfx_level) {
|
||||
case GFX9:
|
||||
allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
|
||||
break;
|
||||
|
@ -264,7 +264,7 @@ bool ac_get_supported_modifiers(const struct radeon_info *info,
|
|||
/* The modifiers have to be added in descending order of estimated
|
||||
* performance. The drivers will prefer modifiers that come earlier
|
||||
* in the list. */
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX9: {
|
||||
unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
|
||||
G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
|
||||
|
@ -340,7 +340,7 @@ bool ac_get_supported_modifiers(const struct radeon_info *info,
|
|||
}
|
||||
case GFX10:
|
||||
case GFX10_3: {
|
||||
bool rbplus = info->chip_class >= GFX10_3;
|
||||
bool rbplus = info->gfx_level >= GFX10_3;
|
||||
unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
|
||||
unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;
|
||||
|
||||
|
@ -357,7 +357,7 @@ bool ac_get_supported_modifiers(const struct radeon_info *info,
|
|||
AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
|
||||
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
|
||||
|
||||
if (info->chip_class >= GFX10_3) {
|
||||
if (info->gfx_level >= GFX10_3) {
|
||||
if (info->max_render_backends == 1) {
|
||||
ADD_MOD(AMD_FMT_MOD | common_dcc |
|
||||
AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
|
||||
|
@ -370,8 +370,8 @@ bool ac_get_supported_modifiers(const struct radeon_info *info,
|
|||
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
|
||||
}
|
||||
|
||||
if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->chip_class >= GFX10_3) {
|
||||
bool independent_128b = info->chip_class >= GFX10_3;
|
||||
if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->gfx_level >= GFX10_3) {
|
||||
bool independent_128b = info->gfx_level >= GFX10_3;
|
||||
|
||||
if (info->max_render_backends == 1) {
|
||||
ADD_MOD(AMD_FMT_MOD | common_dcc |
|
||||
|
@ -855,7 +855,7 @@ static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct rade
|
|||
{
|
||||
uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
|
||||
|
||||
if (info->chip_class >= GFX7)
|
||||
if (info->gfx_level >= GFX7)
|
||||
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
|
||||
else
|
||||
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
|
||||
|
@ -934,7 +934,7 @@ static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *
|
|||
|
||||
/* Compute tile swizzle. */
|
||||
/* TODO: fix tile swizzle with mipmapping for GFX6 */
|
||||
if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
|
||||
if ((info->gfx_level >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
|
||||
surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
|
||||
!(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
|
||||
!get_display_flag(config, surf)) {
|
||||
|
@ -972,7 +972,7 @@ static void ac_compute_cmask(const struct radeon_info *info, const struct ac_sur
|
|||
(config->info.samples >= 2 && !surf->fmask_size))
|
||||
return;
|
||||
|
||||
assert(info->chip_class <= GFX8);
|
||||
assert(info->gfx_level <= GFX8);
|
||||
|
||||
switch (num_pipes) {
|
||||
case 2:
|
||||
|
@ -1138,7 +1138,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i
|
|||
* driver team).
|
||||
*/
|
||||
AddrSurfInfoIn.flags.dccCompatible =
|
||||
info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
|
||||
info->gfx_level >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
|
||||
!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
|
||||
!compressed &&
|
||||
((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);
|
||||
|
@ -1203,7 +1203,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i
|
|||
assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
|
||||
assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
|
||||
|
||||
if (info->chip_class == GFX6) {
|
||||
if (info->gfx_level == GFX6) {
|
||||
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
|
||||
if (surf->bpe == 2)
|
||||
AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
|
||||
|
@ -1443,7 +1443,7 @@ static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct rad
|
|||
/* TODO: We could allow some of these: */
|
||||
sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
|
||||
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
if ((1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) <= 16) {
|
||||
sin.forbiddenBlock.gfx11.thin256KB = 1;
|
||||
sin.forbiddenBlock.gfx11.thick256KB = 1;
|
||||
|
@ -1472,7 +1472,7 @@ static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct rad
|
|||
if (sin.flags.prt) {
|
||||
sin.forbiddenBlock.macroThin4KB = 1;
|
||||
sin.forbiddenBlock.macroThick4KB = 1;
|
||||
if (info->chip_class >= GFX11) {
|
||||
if (info->gfx_level >= GFX11) {
|
||||
sin.forbiddenBlock.gfx11.thin256KB = 1;
|
||||
sin.forbiddenBlock.gfx11.thick256KB = 1;
|
||||
}
|
||||
|
@ -1492,7 +1492,7 @@ static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct rad
|
|||
sin.preferredSwSet.sw_R = 1;
|
||||
}
|
||||
|
||||
if (info->chip_class >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
|
||||
if (info->gfx_level >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
|
||||
/* 3D textures should use S swizzle modes for the best performance.
|
||||
* THe only exception is 3D render targets, which prefer 64KB_D_X.
|
||||
*
|
||||
|
@ -1519,11 +1519,11 @@ static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct rad
|
|||
|
||||
static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
|
||||
{
|
||||
if (info->chip_class >= GFX11)
|
||||
if (info->gfx_level >= GFX11)
|
||||
return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X ||
|
||||
sw_mode == ADDR_SW_256KB_Z_X || sw_mode == ADDR_SW_256KB_R_X;
|
||||
|
||||
if (info->chip_class >= GFX10)
|
||||
if (info->gfx_level >= GFX10)
|
||||
return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
|
||||
|
||||
return sw_mode != ADDR_SW_LINEAR;
|
||||
|
@ -1532,7 +1532,7 @@ static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_m
|
|||
ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
|
||||
const struct radeon_surf *surf)
|
||||
{
|
||||
if (info->chip_class <= GFX9) {
|
||||
if (info->gfx_level <= GFX9) {
|
||||
/* Only independent 64B blocks are supported. */
|
||||
return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks &&
|
||||
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
|
||||
|
@ -1567,7 +1567,7 @@ ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
|
|||
static bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info,
|
||||
const struct ac_surf_config *config)
|
||||
{
|
||||
assert(info->chip_class >= GFX10);
|
||||
assert(info->gfx_level >= GFX10);
|
||||
|
||||
/* Older kernels have buggy DAL. */
|
||||
if (info->drm_minor <= 43)
|
||||
|
@ -1589,7 +1589,7 @@ void ac_modifier_max_extent(const struct radeon_info *info,
|
|||
if (ac_modifier_has_dcc(modifier)) {
|
||||
bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
|
||||
|
||||
if (info->chip_class >= GFX10 && !independent_64B_blocks) {
|
||||
if (info->gfx_level >= GFX10 && !independent_64B_blocks) {
|
||||
/* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
|
||||
*width = 2560;
|
||||
*height = 2560;
|
||||
|
@ -1613,7 +1613,7 @@ static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
|
|||
if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
|
||||
return false;
|
||||
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX6:
|
||||
case GFX7:
|
||||
case GFX8:
|
||||
|
@ -1631,7 +1631,7 @@ static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
|
|||
case GFX10_3:
|
||||
case GFX11:
|
||||
/* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
|
||||
if (info->chip_class == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
|
||||
if (info->gfx_level == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
|
||||
return false;
|
||||
|
||||
return (!gfx10_DCN_requires_independent_64B_blocks(info, config) ||
|
||||
|
@ -1651,7 +1651,7 @@ static void ac_copy_dcc_equation(const struct radeon_info *info,
|
|||
equation->meta_block_height = dcc->metaBlkHeight;
|
||||
equation->meta_block_depth = dcc->metaBlkDepth;
|
||||
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
/* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
assert(dcc->equation.gfx10_bits[i] == 0);
|
||||
|
@ -1683,7 +1683,7 @@ static void ac_copy_cmask_equation(const struct radeon_info *info,
|
|||
equation->meta_block_height = cmask->metaBlkHeight;
|
||||
equation->meta_block_depth = 1;
|
||||
|
||||
if (info->chip_class == GFX9) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));
|
||||
|
||||
equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits;
|
||||
|
@ -1740,7 +1740,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
for (unsigned i = 0; i < in->numMipLevels; i++) {
|
||||
surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;
|
||||
|
||||
if (info->chip_class >= GFX10)
|
||||
if (info->gfx_level >= GFX10)
|
||||
surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
|
||||
else
|
||||
surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;
|
||||
|
@ -1857,7 +1857,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
if (!surf->num_meta_levels)
|
||||
surf->meta_size = 0;
|
||||
|
||||
if (info->chip_class >= GFX10)
|
||||
if (info->gfx_level >= GFX10)
|
||||
ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1919,10 +1919,10 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
din.dataSurfaceSize = out.surfSize;
|
||||
din.firstMipIdInTail = out.firstMipIdInTail;
|
||||
|
||||
if (info->chip_class == GFX9)
|
||||
if (info->gfx_level == GFX9)
|
||||
simple_mtx_lock(&addrlib->lock);
|
||||
ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
|
||||
if (info->chip_class == GFX9)
|
||||
if (info->gfx_level == GFX9)
|
||||
simple_mtx_unlock(&addrlib->lock);
|
||||
|
||||
if (ret != ADDR_OK)
|
||||
|
@ -1973,7 +1973,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
* TODO: Try to do the same thing for gfx9
|
||||
* if there are no regressions.
|
||||
*/
|
||||
if (info->chip_class >= GFX10)
|
||||
if (info->gfx_level >= GFX10)
|
||||
surf->num_meta_levels = i + 1;
|
||||
else
|
||||
surf->num_meta_levels = i;
|
||||
|
@ -2005,10 +2005,10 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
assert(surf->tile_swizzle == 0);
|
||||
assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned);
|
||||
|
||||
if (info->chip_class == GFX9)
|
||||
if (info->gfx_level == GFX9)
|
||||
simple_mtx_lock(&addrlib->lock);
|
||||
ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
|
||||
if (info->chip_class == GFX9)
|
||||
if (info->gfx_level == GFX9)
|
||||
simple_mtx_unlock(&addrlib->lock);
|
||||
|
||||
if (ret != ADDR_OK)
|
||||
|
@ -2026,7 +2026,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
}
|
||||
|
||||
/* FMASK (it doesn't exist on GFX11) */
|
||||
if (info->chip_class <= GFX10_3 && info->has_graphics &&
|
||||
if (info->gfx_level <= GFX10_3 && info->has_graphics &&
|
||||
in->numSamples > 1 && !(surf->flags & RADEON_SURF_NO_FMASK)) {
|
||||
ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
|
||||
ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
|
||||
|
@ -2082,9 +2082,9 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
}
|
||||
|
||||
/* CMASK -- on GFX10 only for FMASK (and it doesn't exist on GFX11) */
|
||||
if (info->chip_class <= GFX10_3 && info->has_graphics &&
|
||||
if (info->gfx_level <= GFX10_3 && info->has_graphics &&
|
||||
in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&
|
||||
((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
|
||||
((info->gfx_level <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
|
||||
in->flags.metaRbUnaligned == 0) ||
|
||||
(surf->fmask_size && in->numSamples >= 2))) {
|
||||
ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
|
||||
|
@ -2112,10 +2112,10 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
|||
else
|
||||
cin.swizzleMode = in->swizzleMode;
|
||||
|
||||
if (info->chip_class == GFX9)
|
||||
if (info->gfx_level == GFX9)
|
||||
simple_mtx_lock(&addrlib->lock);
|
||||
ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
|
||||
if (info->chip_class == GFX9)
|
||||
if (info->gfx_level == GFX9)
|
||||
simple_mtx_unlock(&addrlib->lock);
|
||||
|
||||
if (ret != ADDR_OK)
|
||||
|
@ -2214,7 +2214,7 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_
|
|||
* must sample 1D textures as 2D. */
|
||||
if (config->is_3d)
|
||||
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
|
||||
else if (info->chip_class != GFX9 && config->is_1d)
|
||||
else if (info->gfx_level != GFX9 && config->is_1d)
|
||||
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
|
||||
else
|
||||
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
|
||||
|
@ -2239,11 +2239,11 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_
|
|||
/* Optimal values for the L2 cache. */
|
||||
/* Don't change the DCC settings for imported buffers - they might differ. */
|
||||
if (!(surf->flags & RADEON_SURF_IMPORTED)) {
|
||||
if (info->chip_class == GFX9) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
|
||||
surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
|
||||
surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
|
||||
} else if (info->chip_class >= GFX10) {
|
||||
} else if (info->gfx_level >= GFX10) {
|
||||
surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
|
||||
surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
|
||||
surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
|
||||
|
@ -2276,7 +2276,7 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_
|
|||
surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
|
||||
}
|
||||
|
||||
if ((info->chip_class >= GFX10_3 && info->family <= CHIP_YELLOW_CARP) ||
|
||||
if ((info->gfx_level >= GFX10_3 && info->family <= CHIP_YELLOW_CARP) ||
|
||||
/* Newer chips will skip this when possible to get better performance.
|
||||
* This is also possible for other gfx10.3 chips, but is disabled for
|
||||
* interoperability between different Mesa versions.
|
||||
|
@ -2302,7 +2302,7 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_
|
|||
case RADEON_SURF_MODE_1D:
|
||||
case RADEON_SURF_MODE_2D:
|
||||
if (surf->flags & RADEON_SURF_IMPORTED ||
|
||||
(info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
|
||||
(info->gfx_level >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
|
||||
AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
|
||||
break;
|
||||
}
|
||||
|
@ -2465,7 +2465,7 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_
|
|||
* used at the same time. We currently do not use rotated
|
||||
* in gfx9.
|
||||
*/
|
||||
assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");
|
||||
assert(info->gfx_level >= GFX10 || !"rotate micro tile mode is unsupported");
|
||||
surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
|
||||
break;
|
||||
|
||||
|
@ -2530,11 +2530,11 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf
|
|||
|
||||
if (surf->meta_size &&
|
||||
/* dcc_size is computed on GFX9+ only if it's displayable. */
|
||||
(info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
|
||||
(info->gfx_level >= GFX9 || !get_display_flag(config, surf))) {
|
||||
/* It's better when displayable DCC is immediately after
|
||||
* the image due to hw-specific reasons.
|
||||
*/
|
||||
if (info->chip_class >= GFX9 &&
|
||||
if (info->gfx_level >= GFX9 &&
|
||||
!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
|
||||
surf->u.gfx9.color.dcc.display_equation_valid) {
|
||||
/* Add space for the displayable DCC buffer. */
|
||||
|
@ -2623,7 +2623,7 @@ void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_su
|
|||
{
|
||||
bool scanout;
|
||||
|
||||
if (info->chip_class >= GFX9) {
|
||||
if (info->gfx_level >= GFX9) {
|
||||
surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
|
||||
surf->u.gfx9.color.dcc.independent_64B_blocks =
|
||||
AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
|
||||
|
@ -2663,7 +2663,7 @@ void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_su
|
|||
{
|
||||
*tiling_flags = 0;
|
||||
|
||||
if (info->chip_class >= GFX9) {
|
||||
if (info->gfx_level >= GFX9) {
|
||||
uint64_t dcc_offset = 0;
|
||||
|
||||
if (surf->meta_offset) {
|
||||
|
@ -2721,7 +2721,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_s
|
|||
if (surf->modifier != DRM_FORMAT_MOD_INVALID)
|
||||
return true;
|
||||
|
||||
if (info->chip_class >= GFX9)
|
||||
if (info->gfx_level >= GFX9)
|
||||
offset = surf->u.gfx9.surf_offset;
|
||||
else
|
||||
offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;
|
||||
|
@ -2763,9 +2763,9 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_s
|
|||
}
|
||||
}
|
||||
|
||||
if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
|
||||
if (info->gfx_level >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
|
||||
/* Read DCC information. */
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX8:
|
||||
surf->meta_offset = (uint64_t)desc[7] << 8;
|
||||
break;
|
||||
|
@ -2811,7 +2811,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_s
|
|||
desc[0] = 0;
|
||||
desc[1] &= C_008F14_BASE_ADDRESS_HI;
|
||||
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX6:
|
||||
case GFX7:
|
||||
break;
|
||||
|
@ -2854,7 +2854,7 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_s
|
|||
*size_metadata = 10 * 4;
|
||||
|
||||
/* Dwords [10:..] contain the mipmap level offsets. */
|
||||
if (info->chip_class <= GFX8) {
|
||||
if (info->gfx_level <= GFX8) {
|
||||
for (unsigned i = 0; i < num_mipmap_levels; i++)
|
||||
metadata[10 + i] = surf->u.legacy.level[i].offset_256B;
|
||||
|
||||
|
@ -2899,9 +2899,9 @@ bool ac_surface_override_offset_stride(const struct radeon_info *info, struct ra
|
|||
*/
|
||||
bool require_equal_pitch = surf->surf_size != surf->total_size ||
|
||||
num_mipmap_levels != 1 ||
|
||||
info->chip_class >= GFX10;
|
||||
info->gfx_level >= GFX10;
|
||||
|
||||
if (info->chip_class >= GFX9) {
|
||||
if (info->gfx_level >= GFX9) {
|
||||
if (pitch) {
|
||||
if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)
|
||||
return false;
|
||||
|
@ -2964,13 +2964,13 @@ unsigned ac_surface_get_nplanes(const struct radeon_surf *surf)
|
|||
return 1;
|
||||
}
|
||||
|
||||
uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
|
||||
uint64_t ac_surface_get_plane_offset(enum amd_gfx_level gfx_level,
|
||||
const struct radeon_surf *surf,
|
||||
unsigned plane, unsigned layer)
|
||||
{
|
||||
switch (plane) {
|
||||
case 0:
|
||||
if (chip_class >= GFX9) {
|
||||
if (gfx_level >= GFX9) {
|
||||
return surf->u.gfx9.surf_offset +
|
||||
layer * surf->u.gfx9.surf_slice_size;
|
||||
} else {
|
||||
|
@ -2989,13 +2989,13 @@ uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
|
||||
uint64_t ac_surface_get_plane_stride(enum amd_gfx_level gfx_level,
|
||||
const struct radeon_surf *surf,
|
||||
unsigned plane, unsigned level)
|
||||
{
|
||||
switch (plane) {
|
||||
case 0:
|
||||
if (chip_class >= GFX9) {
|
||||
if (gfx_level >= GFX9) {
|
||||
return (surf->is_linear ? surf->u.gfx9.pitch[level] : surf->u.gfx9.surf_pitch) * surf->bpe;
|
||||
} else {
|
||||
return surf->u.legacy.level[level].nblk_x * surf->bpe;
|
||||
|
@ -3029,7 +3029,7 @@ uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
|
|||
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
|
||||
const struct radeon_surf *surf)
|
||||
{
|
||||
if (info->chip_class >= GFX9) {
|
||||
if (info->gfx_level >= GFX9) {
|
||||
fprintf(out,
|
||||
" Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
|
||||
"alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "
|
||||
|
@ -3133,7 +3133,7 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
|
|||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||
nir_ssa_def *one = nir_imm_int(b, 1);
|
||||
|
||||
assert(info->chip_class >= GFX10);
|
||||
assert(info->gfx_level >= GFX10);
|
||||
|
||||
unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
|
||||
unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
|
||||
|
@ -3188,7 +3188,7 @@ static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct r
|
|||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||
nir_ssa_def *one = nir_imm_int(b, 1);
|
||||
|
||||
assert(info->chip_class >= GFX9);
|
||||
assert(info->gfx_level >= GFX9);
|
||||
|
||||
unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
|
||||
unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
|
||||
|
@ -3253,7 +3253,7 @@ nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info
|
|||
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
|
||||
nir_ssa_def *sample, nir_ssa_def *pipe_xor)
|
||||
{
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
unsigned bpp_log2 = util_logbase2(bpe);
|
||||
|
||||
return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1,
|
||||
|
@ -3276,7 +3276,7 @@ nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_in
|
|||
{
|
||||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,
|
||||
cmask_pitch, cmask_slice_size,
|
||||
x, y, z, pipe_xor, bit_position);
|
||||
|
|
|
@ -460,10 +460,10 @@ void ac_modifier_max_extent(const struct radeon_info *info,
|
|||
uint64_t modifier, uint32_t *width, uint32_t *height);
|
||||
|
||||
unsigned ac_surface_get_nplanes(const struct radeon_surf *surf);
|
||||
uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
|
||||
uint64_t ac_surface_get_plane_offset(enum amd_gfx_level gfx_level,
|
||||
const struct radeon_surf *surf,
|
||||
unsigned plane, unsigned layer);
|
||||
uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
|
||||
uint64_t ac_surface_get_plane_stride(enum amd_gfx_level gfx_level,
|
||||
const struct radeon_surf *surf,
|
||||
unsigned plane, unsigned level);
|
||||
/* Of the whole miplevel, not an individual layer */
|
||||
|
@ -473,7 +473,7 @@ uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
|
|||
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
|
||||
const struct radeon_surf *surf);
|
||||
|
||||
bool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
|
||||
bool ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level,
|
||||
const struct radeon_surf *surf);
|
||||
|
||||
#ifdef AC_SURFACE_INCLUDE_NIR
|
||||
|
|
|
@ -261,7 +261,7 @@ static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE
|
|||
|
||||
/* Validate that the packed gfx9_meta_equation structure can fit all fields. */
|
||||
const struct gfx9_meta_equation eq;
|
||||
if (info->chip_class == GFX9) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
/* The bit array is smaller in gfx9_meta_equation than in addrlib. */
|
||||
assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
|
||||
} else {
|
||||
|
@ -284,7 +284,7 @@ static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE
|
|||
}
|
||||
|
||||
unsigned addr;
|
||||
if (info->chip_class == GFX9) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
|
||||
dout.metaBlkDepth, dout.pitch, dout.height,
|
||||
in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
|
||||
|
@ -321,7 +321,7 @@ static void run_dcc_address_test(const char *name, const struct radeon_info *inf
|
|||
unsigned last_size, max_samples, min_bpp, max_bpp;
|
||||
unsigned swizzle_modes[2], num_swizzle_modes = 0;
|
||||
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX9:
|
||||
swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_S_X;
|
||||
break;
|
||||
|
@ -334,7 +334,7 @@ static void run_dcc_address_test(const char *name, const struct radeon_info *inf
|
|||
swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_R_X;
|
||||
break;
|
||||
default:
|
||||
unreachable("unhandled gfx version");
|
||||
unreachable("unhandled gfx level");
|
||||
}
|
||||
|
||||
if (full) {
|
||||
|
@ -366,7 +366,7 @@ static void run_dcc_address_test(const char *name, const struct radeon_info *inf
|
|||
for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
|
||||
for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
|
||||
/* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
|
||||
for (unsigned samples = 1; samples <= (info->chip_class == GFX9 ? max_samples : 1); samples *= 2) {
|
||||
for (unsigned samples = 1; samples <= (info->gfx_level == GFX9 ? max_samples : 1); samples *= 2) {
|
||||
for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
|
||||
for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
|
||||
for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
|
||||
|
@ -501,7 +501,7 @@ static void run_htile_address_test(const char *name, const struct radeon_info *i
|
|||
unsigned first_size = 0, last_size = 6*6 - 1;
|
||||
unsigned swizzle_modes[2], num_swizzle_modes = 0;
|
||||
|
||||
switch (info->chip_class) {
|
||||
switch (info->gfx_level) {
|
||||
case GFX9:
|
||||
case GFX10:
|
||||
case GFX10_3:
|
||||
|
@ -512,7 +512,7 @@ static void run_htile_address_test(const char *name, const struct radeon_info *i
|
|||
swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_Z_X;
|
||||
break;
|
||||
default:
|
||||
unreachable("unhandled gfx version");
|
||||
unreachable("unhandled gfx level");
|
||||
}
|
||||
|
||||
/* The test coverage is reduced for Gitlab CI because it timeouts. */
|
||||
|
@ -638,7 +638,7 @@ static bool one_cmask_address_test(const char *name, const char *test, ADDR_HAND
|
|||
|
||||
unsigned addr, bit_position;
|
||||
|
||||
if (info->chip_class == GFX9) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
|
||||
cout.metaBlkWidth, cout.metaBlkHeight, 1,
|
||||
cout.pitch, cout.height,
|
||||
|
@ -672,11 +672,11 @@ static void run_cmask_address_test(const char *name, const struct radeon_info *i
|
|||
{
|
||||
unsigned total = 0;
|
||||
unsigned fails = 0;
|
||||
unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
|
||||
unsigned swizzle_mode = info->gfx_level == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
|
||||
unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
|
||||
|
||||
/* GFX11 doesn't have CMASK. */
|
||||
if (info->chip_class >= GFX11)
|
||||
if (info->gfx_level >= GFX11)
|
||||
return;
|
||||
|
||||
/* The test coverage is reduced for Gitlab CI because it timeouts. */
|
||||
|
@ -738,7 +738,7 @@ int main(int argc, char **argv)
|
|||
struct radeon_info info = get_radeon_info(&testcases[i]);
|
||||
|
||||
/* Only GFX10+ is currently supported. */
|
||||
if (info.chip_class < GFX10)
|
||||
if (info.gfx_level < GFX10)
|
||||
continue;
|
||||
|
||||
run_htile_address_test(testcases[i].name, &info, full);
|
||||
|
|
|
@ -241,7 +241,7 @@ static void test_modifier(const struct radeon_info *info,
|
|||
.rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
|
||||
G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config),
|
||||
.se = G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config),
|
||||
.banks_or_pkrs = info->chip_class >= GFX10 ?
|
||||
.banks_or_pkrs = info->gfx_level >= GFX10 ?
|
||||
G_0098F8_NUM_PKRS(info->gb_addr_config) : G_0098F8_NUM_BANKS(info->gb_addr_config)
|
||||
};
|
||||
|
||||
|
@ -279,7 +279,7 @@ static void test_modifier(const struct radeon_info *info,
|
|||
uint64_t expected_offset = surf_size;
|
||||
|
||||
if (ac_modifier_has_dcc_retile(modifier)) {
|
||||
unsigned dcc_align = info->chip_class >= GFX10 ? 4096 : 65536;
|
||||
unsigned dcc_align = info->gfx_level >= GFX10 ? 4096 : 65536;
|
||||
unsigned dcc_pitch;
|
||||
uint64_t dcc_size = block_count(dims[i][0], dims[i][1],
|
||||
elem_bits, 20, &dcc_pitch,
|
||||
|
@ -296,9 +296,9 @@ static void test_modifier(const struct radeon_info *info,
|
|||
if (ac_modifier_has_dcc(modifier)) {
|
||||
uint64_t dcc_align = 1;
|
||||
unsigned block_bits;
|
||||
if (info->chip_class >= GFX10) {
|
||||
if (info->gfx_level >= GFX10) {
|
||||
unsigned num_pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
|
||||
if (info->chip_class >= GFX10_3 &&
|
||||
if (info->gfx_level >= GFX10_3 &&
|
||||
G_0098F8_NUM_PKRS(info->gb_addr_config) == num_pipes && num_pipes > 1)
|
||||
++num_pipes;
|
||||
block_bits = 16 +
|
||||
|
|
|
@ -35,7 +35,7 @@ typedef void (*gpu_init_func)(struct radeon_info *info);
|
|||
static void init_vega10(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_VEGA10;
|
||||
info->chip_class = GFX9;
|
||||
info->gfx_level = GFX9;
|
||||
info->family_id = AMDGPU_FAMILY_AI;
|
||||
info->chip_external_rev = 0x01;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -50,7 +50,7 @@ static void init_vega10(struct radeon_info *info)
|
|||
static void init_vega20(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_VEGA20;
|
||||
info->chip_class = GFX9;
|
||||
info->gfx_level = GFX9;
|
||||
info->family_id = AMDGPU_FAMILY_AI;
|
||||
info->chip_external_rev = 0x30;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -66,7 +66,7 @@ static void init_vega20(struct radeon_info *info)
|
|||
static void init_raven(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_RAVEN;
|
||||
info->chip_class = GFX9;
|
||||
info->gfx_level = GFX9;
|
||||
info->family_id = AMDGPU_FAMILY_RV;
|
||||
info->chip_external_rev = 0x01;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -81,7 +81,7 @@ static void init_raven(struct radeon_info *info)
|
|||
static void init_raven2(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_RAVEN2;
|
||||
info->chip_class = GFX9;
|
||||
info->gfx_level = GFX9;
|
||||
info->family_id = AMDGPU_FAMILY_RV;
|
||||
info->chip_external_rev = 0x82;
|
||||
info->use_display_dcc_unaligned = true;
|
||||
|
@ -96,7 +96,7 @@ static void init_raven2(struct radeon_info *info)
|
|||
static void init_navi10(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_NAVI10;
|
||||
info->chip_class = GFX10;
|
||||
info->gfx_level = GFX10;
|
||||
info->family_id = AMDGPU_FAMILY_NV;
|
||||
info->chip_external_rev = 3;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -110,7 +110,7 @@ static void init_navi10(struct radeon_info *info)
|
|||
static void init_navi14(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_NAVI14;
|
||||
info->chip_class = GFX10;
|
||||
info->gfx_level = GFX10;
|
||||
info->family_id = AMDGPU_FAMILY_NV;
|
||||
info->chip_external_rev = 0x15;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -124,7 +124,7 @@ static void init_navi14(struct radeon_info *info)
|
|||
static void init_gfx103(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_SIENNA_CICHLID; /* This doesn't affect tests. */
|
||||
info->chip_class = GFX10_3;
|
||||
info->gfx_level = GFX10_3;
|
||||
info->family_id = AMDGPU_FAMILY_NV;
|
||||
info->chip_external_rev = 0x28;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -140,7 +140,7 @@ static void init_gfx103(struct radeon_info *info)
|
|||
static void init_gfx11(struct radeon_info *info)
|
||||
{
|
||||
info->family = CHIP_UNKNOWN;
|
||||
info->chip_class = GFX11;
|
||||
info->gfx_level = GFX11;
|
||||
info->family_id = 0x00;
|
||||
info->chip_external_rev = 0x01;
|
||||
info->use_display_dcc_unaligned = false;
|
||||
|
@ -192,7 +192,7 @@ static struct radeon_info get_radeon_info(struct testcase *testcase)
|
|||
|
||||
testcase->init(&info);
|
||||
|
||||
switch(info.chip_class) {
|
||||
switch(info.gfx_level) {
|
||||
case GFX9:
|
||||
info.gb_addr_config = (info.gb_addr_config &
|
||||
C_0098F8_NUM_PIPES &
|
||||
|
@ -213,7 +213,7 @@ static struct radeon_info get_radeon_info(struct testcase *testcase)
|
|||
S_0098F8_NUM_PIPES(testcase->pipes) |
|
||||
S_0098F8_NUM_PKRS(testcase->banks_or_pkrs);
|
||||
/* 1 packer implies 1 RB except gfx10 where the field is ignored. */
|
||||
info.max_render_backends = info.chip_class == GFX10 || testcase->banks_or_pkrs ? 2 : 1;
|
||||
info.max_render_backends = info.gfx_level == GFX10 || testcase->banks_or_pkrs ? 2 : 1;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled generation");
|
||||
|
|
|
@ -135,7 +135,7 @@ enum radeon_family
|
|||
CHIP_LAST,
|
||||
};
|
||||
|
||||
enum chip_class
|
||||
enum amd_gfx_level
|
||||
{
|
||||
CLASS_UNKNOWN = 0,
|
||||
R300,
|
||||
|
|
|
@ -47,7 +47,7 @@ extern const struct gfx10_format gfx11_format_table[PIPE_FORMAT_COUNT];
|
|||
static inline
|
||||
const struct gfx10_format* ac_get_gfx10_format_table(struct radeon_info *info)
|
||||
{
|
||||
if (info->chip_class >= GFX11)
|
||||
if (info->gfx_level >= GFX11)
|
||||
return gfx11_format_table;
|
||||
else
|
||||
return gfx10_format_table;
|
||||
|
|
|
@ -360,7 +360,7 @@ def main():
|
|||
print('Error reading {}'.format(sys.argv[1]), file=sys.stderr)
|
||||
raise
|
||||
|
||||
# The ac_debug code only distinguishes by chip_class
|
||||
# The ac_debug code only distinguishes by gfx_level
|
||||
regdb.merge_chips(['gfx8', 'fiji', 'stoney'], 'gfx8')
|
||||
|
||||
# Write it all out
|
||||
|
|
|
@ -42,19 +42,19 @@ struct constaddr_info {
|
|||
|
||||
struct asm_context {
|
||||
Program* program;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
std::vector<std::pair<int, SOPP_instruction*>> branches;
|
||||
std::map<unsigned, constaddr_info> constaddrs;
|
||||
const int16_t* opcode;
|
||||
// TODO: keep track of branch instructions referring blocks
|
||||
// and, when emitting the block, correct the offset in instr
|
||||
asm_context(Program* program_) : program(program_), chip_class(program->chip_class)
|
||||
asm_context(Program* program_) : program(program_), gfx_level(program->gfx_level)
|
||||
{
|
||||
if (chip_class <= GFX7)
|
||||
if (gfx_level <= GFX7)
|
||||
opcode = &instr_info.opcode_gfx7[0];
|
||||
else if (chip_class <= GFX9)
|
||||
else if (gfx_level <= GFX9)
|
||||
opcode = &instr_info.opcode_gfx9[0];
|
||||
else if (chip_class >= GFX10)
|
||||
else if (gfx_level >= GFX10)
|
||||
opcode = &instr_info.opcode_gfx10[0];
|
||||
}
|
||||
|
||||
|
@ -121,11 +121,11 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
SOPK_instruction& sopk = instr->sopk();
|
||||
|
||||
if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
|
||||
assert(ctx.chip_class >= GFX10);
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
assert(ctx.subvector_begin_pos == -1);
|
||||
ctx.subvector_begin_pos = out.size();
|
||||
} else if (instr->opcode == aco_opcode::s_subvector_loop_end) {
|
||||
assert(ctx.chip_class >= GFX10);
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
assert(ctx.subvector_begin_pos != -1);
|
||||
/* Adjust s_subvector_loop_begin instruction to the address after the end */
|
||||
out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
|
||||
|
@ -147,8 +147,8 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
}
|
||||
case Format::SOP1: {
|
||||
uint32_t encoding = (0b101111101 << 23);
|
||||
if (opcode >= 55 && ctx.chip_class <= GFX9) {
|
||||
assert(ctx.chip_class == GFX9 && opcode < 60);
|
||||
if (opcode >= 55 && ctx.gfx_level <= GFX9) {
|
||||
assert(ctx.gfx_level == GFX9 && opcode < 60);
|
||||
opcode = opcode - 4;
|
||||
}
|
||||
encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0;
|
||||
|
@ -183,7 +183,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
bool is_load = !instr->definitions.empty();
|
||||
uint32_t encoding = 0;
|
||||
|
||||
if (ctx.chip_class <= GFX7) {
|
||||
if (ctx.gfx_level <= GFX7) {
|
||||
encoding = (0b11000 << 27);
|
||||
encoding |= opcode << 22;
|
||||
encoding |= instr->definitions.size() ? instr->definitions[0].physReg() << 15 : 0;
|
||||
|
@ -206,7 +206,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
return;
|
||||
}
|
||||
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
encoding = (0b110000 << 26);
|
||||
assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= smem.nv ? 1 << 15 : 0;
|
||||
|
@ -219,11 +219,11 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= opcode << 18;
|
||||
encoding |= smem.glc ? 1 << 16 : 0;
|
||||
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
if (instr->operands.size() >= 2)
|
||||
encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */
|
||||
}
|
||||
if (ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX9) {
|
||||
encoding |= soe ? 1 << 14 : 0;
|
||||
}
|
||||
|
||||
|
@ -239,13 +239,13 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding = 0;
|
||||
|
||||
int32_t offset = 0;
|
||||
uint32_t soffset = ctx.chip_class >= GFX10
|
||||
uint32_t soffset = ctx.gfx_level >= GFX10
|
||||
? sgpr_null /* On GFX10 this is disabled by specifying SGPR_NULL */
|
||||
: 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on
|
||||
GFX8 and below) */
|
||||
if (instr->operands.size() >= 2) {
|
||||
const Operand& op_off1 = instr->operands[1];
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
offset = op_off1.isConstant() ? op_off1.constantValue() : op_off1.physReg();
|
||||
} else {
|
||||
/* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an
|
||||
|
@ -260,7 +260,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
|
||||
if (soe) {
|
||||
const Operand& op_off2 = instr->operands.back();
|
||||
assert(ctx.chip_class >= GFX9); /* GFX8 and below don't support specifying a constant
|
||||
assert(ctx.gfx_level >= GFX9); /* GFX8 and below don't support specifying a constant
|
||||
and an SGPR at the same time */
|
||||
assert(!op_off2.isConstant());
|
||||
soffset = op_off2.physReg();
|
||||
|
@ -307,12 +307,12 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
|
||||
instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
|
||||
instr->opcode == aco_opcode::v_interp_p2_f16) {
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
encoding = (0b110100 << 26);
|
||||
} else if (ctx.chip_class >= GFX10) {
|
||||
} else if (ctx.gfx_level >= GFX10) {
|
||||
encoding = (0b110101 << 26);
|
||||
} else {
|
||||
unreachable("Unknown chip_class.");
|
||||
unreachable("Unknown gfx_level.");
|
||||
}
|
||||
|
||||
encoding |= opcode << 16;
|
||||
|
@ -330,7 +330,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
}
|
||||
out.push_back(encoding);
|
||||
} else {
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */
|
||||
} else {
|
||||
encoding = (0b110010 << 26);
|
||||
|
@ -352,7 +352,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
case Format::DS: {
|
||||
DS_instruction& ds = instr->ds();
|
||||
uint32_t encoding = (0b110110 << 26);
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
encoding |= opcode << 17;
|
||||
encoding |= (ds.gds ? 1 : 0) << 16;
|
||||
} else {
|
||||
|
@ -384,20 +384,20 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= (mubuf.lds ? 1 : 0) << 16;
|
||||
encoding |= (mubuf.glc ? 1 : 0) << 14;
|
||||
encoding |= (mubuf.idxen ? 1 : 0) << 13;
|
||||
assert(!mubuf.addr64 || ctx.chip_class <= GFX7);
|
||||
if (ctx.chip_class == GFX6 || ctx.chip_class == GFX7)
|
||||
assert(!mubuf.addr64 || ctx.gfx_level <= GFX7);
|
||||
if (ctx.gfx_level == GFX6 || ctx.gfx_level == GFX7)
|
||||
encoding |= (mubuf.addr64 ? 1 : 0) << 15;
|
||||
encoding |= (mubuf.offen ? 1 : 0) << 12;
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 17;
|
||||
} else if (ctx.chip_class >= GFX10) {
|
||||
} else if (ctx.gfx_level >= GFX10) {
|
||||
encoding |= (mubuf.dlc ? 1 : 0) << 15;
|
||||
}
|
||||
encoding |= 0x0FFF & mubuf.offset;
|
||||
out.push_back(encoding);
|
||||
encoding = 0;
|
||||
if (ctx.chip_class <= GFX7 || ctx.chip_class >= GFX10) {
|
||||
if (ctx.gfx_level <= GFX7 || ctx.gfx_level >= GFX10) {
|
||||
encoding |= (mubuf.slc ? 1 : 0) << 22;
|
||||
}
|
||||
encoding |= instr->operands[2].physReg() << 24;
|
||||
|
@ -413,10 +413,10 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
case Format::MTBUF: {
|
||||
MTBUF_instruction& mtbuf = instr->mtbuf();
|
||||
|
||||
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf.dfmt, mtbuf.nfmt);
|
||||
uint32_t img_format = ac_get_tbuffer_format(ctx.gfx_level, mtbuf.dfmt, mtbuf.nfmt);
|
||||
uint32_t encoding = (0b111010 << 26);
|
||||
assert(img_format <= 0x7F);
|
||||
assert(!mtbuf.dlc || ctx.chip_class >= GFX10);
|
||||
assert(!mtbuf.dlc || ctx.gfx_level >= GFX10);
|
||||
encoding |= (mtbuf.dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
|
||||
encoding |= (mtbuf.glc ? 1 : 0) << 14;
|
||||
encoding |= (mtbuf.idxen ? 1 : 0) << 13;
|
||||
|
@ -424,7 +424,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= 0x0FFF & mtbuf.offset;
|
||||
encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
|
||||
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
encoding |= opcode << 15;
|
||||
} else {
|
||||
encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */
|
||||
|
@ -442,7 +442,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= (0xFF & reg) << 8;
|
||||
encoding |= (0xFF & instr->operands[1].physReg());
|
||||
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
if (ctx.gfx_level >= GFX10) {
|
||||
encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */
|
||||
}
|
||||
|
||||
|
@ -451,7 +451,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
}
|
||||
case Format::MIMG: {
|
||||
unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
|
||||
assert(!nsa_dwords || ctx.chip_class >= GFX10);
|
||||
assert(!nsa_dwords || ctx.gfx_level >= GFX10);
|
||||
|
||||
MIMG_instruction& mimg = instr->mimg();
|
||||
uint32_t encoding = (0b111100 << 26);
|
||||
|
@ -462,7 +462,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= mimg.tfe ? 1 << 16 : 0;
|
||||
encoding |= mimg.glc ? 1 << 13 : 0;
|
||||
encoding |= mimg.unrm ? 1 << 12 : 0;
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
|
||||
assert(!mimg.r128);
|
||||
encoding |= mimg.a16 ? 1 << 15 : 0;
|
||||
|
@ -486,9 +486,9 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
if (!instr->operands[1].isUndefined())
|
||||
encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
|
||||
|
||||
assert(!mimg.d16 || ctx.chip_class >= GFX9);
|
||||
assert(!mimg.d16 || ctx.gfx_level >= GFX9);
|
||||
encoding |= mimg.d16 ? 1 << 31 : 0;
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
if (ctx.gfx_level >= GFX10) {
|
||||
/* GFX10: A16 still exists, but is in a different place */
|
||||
encoding |= mimg.a16 ? 1 << 30 : 0;
|
||||
}
|
||||
|
@ -509,7 +509,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
FLAT_instruction& flat = instr->flatlike();
|
||||
uint32_t encoding = (0b110111 << 26);
|
||||
encoding |= opcode << 18;
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
assert(flat.offset <= 0x1fff);
|
||||
encoding |= flat.offset & 0x1fff;
|
||||
} else if (instr->isFlat()) {
|
||||
|
@ -528,7 +528,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= flat.lds ? 1 << 13 : 0;
|
||||
encoding |= flat.glc ? 1 << 16 : 0;
|
||||
encoding |= flat.slc ? 1 << 17 : 0;
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
if (ctx.gfx_level >= GFX10) {
|
||||
assert(!flat.nv);
|
||||
encoding |= flat.dlc ? 1 << 12 : 0;
|
||||
} else {
|
||||
|
@ -541,12 +541,12 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
if (instr->operands.size() >= 3)
|
||||
encoding |= (0xFF & instr->operands[2].physReg()) << 8;
|
||||
if (!instr->operands[1].isUndefined()) {
|
||||
assert(ctx.chip_class >= GFX10 || instr->operands[1].physReg() != 0x7F);
|
||||
assert(ctx.gfx_level >= GFX10 || instr->operands[1].physReg() != 0x7F);
|
||||
assert(instr->format != Format::FLAT);
|
||||
encoding |= instr->operands[1].physReg() << 16;
|
||||
} else if (instr->format != Format::FLAT ||
|
||||
ctx.chip_class >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */
|
||||
if (ctx.chip_class <= GFX9)
|
||||
ctx.gfx_level >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */
|
||||
if (ctx.gfx_level <= GFX9)
|
||||
encoding |= 0x7F << 16;
|
||||
else
|
||||
encoding |= sgpr_null << 16;
|
||||
|
@ -558,7 +558,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
case Format::EXP: {
|
||||
Export_instruction& exp = instr->exp();
|
||||
uint32_t encoding;
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
|
||||
encoding = (0b110001 << 26);
|
||||
} else {
|
||||
encoding = (0b111110 << 26);
|
||||
|
@ -589,7 +589,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
if (instr->isVOP2()) {
|
||||
opcode = opcode + 0x100;
|
||||
} else if (instr->isVOP1()) {
|
||||
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9)
|
||||
if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9)
|
||||
opcode = opcode + 0x140;
|
||||
else
|
||||
opcode = opcode + 0x180;
|
||||
|
@ -600,15 +600,15 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
}
|
||||
|
||||
uint32_t encoding;
|
||||
if (ctx.chip_class <= GFX9) {
|
||||
if (ctx.gfx_level <= GFX9) {
|
||||
encoding = (0b110100 << 26);
|
||||
} else if (ctx.chip_class >= GFX10) {
|
||||
} else if (ctx.gfx_level >= GFX10) {
|
||||
encoding = (0b110101 << 26);
|
||||
} else {
|
||||
unreachable("Unknown chip_class.");
|
||||
unreachable("Unknown gfx_level.");
|
||||
}
|
||||
|
||||
if (ctx.chip_class <= GFX7) {
|
||||
if (ctx.gfx_level <= GFX7) {
|
||||
encoding |= opcode << 17;
|
||||
encoding |= (vop3.clamp ? 1 : 0) << 11;
|
||||
} else {
|
||||
|
@ -642,12 +642,12 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
VOP3P_instruction& vop3 = instr->vop3p();
|
||||
|
||||
uint32_t encoding;
|
||||
if (ctx.chip_class == GFX9) {
|
||||
if (ctx.gfx_level == GFX9) {
|
||||
encoding = (0b110100111 << 23);
|
||||
} else if (ctx.chip_class >= GFX10) {
|
||||
} else if (ctx.gfx_level >= GFX10) {
|
||||
encoding = (0b110011 << 26);
|
||||
} else {
|
||||
unreachable("Unknown chip_class.");
|
||||
unreachable("Unknown gfx_level.");
|
||||
}
|
||||
|
||||
encoding |= opcode << 16;
|
||||
|
@ -667,7 +667,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
out.push_back(encoding);
|
||||
|
||||
} else if (instr->isDPP16()) {
|
||||
assert(ctx.chip_class >= GFX8);
|
||||
assert(ctx.gfx_level >= GFX8);
|
||||
DPP16_instruction& dpp = instr->dpp16();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
|
@ -681,7 +681,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
encoding |= dpp.neg[1] << 22;
|
||||
encoding |= dpp.abs[0] << 21;
|
||||
encoding |= dpp.neg[0] << 20;
|
||||
if (ctx.chip_class >= GFX10)
|
||||
if (ctx.gfx_level >= GFX10)
|
||||
encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
|
||||
encoding |= dpp.bound_ctrl << 19;
|
||||
encoding |= dpp.dpp_ctrl << 8;
|
||||
|
@ -689,7 +689,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
|||
out.push_back(encoding);
|
||||
return;
|
||||
} else if (instr->isDPP8()) {
|
||||
assert(ctx.chip_class >= GFX10);
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
DPP8_instruction& dpp = instr->dpp8();
|
||||
|
||||
/* first emit the instruction without the DPP operand */
|
||||
|
@ -938,7 +938,7 @@ fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
|
|||
do {
|
||||
repeat = false;
|
||||
|
||||
if (ctx.chip_class == GFX10)
|
||||
if (ctx.gfx_level == GFX10)
|
||||
fix_branches_gfx10(ctx, out);
|
||||
|
||||
for (std::pair<int, SOPP_instruction*>& branch : ctx.branches) {
|
||||
|
@ -995,7 +995,7 @@ emit_program(Program* program, std::vector<uint32_t>& code)
|
|||
|
||||
unsigned exec_size = code.size() * sizeof(uint32_t);
|
||||
|
||||
if (program->chip_class >= GFX10) {
|
||||
if (program->gfx_level >= GFX10) {
|
||||
/* Pad output with s_code_end so instruction prefetching doesn't cause
|
||||
* page faults */
|
||||
unsigned final_size = align(code.size() + 3 * 16, 16);
|
||||
|
|
|
@ -361,10 +361,10 @@ public:
|
|||
Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false)
|
||||
{
|
||||
assert(tmp.type() == RegType::vgpr);
|
||||
bool has_lshl_add = program->chip_class >= GFX9;
|
||||
bool has_lshl_add = program->gfx_level >= GFX9;
|
||||
/* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
|
||||
* compared to 4x the latency on <GFX10. */
|
||||
unsigned mul_cost = program->chip_class >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral());
|
||||
unsigned mul_cost = program->gfx_level >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral());
|
||||
if (imm == 0) {
|
||||
return copy(dst, Operand::zero());
|
||||
} else if (imm == 1) {
|
||||
|
@ -426,9 +426,9 @@ public:
|
|||
|
||||
if (!carry_in.op.isUndefined())
|
||||
return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in);
|
||||
else if (program->chip_class >= GFX10 && carry_out)
|
||||
else if (program->gfx_level >= GFX10 && carry_out)
|
||||
return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(lm), a, b);
|
||||
else if (program->chip_class < GFX9 || carry_out)
|
||||
else if (program->gfx_level < GFX9 || carry_out)
|
||||
return vop2(aco_opcode::v_add_co_u32, Definition(dst), def(lm), a, b);
|
||||
else
|
||||
return vop2(aco_opcode::v_add_u32, Definition(dst), a, b);
|
||||
|
@ -436,7 +436,7 @@ public:
|
|||
|
||||
Result vsub32(Definition dst, Op a, Op b, bool carry_out=false, Op borrow=Op(Operand(s2)))
|
||||
{
|
||||
if (!borrow.op.isUndefined() || program->chip_class < GFX9)
|
||||
if (!borrow.op.isUndefined() || program->gfx_level < GFX9)
|
||||
carry_out = true;
|
||||
|
||||
bool reverse = !b.op.isTemp() || b.op.regClass().type() != RegType::vgpr;
|
||||
|
@ -457,10 +457,10 @@ public:
|
|||
op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32;
|
||||
}
|
||||
bool vop3 = false;
|
||||
if (program->chip_class >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
|
||||
if (program->gfx_level >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
|
||||
vop3 = true;
|
||||
op = aco_opcode::v_subrev_co_u32_e64;
|
||||
} else if (program->chip_class >= GFX10 && op == aco_opcode::v_sub_co_u32) {
|
||||
} else if (program->gfx_level >= GFX10 && op == aco_opcode::v_sub_co_u32) {
|
||||
vop3 = true;
|
||||
op = aco_opcode::v_sub_co_u32_e64;
|
||||
}
|
||||
|
@ -485,13 +485,13 @@ public:
|
|||
|
||||
Result readlane(Definition dst, Op vsrc, Op lane)
|
||||
{
|
||||
if (program->chip_class >= GFX8)
|
||||
if (program->gfx_level >= GFX8)
|
||||
return vop3(aco_opcode::v_readlane_b32_e64, dst, vsrc, lane);
|
||||
else
|
||||
return vop2(aco_opcode::v_readlane_b32, dst, vsrc, lane);
|
||||
}
|
||||
Result writelane(Definition dst, Op val, Op lane, Op vsrc) {
|
||||
if (program->chip_class >= GFX8)
|
||||
if (program->gfx_level >= GFX8)
|
||||
return vop3(aco_opcode::v_writelane_b32_e64, dst, val, lane, vsrc);
|
||||
else
|
||||
return vop2(aco_opcode::v_writelane_b32, dst, val, lane, vsrc);
|
||||
|
|
|
@ -78,7 +78,7 @@ form_hard_clauses(Program* program)
|
|||
|
||||
clause_type type = clause_other;
|
||||
if (instr->isVMEM() && !instr->operands.empty()) {
|
||||
if (program->chip_class == GFX10 && instr->isMIMG() &&
|
||||
if (program->gfx_level == GFX10 && instr->isMIMG() &&
|
||||
get_mimg_nsa_dwords(instr.get()) > 0)
|
||||
type = clause_other;
|
||||
else
|
||||
|
|
|
@ -356,7 +356,7 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
|
|||
int NOPs = 0;
|
||||
|
||||
if (instr->isSMEM()) {
|
||||
if (state.program->chip_class == GFX6) {
|
||||
if (state.program->gfx_level == GFX6) {
|
||||
/* A read of an SGPR by SMRD instruction requires 4 wait states
|
||||
* when the SGPR was written by a VALU instruction. According to LLVM,
|
||||
* there is also an undocumented hardware behavior when the buffer
|
||||
|
@ -382,7 +382,7 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
|
|||
NOPs = MAX2(NOPs, ctx.setreg_then_getsetreg);
|
||||
}
|
||||
|
||||
if (state.program->chip_class == GFX9) {
|
||||
if (state.program->gfx_level == GFX9) {
|
||||
if (instr->opcode == aco_opcode::s_movrels_b32 ||
|
||||
instr->opcode == aco_opcode::s_movrels_b64 ||
|
||||
instr->opcode == aco_opcode::s_movreld_b32 ||
|
||||
|
@ -428,7 +428,7 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
|
|||
* hangs on GFX6. Note that v_writelane_* is apparently not affected.
|
||||
* This hazard isn't documented anywhere but AMD confirmed that hazard.
|
||||
*/
|
||||
if (state.program->chip_class == GFX6 &&
|
||||
if (state.program->gfx_level == GFX6 &&
|
||||
(instr->opcode == aco_opcode::v_readlane_b32 || /* GFX6 doesn't have v_readlane_b32_e64 */
|
||||
instr->opcode == aco_opcode::v_readfirstlane_b32)) {
|
||||
handle_vintrp_then_read_hazard(state, &NOPs, 1, instr->operands[0]);
|
||||
|
@ -448,7 +448,7 @@ handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& i
|
|||
if (!instr->isSALU() && instr->format != Format::SMEM)
|
||||
NOPs = MAX2(NOPs, ctx.set_vskip_mode_then_vector);
|
||||
|
||||
if (state.program->chip_class == GFX9) {
|
||||
if (state.program->gfx_level == GFX9) {
|
||||
bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) && instr->flatlike().lds;
|
||||
if (instr->isVINTRP() || lds_scratch_global ||
|
||||
instr->opcode == aco_opcode::ds_read_addtid_b32 ||
|
||||
|
@ -886,9 +886,9 @@ mitigate_hazards(Program* program)
|
|||
void
|
||||
insert_NOPs(Program* program)
|
||||
{
|
||||
if (program->chip_class >= GFX10_3)
|
||||
if (program->gfx_level >= GFX10_3)
|
||||
; /* no hazards/bugs to mitigate */
|
||||
else if (program->chip_class >= GFX10)
|
||||
else if (program->gfx_level >= GFX10)
|
||||
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10>(program);
|
||||
else
|
||||
mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6>(program);
|
||||
|
|
|
@ -170,7 +170,7 @@ struct wait_entry {
|
|||
|
||||
struct wait_ctx {
|
||||
Program* program;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint16_t max_vm_cnt;
|
||||
uint16_t max_exp_cnt;
|
||||
uint16_t max_lgkm_cnt;
|
||||
|
@ -192,11 +192,11 @@ struct wait_ctx {
|
|||
|
||||
wait_ctx() {}
|
||||
wait_ctx(Program* program_)
|
||||
: program(program_), chip_class(program_->chip_class),
|
||||
max_vm_cnt(program_->chip_class >= GFX9 ? 62 : 14), max_exp_cnt(6),
|
||||
max_lgkm_cnt(program_->chip_class >= GFX10 ? 62 : 14),
|
||||
max_vs_cnt(program_->chip_class >= GFX10 ? 62 : 0),
|
||||
unordered_events(event_smem | (program_->chip_class < GFX10 ? event_flat : 0))
|
||||
: program(program_), gfx_level(program_->gfx_level),
|
||||
max_vm_cnt(program_->gfx_level >= GFX9 ? 62 : 14), max_exp_cnt(6),
|
||||
max_lgkm_cnt(program_->gfx_level >= GFX10 ? 62 : 14),
|
||||
max_vs_cnt(program_->gfx_level >= GFX10 ? 62 : 0),
|
||||
unordered_events(event_smem | (program_->gfx_level < GFX10 ? event_flat : 0))
|
||||
{}
|
||||
|
||||
bool join(const wait_ctx* other, bool logical)
|
||||
|
@ -295,7 +295,7 @@ parse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr)
|
|||
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
imm.combine(wait_imm(ctx.chip_class, instr->sopp().imm));
|
||||
imm.combine(wait_imm(ctx.gfx_level, instr->sopp().imm));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -339,7 +339,7 @@ force_waitcnt(wait_ctx& ctx, wait_imm& imm)
|
|||
if (ctx.lgkm_cnt)
|
||||
imm.lgkm = 0;
|
||||
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
if (ctx.gfx_level >= GFX10) {
|
||||
if (ctx.vs_cnt)
|
||||
imm.vs = 0;
|
||||
}
|
||||
|
@ -362,11 +362,11 @@ kill(wait_imm& imm, Instruction* instr, wait_ctx& ctx, memory_sync_info sync_inf
|
|||
* It shouldn't cost anything anyways since we're about to do s_endpgm.
|
||||
*/
|
||||
if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) {
|
||||
assert(ctx.chip_class >= GFX8);
|
||||
assert(ctx.gfx_level >= GFX8);
|
||||
imm.lgkm = 0;
|
||||
}
|
||||
|
||||
if (ctx.chip_class >= GFX10 && instr->isSMEM()) {
|
||||
if (ctx.gfx_level >= GFX10 && instr->isSMEM()) {
|
||||
/* GFX10: A store followed by a load at the same address causes a problem because
|
||||
* the load doesn't load the correct values unless we wait for the store first.
|
||||
* This is NOT mitigated by an s_nop.
|
||||
|
@ -547,7 +547,7 @@ update_counters(wait_ctx& ctx, wait_event event, memory_sync_info sync = memory_
|
|||
void
|
||||
update_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync_info())
|
||||
{
|
||||
assert(ctx.chip_class < GFX10);
|
||||
assert(ctx.gfx_level < GFX10);
|
||||
|
||||
if (ctx.lgkm_cnt <= ctx.max_lgkm_cnt)
|
||||
ctx.lgkm_cnt++;
|
||||
|
@ -634,7 +634,7 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
}
|
||||
case Format::FLAT: {
|
||||
FLAT_instruction& flat = instr->flat();
|
||||
if (ctx.chip_class < GFX10 && !instr->definitions.empty())
|
||||
if (ctx.gfx_level < GFX10 && !instr->definitions.empty())
|
||||
update_counters_for_flat_load(ctx, flat.sync);
|
||||
else
|
||||
update_counters(ctx, event_flat, flat.sync);
|
||||
|
@ -649,7 +649,7 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
|
||||
if (!instr->definitions.empty())
|
||||
insert_wait_entry(ctx, instr->definitions[0], event_smem);
|
||||
else if (ctx.chip_class >= GFX10 && !smem.sync.can_reorder())
|
||||
else if (ctx.gfx_level >= GFX10 && !smem.sync.can_reorder())
|
||||
ctx.pending_s_buffer_store = true;
|
||||
|
||||
break;
|
||||
|
@ -675,7 +675,7 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
case Format::MIMG:
|
||||
case Format::GLOBAL: {
|
||||
wait_event ev =
|
||||
!instr->definitions.empty() || ctx.chip_class < GFX10 ? event_vmem : event_vmem_store;
|
||||
!instr->definitions.empty() || ctx.gfx_level < GFX10 ? event_vmem : event_vmem_store;
|
||||
update_counters(ctx, ev, get_sync_info(instr));
|
||||
|
||||
bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() &&
|
||||
|
@ -684,11 +684,11 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
if (!instr->definitions.empty())
|
||||
insert_wait_entry(ctx, instr->definitions[0], ev, has_sampler);
|
||||
|
||||
if (ctx.chip_class == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) {
|
||||
if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) {
|
||||
ctx.exp_cnt++;
|
||||
update_counters(ctx, event_vmem_gpr_lock);
|
||||
insert_wait_entry(ctx, instr->operands[3], event_vmem_gpr_lock);
|
||||
} else if (ctx.chip_class == GFX6 && instr->isMIMG() && !instr->operands[2].isUndefined()) {
|
||||
} else if (ctx.gfx_level == GFX6 && instr->isMIMG() && !instr->operands[2].isUndefined()) {
|
||||
ctx.exp_cnt++;
|
||||
update_counters(ctx, event_vmem_gpr_lock);
|
||||
insert_wait_entry(ctx, instr->operands[2], event_vmem_gpr_lock);
|
||||
|
@ -709,7 +709,7 @@ void
|
|||
emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wait_imm& imm)
|
||||
{
|
||||
if (imm.vs != wait_imm::unset_counter) {
|
||||
assert(ctx.chip_class >= GFX10);
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
SOPK_instruction* waitcnt_vs =
|
||||
create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1);
|
||||
waitcnt_vs->definitions[0] = Definition(sgpr_null, s1);
|
||||
|
@ -720,7 +720,7 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
|
|||
if (!imm.empty()) {
|
||||
SOPP_instruction* waitcnt =
|
||||
create_instruction<SOPP_instruction>(aco_opcode::s_waitcnt, Format::SOPP, 0, 0);
|
||||
waitcnt->imm = imm.pack(ctx.chip_class);
|
||||
waitcnt->imm = imm.pack(ctx.gfx_level);
|
||||
waitcnt->block = -1;
|
||||
instructions.emplace_back(waitcnt);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -262,7 +262,7 @@ setup_vs_output_info(isel_context* ctx, nir_shader* nir,
|
|||
* as soon as it encounters a DONE pos export. When this happens, PS waves can launch
|
||||
* before the NGG (or VS) waves finish.
|
||||
*/
|
||||
ctx->program->early_rast = ctx->program->chip_class >= GFX10 && outinfo->param_exports == 0;
|
||||
ctx->program->early_rast = ctx->program->gfx_level >= GFX10 && outinfo->param_exports == 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -863,8 +863,8 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
|
|||
default: unreachable("Shader stage not implemented");
|
||||
}
|
||||
}
|
||||
bool gfx9_plus = options->chip_class >= GFX9;
|
||||
bool ngg = info->is_ngg && options->chip_class >= GFX10;
|
||||
bool gfx9_plus = options->gfx_level >= GFX9;
|
||||
bool ngg = info->is_ngg && options->gfx_level >= GFX10;
|
||||
HWStage hw_stage{};
|
||||
if (sw_stage == SWStage::VS && info->vs.as_es && !ngg)
|
||||
hw_stage = HWStage::ES;
|
||||
|
@ -907,8 +907,8 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
|
|||
else
|
||||
unreachable("Shader stage not implemented");
|
||||
|
||||
init_program(program, Stage{hw_stage, sw_stage}, info, options->chip_class,
|
||||
options->family, options->wgp_mode, config);
|
||||
init_program(program, Stage{hw_stage, sw_stage}, info, options->gfx_level, options->family,
|
||||
options->wgp_mode, config);
|
||||
|
||||
isel_context ctx = {};
|
||||
ctx.program = program;
|
||||
|
@ -921,7 +921,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
|
|||
|
||||
/* Mesh shading only works on GFX10.3+. */
|
||||
ASSERTED bool mesh_shading = ctx.stage.has(SWStage::TS) || ctx.stage.has(SWStage::MS);
|
||||
assert(!mesh_shading || ctx.program->chip_class >= GFX10_3);
|
||||
assert(!mesh_shading || ctx.program->gfx_level >= GFX10_3);
|
||||
|
||||
if (ctx.stage == tess_control_hs)
|
||||
setup_tcs_info(&ctx, shaders[0], NULL);
|
||||
|
|
|
@ -212,7 +212,7 @@ aco_compile_shader(const struct radv_nir_compiler_options* options,
|
|||
aco::insert_wait_states(program.get());
|
||||
aco::insert_NOPs(program.get());
|
||||
|
||||
if (program->chip_class >= GFX10)
|
||||
if (program->gfx_level >= GFX10)
|
||||
aco::form_hard_clauses(program.get());
|
||||
|
||||
if (program->collect_statistics || (aco::debug_flags & aco::DEBUG_PERF_INFO))
|
||||
|
|
|
@ -66,15 +66,15 @@ init()
|
|||
|
||||
void
|
||||
init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
|
||||
enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
|
||||
ac_shader_config* config)
|
||||
{
|
||||
program->stage = stage;
|
||||
program->config = config;
|
||||
program->info = *info;
|
||||
program->chip_class = chip_class;
|
||||
program->gfx_level = gfx_level;
|
||||
if (family == CHIP_UNKNOWN) {
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX6: program->family = CHIP_TAHITI; break;
|
||||
case GFX7: program->family = CHIP_BONAIRE; break;
|
||||
case GFX8: program->family = CHIP_POLARIS10; break;
|
||||
|
@ -88,12 +88,10 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->wave_size = info->wave_size;
|
||||
program->lane_mask = program->wave_size == 32 ? s1 : s2;
|
||||
|
||||
program->dev.lds_encoding_granule = chip_class >= GFX11 && stage == fragment_fs ? 1024
|
||||
: chip_class >= GFX7 ? 512
|
||||
: 256;
|
||||
program->dev.lds_alloc_granule =
|
||||
chip_class >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
|
||||
program->dev.lds_limit = chip_class >= GFX7 ? 65536 : 32768;
|
||||
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
|
||||
gfx_level >= GFX7 ? 512 : 256;
|
||||
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
|
||||
program->dev.lds_limit = gfx_level >= GFX7 ? 65536 : 32768;
|
||||
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
|
||||
program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
|
||||
|
||||
|
@ -101,17 +99,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->dev.physical_vgprs = 256;
|
||||
program->dev.vgpr_alloc_granule = 4;
|
||||
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */
|
||||
program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
|
||||
program->dev.sgpr_alloc_granule = 128;
|
||||
program->dev.sgpr_limit =
|
||||
108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
||||
if (chip_class == GFX10_3)
|
||||
if (gfx_level == GFX10_3)
|
||||
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
|
||||
else
|
||||
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
|
||||
} else if (program->chip_class >= GFX8) {
|
||||
} else if (program->gfx_level >= GFX8) {
|
||||
program->dev.physical_sgprs = 800;
|
||||
program->dev.sgpr_alloc_granule = 16;
|
||||
program->dev.sgpr_limit = 102;
|
||||
|
@ -124,14 +122,14 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
}
|
||||
|
||||
program->dev.max_wave64_per_simd = 10;
|
||||
if (program->chip_class >= GFX10_3)
|
||||
if (program->gfx_level >= GFX10_3)
|
||||
program->dev.max_wave64_per_simd = 16;
|
||||
else if (program->chip_class == GFX10)
|
||||
else if (program->gfx_level == GFX10)
|
||||
program->dev.max_wave64_per_simd = 20;
|
||||
else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM)
|
||||
program->dev.max_wave64_per_simd = 8;
|
||||
|
||||
program->dev.simd_per_cu = program->chip_class >= GFX10 ? 2 : 4;
|
||||
program->dev.simd_per_cu = program->gfx_level >= GFX10 ? 2 : 4;
|
||||
|
||||
switch (program->family) {
|
||||
/* GFX8 APUs */
|
||||
|
@ -146,13 +144,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
|
||||
program->dev.sram_ecc_enabled = program->family == CHIP_ARCTURUS;
|
||||
/* apparently gfx702 also has fast v_fma_f32 but I can't find a family for that */
|
||||
program->dev.has_fast_fma32 = program->chip_class >= GFX9;
|
||||
program->dev.has_fast_fma32 = program->gfx_level >= GFX9;
|
||||
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
|
||||
program->family == CHIP_HAWAII)
|
||||
program->dev.has_fast_fma32 = true;
|
||||
program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
|
||||
program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level >= GFX10;
|
||||
|
||||
program->dev.fused_mad_mix = program->chip_class >= GFX10;
|
||||
program->dev.fused_mad_mix = program->gfx_level >= GFX10;
|
||||
if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 ||
|
||||
program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN)
|
||||
program->dev.fused_mad_mix = true;
|
||||
|
@ -190,12 +188,12 @@ get_sync_info(const Instruction* instr)
|
|||
}
|
||||
|
||||
bool
|
||||
can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
{
|
||||
if (!instr->isVALU())
|
||||
return false;
|
||||
|
||||
if (chip < GFX8 || instr->isDPP() || instr->isVOP3P())
|
||||
if (gfx_level < GFX8 || instr->isDPP() || instr->isVOP3P())
|
||||
return false;
|
||||
|
||||
if (instr->isSDWA())
|
||||
|
@ -205,9 +203,9 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
VOP3_instruction& vop3 = instr->vop3();
|
||||
if (instr->format == Format::VOP3)
|
||||
return false;
|
||||
if (vop3.clamp && instr->isVOPC() && chip != GFX8)
|
||||
if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8)
|
||||
return false;
|
||||
if (vop3.omod && chip < GFX9)
|
||||
if (vop3.omod && gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
// TODO: return true if we know we will use vcc
|
||||
|
@ -217,7 +215,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
for (unsigned i = 1; i < instr->operands.size(); i++) {
|
||||
if (instr->operands[i].isLiteral())
|
||||
return false;
|
||||
if (chip < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
|
||||
if (gfx_level < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -228,7 +226,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
if (!instr->operands.empty()) {
|
||||
if (instr->operands[0].isLiteral())
|
||||
return false;
|
||||
if (chip < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
|
||||
if (gfx_level < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
|
||||
return false;
|
||||
if (instr->operands[0].bytes() > 4)
|
||||
return false;
|
||||
|
@ -239,11 +237,11 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
bool is_mac = instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
|
||||
instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_fmac_f16;
|
||||
|
||||
if (chip != GFX8 && is_mac)
|
||||
if (gfx_level != GFX8 && is_mac)
|
||||
return false;
|
||||
|
||||
// TODO: return true if we know we will use vcc
|
||||
if (!pre_ra && instr->isVOPC() && chip == GFX8)
|
||||
if (!pre_ra && instr->isVOPC() && gfx_level == GFX8)
|
||||
return false;
|
||||
if (!pre_ra && instr->operands.size() >= 3 && !is_mac)
|
||||
return false;
|
||||
|
@ -256,7 +254,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
|||
|
||||
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
|
||||
aco_ptr<Instruction>
|
||||
convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
||||
convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isSDWA())
|
||||
return NULL;
|
||||
|
@ -289,7 +287,7 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
|||
|
||||
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
|
||||
|
||||
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
|
||||
if (instr->definitions[0].getTemp().type() == RegType::sgpr && gfx_level == GFX8)
|
||||
instr->definitions[0].setFixed(vcc);
|
||||
if (instr->definitions.size() >= 2)
|
||||
instr->definitions[1].setFixed(vcc);
|
||||
|
@ -390,10 +388,10 @@ convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8)
|
|||
}
|
||||
|
||||
bool
|
||||
can_use_opsel(chip_class chip, aco_opcode op, int idx)
|
||||
can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx)
|
||||
{
|
||||
/* opsel is only GFX9+ */
|
||||
if (chip < GFX9)
|
||||
if (gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
switch (op) {
|
||||
|
@ -433,10 +431,10 @@ can_use_opsel(chip_class chip, aco_opcode op, int idx)
|
|||
}
|
||||
|
||||
bool
|
||||
instr_is_16bit(chip_class chip, aco_opcode op)
|
||||
instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
|
||||
{
|
||||
/* partial register writes are GFX9+, only */
|
||||
if (chip < GFX9)
|
||||
if (gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
switch (op) {
|
||||
|
@ -451,7 +449,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
|
|||
/* VOP2 */
|
||||
case aco_opcode::v_mac_f16:
|
||||
case aco_opcode::v_madak_f16:
|
||||
case aco_opcode::v_madmk_f16: return chip >= GFX9;
|
||||
case aco_opcode::v_madmk_f16: return gfx_level >= GFX9;
|
||||
case aco_opcode::v_add_f16:
|
||||
case aco_opcode::v_sub_f16:
|
||||
case aco_opcode::v_subrev_f16:
|
||||
|
@ -479,7 +477,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
|
|||
case aco_opcode::v_rndne_f16:
|
||||
case aco_opcode::v_fract_f16:
|
||||
case aco_opcode::v_sin_f16:
|
||||
case aco_opcode::v_cos_f16: return chip >= GFX10;
|
||||
case aco_opcode::v_cos_f16: return gfx_level >= GFX10;
|
||||
// TODO: confirm whether these write 16 or 32 bit on GFX10+
|
||||
// case aco_opcode::v_cvt_u16_f16:
|
||||
// case aco_opcode::v_cvt_i16_f16:
|
||||
|
@ -487,7 +485,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
|
|||
// case aco_opcode::v_cvt_norm_i16_f16:
|
||||
// case aco_opcode::v_cvt_norm_u16_f16:
|
||||
/* on GFX10, all opsel instructions preserve the high bits */
|
||||
default: return chip >= GFX10 && can_use_opsel(chip, op, -1);
|
||||
default: return gfx_level >= GFX10 && can_use_opsel(gfx_level, op, -1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -760,25 +758,25 @@ wait_imm::wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_)
|
|||
: vm(vm_), exp(exp_), lgkm(lgkm_), vs(vs_)
|
||||
{}
|
||||
|
||||
wait_imm::wait_imm(enum chip_class chip, uint16_t packed) : vs(unset_counter)
|
||||
wait_imm::wait_imm(enum amd_gfx_level gfx_level, uint16_t packed) : vs(unset_counter)
|
||||
{
|
||||
vm = packed & 0xf;
|
||||
if (chip >= GFX9)
|
||||
if (gfx_level >= GFX9)
|
||||
vm |= (packed >> 10) & 0x30;
|
||||
|
||||
exp = (packed >> 4) & 0x7;
|
||||
|
||||
lgkm = (packed >> 8) & 0xf;
|
||||
if (chip >= GFX10)
|
||||
if (gfx_level >= GFX10)
|
||||
lgkm |= (packed >> 8) & 0x30;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
wait_imm::pack(enum chip_class chip) const
|
||||
wait_imm::pack(enum amd_gfx_level gfx_level) const
|
||||
{
|
||||
uint16_t imm = 0;
|
||||
assert(exp == unset_counter || exp <= 0x7);
|
||||
switch (chip) {
|
||||
switch (gfx_level) {
|
||||
case GFX11:
|
||||
assert(lgkm == unset_counter || lgkm <= 0x3f);
|
||||
assert(vm == unset_counter || vm <= 0x3f);
|
||||
|
@ -801,10 +799,10 @@ wait_imm::pack(enum chip_class chip) const
|
|||
imm = ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf);
|
||||
break;
|
||||
}
|
||||
if (chip < GFX9 && vm == wait_imm::unset_counter)
|
||||
if (gfx_level < GFX9 && vm == wait_imm::unset_counter)
|
||||
imm |= 0xc000; /* should have no effect on pre-GFX9 and now we won't have to worry about the
|
||||
architecture when interpreting the immediate */
|
||||
if (chip < GFX10 && lgkm == wait_imm::unset_counter)
|
||||
if (gfx_level < GFX10 && lgkm == wait_imm::unset_counter)
|
||||
imm |= 0x3000; /* should have no effect on pre-GFX10 and now we won't have to worry about the
|
||||
architecture when interpreting the immediate */
|
||||
return imm;
|
||||
|
|
|
@ -270,9 +270,9 @@ struct wait_imm {
|
|||
|
||||
wait_imm();
|
||||
wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
|
||||
wait_imm(enum chip_class chip, uint16_t packed);
|
||||
wait_imm(enum amd_gfx_level chip, uint16_t packed);
|
||||
|
||||
uint16_t pack(enum chip_class chip) const;
|
||||
uint16_t pack(enum amd_gfx_level chip) const;
|
||||
|
||||
bool combine(const wait_imm& other);
|
||||
|
||||
|
@ -668,10 +668,10 @@ public:
|
|||
return Operand::c8(0);
|
||||
}
|
||||
|
||||
/* This is useful over the constructors when you want to take a chip class
|
||||
/* This is useful over the constructors when you want to take a gfx level
|
||||
* for 1/2 PI or an unknown operand size.
|
||||
*/
|
||||
static Operand get_const(enum chip_class chip, uint64_t val, unsigned bytes)
|
||||
static Operand get_const(enum amd_gfx_level chip, uint64_t val, unsigned bytes)
|
||||
{
|
||||
if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
|
||||
/* 1/2 PI can be an inline constant on GFX8+ */
|
||||
|
@ -1766,12 +1766,12 @@ memory_sync_info get_sync_info(const Instruction* instr);
|
|||
|
||||
bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
|
||||
|
||||
bool can_use_opsel(chip_class chip, aco_opcode op, int idx);
|
||||
bool instr_is_16bit(chip_class chip, aco_opcode op);
|
||||
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
|
||||
bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
|
||||
bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
|
||||
bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
|
||||
bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8);
|
||||
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
|
||||
aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
|
||||
aco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr);
|
||||
aco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8);
|
||||
bool needs_exec_mask(const Instruction* instr);
|
||||
|
||||
|
@ -2053,7 +2053,7 @@ public:
|
|||
RegisterDemand max_reg_demand = RegisterDemand();
|
||||
ac_shader_config* config;
|
||||
struct aco_shader_info info;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
enum radeon_family family;
|
||||
DeviceInfo dev;
|
||||
unsigned wave_size;
|
||||
|
@ -2151,7 +2151,7 @@ struct ra_test_policy {
|
|||
void init();
|
||||
|
||||
void init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
|
||||
enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
|
||||
ac_shader_config* config);
|
||||
|
||||
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
|
||||
|
|
|
@ -293,11 +293,11 @@ calc_waves_per_workgroup(Program* program)
|
|||
uint16_t
|
||||
get_extra_sgprs(Program* program)
|
||||
{
|
||||
if (program->chip_class >= GFX10) {
|
||||
if (program->gfx_level >= GFX10) {
|
||||
assert(!program->needs_flat_scr);
|
||||
assert(!program->dev.xnack_enabled);
|
||||
return 0;
|
||||
} else if (program->chip_class >= GFX8) {
|
||||
} else if (program->gfx_level >= GFX8) {
|
||||
if (program->needs_flat_scr)
|
||||
return 6;
|
||||
else if (program->dev.xnack_enabled)
|
||||
|
@ -439,7 +439,7 @@ live_var_analysis(Program* program)
|
|||
std::vector<PhiInfo> phi_info(program->blocks.size());
|
||||
RegisterDemand new_demand;
|
||||
|
||||
program->needs_vcc = program->chip_class >= GFX10;
|
||||
program->needs_vcc = program->gfx_level >= GFX10;
|
||||
|
||||
/* this implementation assumes that the block idx corresponds to the block's position in
|
||||
* program->blocks vector */
|
||||
|
|
|
@ -104,7 +104,7 @@ collect_parallelcopies(cssa_ctx& ctx)
|
|||
/* SGPR inline constants and literals on GFX10+ can be spilled
|
||||
* and reloaded directly (without intermediate register) */
|
||||
if (op.isConstant()) {
|
||||
if (ctx.program->chip_class >= GFX10)
|
||||
if (ctx.program->gfx_level >= GFX10)
|
||||
continue;
|
||||
if (op.size() == 1 && !op.isLiteral())
|
||||
continue;
|
||||
|
|
|
@ -68,7 +68,7 @@ uint8_t int8_mul_table[512] = {
|
|||
1, 250, 1, 251, 1, 252, 1, 253, 1, 254, 1, 255};
|
||||
|
||||
aco_opcode
|
||||
get_reduce_opcode(chip_class chip, ReduceOp op)
|
||||
get_reduce_opcode(amd_gfx_level gfx_level, ReduceOp op)
|
||||
{
|
||||
/* Because some 16-bit instructions are already VOP3 on GFX10, we use the
|
||||
* 32-bit opcodes (VOP2) which allows to remove the tempory VGPR and to use
|
||||
|
@ -77,9 +77,9 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
switch (op) {
|
||||
case iadd8:
|
||||
case iadd16:
|
||||
if (chip >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return aco_opcode::v_add_u32;
|
||||
} else if (chip >= GFX8) {
|
||||
} else if (gfx_level >= GFX8) {
|
||||
return aco_opcode::v_add_u16;
|
||||
} else {
|
||||
return aco_opcode::v_add_co_u32;
|
||||
|
@ -87,9 +87,9 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
break;
|
||||
case imul8:
|
||||
case imul16:
|
||||
if (chip >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return aco_opcode::v_mul_lo_u16_e64;
|
||||
} else if (chip >= GFX8) {
|
||||
} else if (gfx_level >= GFX8) {
|
||||
return aco_opcode::v_mul_lo_u16;
|
||||
} else {
|
||||
return aco_opcode::v_mul_u32_u24;
|
||||
|
@ -99,9 +99,9 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
case fmul16: return aco_opcode::v_mul_f16;
|
||||
case imax8:
|
||||
case imax16:
|
||||
if (chip >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return aco_opcode::v_max_i32;
|
||||
} else if (chip >= GFX8) {
|
||||
} else if (gfx_level >= GFX8) {
|
||||
return aco_opcode::v_max_i16;
|
||||
} else {
|
||||
return aco_opcode::v_max_i32;
|
||||
|
@ -109,9 +109,9 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
break;
|
||||
case imin8:
|
||||
case imin16:
|
||||
if (chip >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return aco_opcode::v_min_i32;
|
||||
} else if (chip >= GFX8) {
|
||||
} else if (gfx_level >= GFX8) {
|
||||
return aco_opcode::v_min_i16;
|
||||
} else {
|
||||
return aco_opcode::v_min_i32;
|
||||
|
@ -119,9 +119,9 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
break;
|
||||
case umin8:
|
||||
case umin16:
|
||||
if (chip >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return aco_opcode::v_min_u32;
|
||||
} else if (chip >= GFX8) {
|
||||
} else if (gfx_level >= GFX8) {
|
||||
return aco_opcode::v_min_u16;
|
||||
} else {
|
||||
return aco_opcode::v_min_u32;
|
||||
|
@ -129,9 +129,9 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
break;
|
||||
case umax8:
|
||||
case umax16:
|
||||
if (chip >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return aco_opcode::v_max_u32;
|
||||
} else if (chip >= GFX8) {
|
||||
} else if (gfx_level >= GFX8) {
|
||||
return aco_opcode::v_max_u16;
|
||||
} else {
|
||||
return aco_opcode::v_max_u32;
|
||||
|
@ -139,7 +139,7 @@ get_reduce_opcode(chip_class chip, ReduceOp op)
|
|||
break;
|
||||
case fmin16: return aco_opcode::v_min_f16;
|
||||
case fmax16: return aco_opcode::v_max_f16;
|
||||
case iadd32: return chip >= GFX9 ? aco_opcode::v_add_u32 : aco_opcode::v_add_co_u32;
|
||||
case iadd32: return gfx_level >= GFX9 ? aco_opcode::v_add_u32 : aco_opcode::v_add_co_u32;
|
||||
case imul32: return aco_opcode::v_mul_lo_u32;
|
||||
case fadd32: return aco_opcode::v_add_f32;
|
||||
case fmul32: return aco_opcode::v_mul_f32;
|
||||
|
@ -209,7 +209,7 @@ emit_int64_dpp_op(lower_context* ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg
|
|||
Operand vtmp_op[] = {Operand(vtmp_reg, v1), Operand(PhysReg{vtmp_reg + 1}, v1)};
|
||||
Operand vtmp_op64 = Operand(vtmp_reg, v2);
|
||||
if (op == iadd64) {
|
||||
if (ctx->program->chip_class >= GFX10) {
|
||||
if (ctx->program->gfx_level >= GFX10) {
|
||||
if (identity)
|
||||
bld.vop1(aco_opcode::v_mov_b32, vtmp_def[0], identity[0]);
|
||||
bld.vop1_dpp(aco_opcode::v_mov_b32, vtmp_def[0], src0[0], dpp_ctrl, row_mask, bank_mask,
|
||||
|
@ -323,7 +323,7 @@ emit_int64_op(lower_context* ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg src
|
|||
}
|
||||
|
||||
if (op == iadd64) {
|
||||
if (ctx->program->chip_class >= GFX10) {
|
||||
if (ctx->program->gfx_level >= GFX10) {
|
||||
bld.vop3(aco_opcode::v_add_co_u32_e64, dst[0], bld.def(bld.lm, vcc), src0[0], src1[0]);
|
||||
} else {
|
||||
bld.vop2(aco_opcode::v_add_co_u32, dst[0], bld.def(bld.lm, vcc), src0[0], src1[0]);
|
||||
|
@ -393,7 +393,7 @@ emit_dpp_op(lower_context* ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg src1_
|
|||
Operand src0(src0_reg, rc);
|
||||
Operand src1(src1_reg, rc);
|
||||
|
||||
aco_opcode opcode = get_reduce_opcode(ctx->program->chip_class, op);
|
||||
aco_opcode opcode = get_reduce_opcode(ctx->program->gfx_level, op);
|
||||
bool vop3 = is_vop3_reduce_opcode(opcode);
|
||||
|
||||
if (!vop3) {
|
||||
|
@ -433,7 +433,7 @@ emit_op(lower_context* ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg src1_reg,
|
|||
Operand src0(src0_reg, RegClass(src0_reg.reg() >= 256 ? RegType::vgpr : RegType::sgpr, size));
|
||||
Operand src1(src1_reg, rc);
|
||||
|
||||
aco_opcode opcode = get_reduce_opcode(ctx->program->chip_class, op);
|
||||
aco_opcode opcode = get_reduce_opcode(ctx->program->gfx_level, op);
|
||||
bool vop3 = is_vop3_reduce_opcode(opcode);
|
||||
|
||||
if (opcode == aco_opcode::num_opcodes) {
|
||||
|
@ -492,7 +492,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
/* p_exclusive_scan needs it to be a sgpr or inline constant for the v_writelane_b32
|
||||
* except on GFX10, where v_writelane_b32 can take a literal. */
|
||||
if (identity[i].isLiteral() && op == aco_opcode::p_exclusive_scan &&
|
||||
ctx->program->chip_class < GFX10) {
|
||||
ctx->program->gfx_level < GFX10) {
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg{sitmp + i}, s1), identity[i]);
|
||||
identity[i] = Operand(PhysReg{sitmp + i}, s1);
|
||||
|
||||
|
@ -511,7 +511,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
}
|
||||
|
||||
if (src.regClass() == v1b) {
|
||||
if (ctx->program->chip_class >= GFX8) {
|
||||
if (ctx->program->gfx_level >= GFX8) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
|
||||
|
@ -532,7 +532,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
Operand::c32(8u));
|
||||
}
|
||||
} else if (src.regClass() == v2b) {
|
||||
if (ctx->program->chip_class >= GFX10 &&
|
||||
if (ctx->program->gfx_level >= GFX10 &&
|
||||
(reduce_op == iadd16 || reduce_op == imax16 || reduce_op == imin16 ||
|
||||
reduce_op == umin16 || reduce_op == umax16)) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
|
||||
|
@ -543,7 +543,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
sdwa->sel[0] = SubdwordSel(2, 0, sext);
|
||||
sdwa->dst_sel = SubdwordSel::dword;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else if (ctx->program->chip_class == GFX6 || ctx->program->chip_class == GFX7) {
|
||||
} else if (ctx->program->gfx_level == GFX6 || ctx->program->gfx_level == GFX7) {
|
||||
aco_opcode opcode;
|
||||
|
||||
if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
|
||||
|
@ -562,7 +562,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
if (cluster_size == 1)
|
||||
break;
|
||||
|
||||
if (ctx->program->chip_class <= GFX7) {
|
||||
if (ctx->program->gfx_level <= GFX7) {
|
||||
reduction_needs_last_op = true;
|
||||
emit_ds_swizzle(bld, vtmp, tmp, src.size(), (1 << 15) | dpp_quad_perm(1, 0, 3, 2));
|
||||
if (cluster_size == 2)
|
||||
|
@ -609,7 +609,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
if (cluster_size == 16)
|
||||
break;
|
||||
|
||||
if (ctx->program->chip_class >= GFX10) {
|
||||
if (ctx->program->gfx_level >= GFX10) {
|
||||
/* GFX10+ doesn't support row_bcast15 and row_bcast31 */
|
||||
for (unsigned i = 0; i < src.size(); i++)
|
||||
bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
|
||||
|
@ -641,7 +641,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
false);
|
||||
break;
|
||||
case aco_opcode::p_exclusive_scan:
|
||||
if (ctx->program->chip_class >= GFX10) { /* gfx10 doesn't support wf_sr1, so emulate it */
|
||||
if (ctx->program->gfx_level >= GFX10) { /* gfx10 doesn't support wf_sr1, so emulate it */
|
||||
/* shift rows right */
|
||||
emit_dpp_mov(ctx, vtmp, tmp, src.size(), dpp_row_sr(1), 0xf, 0xf, true);
|
||||
|
||||
|
@ -668,7 +668,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
}
|
||||
}
|
||||
std::swap(tmp, vtmp);
|
||||
} else if (ctx->program->chip_class >= GFX8) {
|
||||
} else if (ctx->program->gfx_level >= GFX8) {
|
||||
emit_dpp_mov(ctx, tmp, tmp, src.size(), dpp_wf_sr1, 0xf, 0xf, true);
|
||||
} else {
|
||||
// TODO: use LDS on CS with a single write and shifted read
|
||||
|
@ -718,7 +718,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
for (unsigned i = 0; i < src.size(); i++) {
|
||||
if (!identity[i].isConstant() ||
|
||||
identity[i].constantValue()) { /* bound_ctrl should take care of this overwise */
|
||||
if (ctx->program->chip_class < GFX10)
|
||||
if (ctx->program->gfx_level < GFX10)
|
||||
assert((identity[i].isConstant() && !identity[i].isLiteral()) ||
|
||||
identity[i].physReg() == PhysReg{sitmp + i});
|
||||
bld.writelane(Definition(PhysReg{tmp + i}, v1), identity[i], Operand::zero(),
|
||||
|
@ -728,7 +728,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
FALLTHROUGH;
|
||||
case aco_opcode::p_inclusive_scan:
|
||||
assert(cluster_size == ctx->program->wave_size);
|
||||
if (ctx->program->chip_class <= GFX7) {
|
||||
if (ctx->program->gfx_level <= GFX7) {
|
||||
emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x1e, 0x00, 0x00));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xAAAAAAAAu));
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
|
||||
|
@ -777,7 +777,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
|
|||
identity);
|
||||
emit_dpp_op(ctx, tmp, tmp, tmp, vtmp, reduce_op, src.size(), dpp_row_sr(8), 0xf, 0xf, false,
|
||||
identity);
|
||||
if (ctx->program->chip_class >= GFX10) {
|
||||
if (ctx->program->gfx_level >= GFX10) {
|
||||
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
|
||||
Operand::c32(16u));
|
||||
bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
|
||||
|
@ -847,7 +847,7 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
|
|||
* manually swap the data between the two halves using two shared VGPRs.
|
||||
*/
|
||||
|
||||
assert(program->chip_class >= GFX10);
|
||||
assert(program->gfx_level >= GFX10);
|
||||
assert(program->wave_size == 64);
|
||||
|
||||
unsigned shared_vgpr_reg_0 = align(program->config->num_vgprs, 4) + 256;
|
||||
|
@ -976,7 +976,7 @@ split_copy(lower_context* ctx, unsigned offset, Definition* def, Operand* op,
|
|||
op_reg.reg_b += offset;
|
||||
|
||||
/* 64-bit VGPR copies (implemented with v_lshrrev_b64) are slow before GFX10 */
|
||||
if (ctx->program->chip_class < GFX10 && src.def.regClass().type() == RegType::vgpr)
|
||||
if (ctx->program->gfx_level < GFX10 && src.def.regClass().type() == RegType::vgpr)
|
||||
max_size = MIN2(max_size, 4);
|
||||
unsigned max_align = src.def.regClass().type() == RegType::vgpr ? 4 : 16;
|
||||
|
||||
|
@ -998,7 +998,7 @@ split_copy(lower_context* ctx, unsigned offset, Definition* def, Operand* op,
|
|||
if (src.op.isConstant()) {
|
||||
assert(bytes >= 1 && bytes <= 8);
|
||||
uint64_t val = src.op.constantValue64() >> (offset * 8u);
|
||||
*op = Operand::get_const(ctx->program->chip_class, val, bytes);
|
||||
*op = Operand::get_const(ctx->program->gfx_level, val, bytes);
|
||||
} else {
|
||||
RegClass op_cls = src.op.regClass().resize(bytes);
|
||||
*op = Operand(op_reg, op_cls);
|
||||
|
@ -1045,7 +1045,7 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
}
|
||||
}
|
||||
|
||||
if (op.bytes() == 4 && op.constantEquals(0x3e22f983) && ctx->program->chip_class >= GFX8)
|
||||
if (op.bytes() == 4 && op.constantEquals(0x3e22f983) && ctx->program->gfx_level >= GFX8)
|
||||
op.setFixed(PhysReg{248}); /* it can be an inline constant on GFX8+ */
|
||||
|
||||
if (dst.regClass() == s1) {
|
||||
|
@ -1066,7 +1066,7 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
} else {
|
||||
assert(dst.regClass() == v1b || dst.regClass() == v2b);
|
||||
|
||||
if (dst.regClass() == v1b && ctx->program->chip_class >= GFX9) {
|
||||
if (dst.regClass() == v1b && ctx->program->gfx_level >= GFX9) {
|
||||
uint8_t val = op.constantValue();
|
||||
Operand op32 = Operand::c32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
|
||||
if (op32.isLiteral()) {
|
||||
|
@ -1078,7 +1078,7 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
} else {
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op32);
|
||||
}
|
||||
} else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX9 && !op.isLiteral()) {
|
||||
} else if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX9 && !op.isLiteral()) {
|
||||
if (op.constantValue() >= 0xfff0 || op.constantValue() <= 64) {
|
||||
/* use v_mov_b32 to avoid possible issues with denormal flushing or
|
||||
* NaN. v_add_f16 is still needed for float constants. */
|
||||
|
@ -1087,7 +1087,7 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
} else {
|
||||
bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand::zero());
|
||||
}
|
||||
} else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX10 &&
|
||||
} else if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10 &&
|
||||
(ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) {
|
||||
if (dst.physReg().byte() == 2) {
|
||||
Operand def_lo(dst.physReg().advance(-2), v2b);
|
||||
|
@ -1144,7 +1144,7 @@ swap_linear_vgpr(Builder& bld, Definition def, Operand op, bool preserve_scc, Ph
|
|||
Definition op_as_def = Definition(op.physReg(), op.regClass());
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (bld.program->chip_class >= GFX9) {
|
||||
if (bld.program->gfx_level >= GFX9) {
|
||||
bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
|
||||
} else {
|
||||
bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
|
||||
|
@ -1191,7 +1191,7 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
|
|||
bld.sop1(aco_opcode::s_mov_b32, def, op);
|
||||
} else if (def.regClass() == s2) {
|
||||
bld.sop1(aco_opcode::s_mov_b64, def, op);
|
||||
} else if (def.regClass().is_subdword() && ctx->program->chip_class < GFX8) {
|
||||
} else if (def.regClass().is_subdword() && ctx->program->gfx_level < GFX8) {
|
||||
if (op.physReg().byte()) {
|
||||
assert(def.physReg().byte() == 0);
|
||||
bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand::c32(op.physReg().byte() * 8), op);
|
||||
|
@ -1284,7 +1284,7 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
|
|||
Definition op_as_def = Definition(op.physReg(), op.regClass());
|
||||
if (def.regClass().is_linear_vgpr()) {
|
||||
swap_linear_vgpr(bld, def, op, preserve_scc, pi->scratch_sgpr);
|
||||
} else if (ctx->program->chip_class >= GFX9 && def.regClass() == v1) {
|
||||
} else if (ctx->program->gfx_level >= GFX9 && def.regClass() == v1) {
|
||||
bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
|
||||
} else if (def.regClass() == v1) {
|
||||
assert(def.physReg().byte() == 0 && op.physReg().byte() == 0);
|
||||
|
@ -1333,7 +1333,7 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
|
|||
offset += def.bytes();
|
||||
}
|
||||
|
||||
if (ctx->program->chip_class <= GFX7)
|
||||
if (ctx->program->gfx_level <= GFX7)
|
||||
return;
|
||||
|
||||
/* fixup in case we swapped bytes we shouldn't have */
|
||||
|
@ -1352,8 +1352,8 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
|||
}
|
||||
|
||||
bool can_use_pack = (ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in) &&
|
||||
(ctx->program->chip_class >= GFX10 ||
|
||||
(ctx->program->chip_class >= GFX9 && !lo.isLiteral() && !hi.isLiteral()));
|
||||
(ctx->program->gfx_level >= GFX10 ||
|
||||
(ctx->program->gfx_level >= GFX9 && !lo.isLiteral() && !hi.isLiteral()));
|
||||
|
||||
if (can_use_pack) {
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
|
||||
|
@ -1365,7 +1365,7 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
|||
/* a single alignbyte can be sufficient: hi can be a 32-bit integer constant */
|
||||
if (lo.physReg().byte() == 2 && hi.physReg().byte() == 0 &&
|
||||
(!hi.isConstant() || !Operand::c32(hi.constantValue()).isLiteral() ||
|
||||
ctx->program->chip_class >= GFX10)) {
|
||||
ctx->program->gfx_level >= GFX10)) {
|
||||
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u));
|
||||
return;
|
||||
}
|
||||
|
@ -1404,7 +1404,7 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
|||
assert(hi.physReg().byte() == 0);
|
||||
bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), hi);
|
||||
hi.setFixed(def.physReg().advance(2));
|
||||
} else if (ctx->program->chip_class >= GFX8) {
|
||||
} else if (ctx->program->gfx_level >= GFX8) {
|
||||
/* either lo or hi can be placed with just a v_mov */
|
||||
assert(lo.physReg().byte() == 0 || hi.physReg().byte() == 2);
|
||||
Operand& op = lo.physReg().byte() == 0 ? lo : hi;
|
||||
|
@ -1413,7 +1413,7 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
|
|||
op.setFixed(reg);
|
||||
}
|
||||
|
||||
if (ctx->program->chip_class >= GFX8) {
|
||||
if (ctx->program->gfx_level >= GFX8) {
|
||||
/* either hi or lo are already placed correctly */
|
||||
if (lo.physReg().reg() == def.physReg().reg())
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, def_hi, hi);
|
||||
|
@ -1467,7 +1467,7 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
|
|||
|
||||
/* don't create 64-bit copies before GFX10 */
|
||||
if (copy.bytes >= 4 && copy.def.regClass().type() == RegType::vgpr &&
|
||||
ctx->program->chip_class < GFX10)
|
||||
ctx->program->gfx_level < GFX10)
|
||||
return;
|
||||
|
||||
unsigned new_size = copy.bytes + other->second.bytes;
|
||||
|
@ -1479,7 +1479,7 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
|
|||
if (!Operand::is_constant_representable(val, new_size, true,
|
||||
copy.def.regClass().type() == RegType::vgpr))
|
||||
return;
|
||||
copy.op = Operand::get_const(ctx->program->chip_class, val, new_size);
|
||||
copy.op = Operand::get_const(ctx->program->gfx_level, val, new_size);
|
||||
} else {
|
||||
if (other->second.op.physReg() != copy.op.physReg().advance(copy.bytes))
|
||||
return;
|
||||
|
@ -1493,7 +1493,7 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
|
|||
|
||||
void
|
||||
handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
|
||||
chip_class chip_class, Pseudo_instruction* pi)
|
||||
amd_gfx_level gfx_level, Pseudo_instruction* pi)
|
||||
{
|
||||
Builder bld(ctx->program, &ctx->instructions);
|
||||
unsigned num_instructions_before = ctx->instructions.size();
|
||||
|
@ -1599,7 +1599,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
|
|||
|
||||
/* on GFX6/7, we need some small workarounds as there is no
|
||||
* SDWA instruction to do partial register writes */
|
||||
if (ctx->program->chip_class < GFX8 && it->second.bytes < 4) {
|
||||
if (ctx->program->gfx_level < GFX8 && it->second.bytes < 4) {
|
||||
if (it->first.byte() == 0 && it->second.op.physReg().byte() == 0 && !it->second.is_used &&
|
||||
pi->opcode == aco_opcode::p_split_vector) {
|
||||
/* Other operations might overwrite the high bits, so change all users
|
||||
|
@ -1664,7 +1664,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
|
|||
bool partial_copy = (has_zero_use_bytes == 0xf) || (has_zero_use_bytes == 0xf0);
|
||||
for (std::pair<const PhysReg, copy_operation>& copy : copy_map) {
|
||||
/* on GFX6/7, we can only do copies with full registers */
|
||||
if (partial_copy || ctx->program->chip_class <= GFX7)
|
||||
if (partial_copy || ctx->program->gfx_level <= GFX7)
|
||||
break;
|
||||
for (uint16_t i = 0; i < copy.second.bytes; i++) {
|
||||
/* distance might underflow */
|
||||
|
@ -1790,7 +1790,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
|
|||
}
|
||||
|
||||
/* GFX6-7 can only swap full registers */
|
||||
if (ctx->program->chip_class <= GFX7)
|
||||
if (ctx->program->gfx_level <= GFX7)
|
||||
swap.bytes = align(swap.bytes, 4);
|
||||
|
||||
do_swap(ctx, bld, swap, preserve_scc, pi);
|
||||
|
@ -1868,7 +1868,7 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
|
|||
void
|
||||
emit_set_mode(Builder& bld, float_mode new_mode, bool set_round, bool set_denorm)
|
||||
{
|
||||
if (bld.program->chip_class >= GFX10) {
|
||||
if (bld.program->gfx_level >= GFX10) {
|
||||
if (set_round)
|
||||
bld.sopp(aco_opcode::s_round_mode, -1, new_mode.round);
|
||||
if (set_denorm)
|
||||
|
@ -1938,7 +1938,7 @@ lower_to_hw_instr(Program* program)
|
|||
: RegClass(instr->operands[0].getTemp().type(), def.size());
|
||||
std::map<PhysReg, copy_operation> copy_operations;
|
||||
copy_operations[def.physReg()] = {Operand(reg, op_rc), def, def.bytes()};
|
||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_create_vector: {
|
||||
|
@ -1967,7 +1967,7 @@ lower_to_hw_instr(Program* program)
|
|||
copy_operations[def.physReg()] = {op, def, op.bytes()};
|
||||
reg.reg_b += op.bytes();
|
||||
}
|
||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_split_vector: {
|
||||
|
@ -1982,7 +1982,7 @@ lower_to_hw_instr(Program* program)
|
|||
copy_operations[def.physReg()] = {op, def, def.bytes()};
|
||||
reg.reg_b += def.bytes();
|
||||
}
|
||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_parallelcopy:
|
||||
|
@ -1993,7 +1993,7 @@ lower_to_hw_instr(Program* program)
|
|||
copy_operations[instr->definitions[j].physReg()] = {
|
||||
instr->operands[j], instr->definitions[j], instr->operands[j].bytes()};
|
||||
}
|
||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_exit_early_if: {
|
||||
|
@ -2030,7 +2030,7 @@ lower_to_hw_instr(Program* program)
|
|||
|
||||
bld.reset(discard_block);
|
||||
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
|
||||
program->chip_class >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL,
|
||||
program->gfx_level >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL,
|
||||
false, true, true);
|
||||
bld.sopp(aco_opcode::s_endpgm);
|
||||
|
||||
|
@ -2072,7 +2072,7 @@ lower_to_hw_instr(Program* program)
|
|||
std::map<PhysReg, copy_operation> copy_operations;
|
||||
copy_operations[instr->definitions[0].physReg()] = {
|
||||
instr->operands[0], instr->definitions[0], instr->definitions[0].bytes()};
|
||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
|
||||
} else {
|
||||
assert(instr->operands[0].regClass().type() == RegType::vgpr);
|
||||
assert(instr->definitions[0].regClass().type() == RegType::sgpr);
|
||||
|
@ -2086,9 +2086,9 @@ lower_to_hw_instr(Program* program)
|
|||
break;
|
||||
}
|
||||
case aco_opcode::p_bpermute: {
|
||||
if (ctx.program->chip_class <= GFX7)
|
||||
if (ctx.program->gfx_level <= GFX7)
|
||||
emit_gfx6_bpermute(program, instr, bld);
|
||||
else if (ctx.program->chip_class >= GFX10 && ctx.program->wave_size == 64)
|
||||
else if (ctx.program->gfx_level >= GFX10 && ctx.program->wave_size == 64)
|
||||
emit_gfx10_wave64_bpermute(program, instr, bld);
|
||||
else
|
||||
unreachable("Current hardware supports ds_bpermute, don't emit p_bpermute.");
|
||||
|
@ -2129,7 +2129,7 @@ lower_to_hw_instr(Program* program)
|
|||
bld.def(s1, scc), op, Operand::c32((bits << 16) | offset));
|
||||
}
|
||||
} else if ((dst.regClass() == v1 && op.regClass() == v1) ||
|
||||
ctx.program->chip_class <= GFX7) {
|
||||
ctx.program->gfx_level <= GFX7) {
|
||||
assert(op.physReg().byte() == 0 && dst.physReg().byte() == 0);
|
||||
if (offset == (32 - bits) && op.regClass() != s1) {
|
||||
bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32, dst,
|
||||
|
@ -2171,13 +2171,13 @@ lower_to_hw_instr(Program* program)
|
|||
bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc),
|
||||
Operand(dst.physReg(), s1), Operand::c32(offset));
|
||||
}
|
||||
} else if (dst.regClass() == v1 || ctx.program->chip_class <= GFX7) {
|
||||
} else if (dst.regClass() == v1 || ctx.program->gfx_level <= GFX7) {
|
||||
if (offset == (dst.bytes() * 8u - bits)) {
|
||||
bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op);
|
||||
} else if (offset == 0) {
|
||||
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
|
||||
} else if (program->chip_class >= GFX9 ||
|
||||
(op.regClass() != s1 && program->chip_class >= GFX8)) {
|
||||
} else if (program->gfx_level >= GFX9 ||
|
||||
(op.regClass() != s1 && program->gfx_level >= GFX8)) {
|
||||
bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().dst_sel =
|
||||
SubdwordSel(bits / 8, offset / 8, false);
|
||||
} else {
|
||||
|
@ -2237,7 +2237,7 @@ lower_to_hw_instr(Program* program)
|
|||
} else if (inst->isVALU() || inst->isVINTRP()) {
|
||||
num_vector++;
|
||||
/* VALU which writes SGPRs are always executed on GFX10+ */
|
||||
if (ctx.program->chip_class >= GFX10) {
|
||||
if (ctx.program->gfx_level >= GFX10) {
|
||||
for (Definition& def : inst->definitions) {
|
||||
if (def.regClass().type() == RegType::sgpr)
|
||||
num_scalar++;
|
||||
|
@ -2259,7 +2259,7 @@ lower_to_hw_instr(Program* program)
|
|||
|
||||
/* Under these conditions, we shouldn't remove the branch */
|
||||
unsigned est_cycles;
|
||||
if (ctx.program->chip_class >= GFX10)
|
||||
if (ctx.program->gfx_level >= GFX10)
|
||||
est_cycles = num_scalar * 2 + num_vector;
|
||||
else
|
||||
est_cycles = num_scalar * 4 + num_vector * 4;
|
||||
|
|
|
@ -193,15 +193,15 @@ struct ssa_info {
|
|||
|
||||
bool is_vec() { return label & label_vec; }
|
||||
|
||||
void set_constant(chip_class chip, uint64_t constant)
|
||||
void set_constant(amd_gfx_level gfx_level, uint64_t constant)
|
||||
{
|
||||
Operand op16 = Operand::c16(constant);
|
||||
Operand op32 = Operand::get_const(chip, constant, 4);
|
||||
Operand op32 = Operand::get_const(gfx_level, constant, 4);
|
||||
add_label(label_literal);
|
||||
val = constant;
|
||||
|
||||
/* check that no upper bits are lost in case of packed 16bit constants */
|
||||
if (chip >= GFX8 && !op16.isLiteral() && op16.constantValue64() == constant)
|
||||
if (gfx_level >= GFX8 && !op16.isLiteral() && op16.constantValue64() == constant)
|
||||
add_label(label_constant_16bit);
|
||||
|
||||
if (!op32.isLiteral())
|
||||
|
@ -515,7 +515,7 @@ can_use_VOP3(opt_ctx& ctx, const aco_ptr<Instruction>& instr)
|
|||
if (instr->isVOP3P())
|
||||
return false;
|
||||
|
||||
if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->chip_class < GFX10)
|
||||
if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->gfx_level < GFX10)
|
||||
return false;
|
||||
|
||||
if (instr->isDPP() || instr->isSDWA())
|
||||
|
@ -546,7 +546,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
|
|||
return false;
|
||||
|
||||
bool can_accept_sgpr =
|
||||
ctx.program->chip_class >= GFX9 ||
|
||||
ctx.program->gfx_level >= GFX9 ||
|
||||
std::none_of(instr->definitions.begin(), instr->definitions.end(),
|
||||
[](const Definition& def) { return def.regClass().is_subdword(); });
|
||||
|
||||
|
@ -597,7 +597,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
|
|||
bool
|
||||
can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isSDWA() && ctx.program->chip_class < GFX9)
|
||||
if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
|
||||
return false;
|
||||
return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
|
||||
instr->opcode != aco_opcode::v_readlane_b32 &&
|
||||
|
@ -642,7 +642,7 @@ is_operand_vgpr(Operand op)
|
|||
void
|
||||
to_SDWA(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
aco_ptr<Instruction> tmp = convert_to_SDWA(ctx.program->chip_class, instr);
|
||||
aco_ptr<Instruction> tmp = convert_to_SDWA(ctx.program->gfx_level, instr);
|
||||
if (!tmp)
|
||||
return;
|
||||
|
||||
|
@ -695,7 +695,7 @@ valu_can_accept_vgpr(aco_ptr<Instruction>& instr, unsigned operand)
|
|||
bool
|
||||
check_vop3_operands(opt_ctx& ctx, unsigned num_operands, Operand* operands)
|
||||
{
|
||||
int limit = ctx.program->chip_class >= GFX10 ? 2 : 1;
|
||||
int limit = ctx.program->gfx_level >= GFX10 ? 2 : 1;
|
||||
Operand literal32(s1);
|
||||
Operand literal64(s2);
|
||||
unsigned num_sgprs = 0;
|
||||
|
@ -714,7 +714,7 @@ check_vop3_operands(opt_ctx& ctx, unsigned num_operands, Operand* operands)
|
|||
return false;
|
||||
}
|
||||
} else if (op.isLiteral()) {
|
||||
if (ctx.program->chip_class < GFX10)
|
||||
if (ctx.program->gfx_level < GFX10)
|
||||
return false;
|
||||
|
||||
if (!literal32.isUndefined() && literal32.constantValue() != op.constantValue())
|
||||
|
@ -834,12 +834,12 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
uint32_t offset;
|
||||
bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow;
|
||||
if (info.is_constant_or_literal(32) &&
|
||||
((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
|
||||
(ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
|
||||
(ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
|
||||
((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) ||
|
||||
(ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) ||
|
||||
(ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) {
|
||||
instr->operands[1] = Operand::c32(info.val);
|
||||
} else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, prevent_overflow) &&
|
||||
base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9 &&
|
||||
base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 &&
|
||||
offset % 4u == 0) {
|
||||
bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
|
||||
if (soe) {
|
||||
|
@ -895,7 +895,7 @@ get_constant_op(opt_ctx& ctx, ssa_info info, uint32_t bits)
|
|||
{
|
||||
if (bits == 64)
|
||||
return Operand::c32_or_c64(info.val, true);
|
||||
return Operand::get_const(ctx.program->chip_class, info.val, bits / 8u);
|
||||
return Operand::get_const(ctx.program->gfx_level, info.val, bits / 8u);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1023,13 +1023,13 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
return true;
|
||||
} else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
|
||||
return true;
|
||||
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
|
||||
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
|
||||
if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
|
||||
return false;
|
||||
return true;
|
||||
} else if (instr->isVOP3() && sel.size() == 2 &&
|
||||
can_use_opsel(ctx.program->chip_class, instr->opcode, idx) &&
|
||||
can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
|
||||
!(instr->vop3().opsel & (1 << idx))) {
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
|
@ -1079,8 +1079,8 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
(sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
|
||||
/* The undesireable upper bits are already shifted out. */
|
||||
return;
|
||||
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
|
||||
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
|
||||
to_SDWA(ctx, instr);
|
||||
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
|
||||
} else if (instr->isVOP3()) {
|
||||
|
@ -1126,7 +1126,7 @@ check_sdwa_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
bool
|
||||
does_fp_op_flush_denorms(opt_ctx& ctx, aco_opcode op)
|
||||
{
|
||||
if (ctx.program->chip_class <= GFX8) {
|
||||
if (ctx.program->gfx_level <= GFX8) {
|
||||
switch (op) {
|
||||
case aco_opcode::v_min_f32:
|
||||
case aco_opcode::v_max_f32:
|
||||
|
@ -1318,7 +1318,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i) &&
|
||||
(!instr->isSDWA() || ctx.program->chip_class >= GFX9)) {
|
||||
(!instr->isSDWA() || ctx.program->gfx_level >= GFX9)) {
|
||||
Operand op = get_constant_op(ctx, info, bits);
|
||||
perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2,
|
||||
"v_cndmask_b32 with a constant selector", instr.get());
|
||||
|
@ -1353,7 +1353,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
* scratch accesses and other accesses and swizzling changing how
|
||||
* addressing works significantly, this probably applies to swizzled
|
||||
* MUBUF accesses. */
|
||||
bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->chip_class < GFX9;
|
||||
bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->gfx_level < GFX9;
|
||||
|
||||
if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) &&
|
||||
mubuf.offset + info.val < 4096) {
|
||||
|
@ -1388,7 +1388,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
DS_instruction& ds = instr->ds();
|
||||
Temp base;
|
||||
uint32_t offset;
|
||||
bool has_usable_ds_offset = ctx.program->chip_class >= GFX7;
|
||||
bool has_usable_ds_offset = ctx.program->gfx_level >= GFX7;
|
||||
if (has_usable_ds_offset && i == 0 &&
|
||||
parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
|
||||
base.regClass() == instr->operands[i].regClass() &&
|
||||
|
@ -1530,7 +1530,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
uint64_t val = info.val;
|
||||
for (Definition def : instr->definitions) {
|
||||
uint32_t mask = u_bit_consecutive(0, def.bytes() * 8u);
|
||||
ctx.info[def.tempId()].set_constant(ctx.program->chip_class, val & mask);
|
||||
ctx.info[def.tempId()].set_constant(ctx.program->gfx_level, val & mask);
|
||||
val >>= def.bytes() * 8u;
|
||||
}
|
||||
break;
|
||||
|
@ -1562,7 +1562,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
Operand vec_op = vec->operands[vec_index];
|
||||
if (vec_op.isConstant()) {
|
||||
ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->chip_class,
|
||||
ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->gfx_level,
|
||||
vec_op.constantValue64());
|
||||
} else if (vec_op.isUndefined()) {
|
||||
ctx.info[instr->definitions[i].tempId()].set_undefined();
|
||||
|
@ -1598,7 +1598,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
uint32_t mask = u_bit_consecutive(0, instr->definitions[0].bytes() * 8u);
|
||||
uint32_t val = (info.val >> (dst_offset * 8u)) & mask;
|
||||
instr->operands[0] =
|
||||
Operand::get_const(ctx.program->chip_class, val, instr->definitions[0].bytes());
|
||||
Operand::get_const(ctx.program->gfx_level, val, instr->definitions[0].bytes());
|
||||
;
|
||||
}
|
||||
|
||||
|
@ -1648,7 +1648,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
// TODO
|
||||
} else if (instr->operands[0].isConstant()) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_constant(
|
||||
ctx.program->chip_class, instr->operands[0].constantValue64());
|
||||
ctx.program->gfx_level, instr->operands[0].constantValue64());
|
||||
} else if (instr->operands[0].isTemp()) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
|
||||
if (ctx.info[instr->operands[0].tempId()].is_canonicalized())
|
||||
|
@ -1668,7 +1668,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
break;
|
||||
case aco_opcode::p_is_helper:
|
||||
if (!ctx.program->needs_wqm)
|
||||
ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u);
|
||||
ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
|
||||
break;
|
||||
case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break;
|
||||
case aco_opcode::v_mul_f16:
|
||||
|
@ -1718,7 +1718,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
(!(fp16 ? ctx.fp_mode.preserve_signed_zero_inf_nan16_64
|
||||
: ctx.fp_mode.preserve_signed_zero_inf_nan32) ||
|
||||
instr->opcode == aco_opcode::v_mul_legacy_f32)) { /* 0.0 */
|
||||
ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u);
|
||||
ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
@ -2048,7 +2048,7 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (op[1].type() == RegType::sgpr)
|
||||
std::swap(op[0], op[1]);
|
||||
unsigned num_sgprs = (op[0].type() == RegType::sgpr) + (op[1].type() == RegType::sgpr);
|
||||
if (num_sgprs > (ctx.program->chip_class >= GFX10 ? 2 : 1))
|
||||
if (num_sgprs > (ctx.program->gfx_level >= GFX10 ? 2 : 1))
|
||||
return false;
|
||||
|
||||
ctx.uses[op[0].id()]++;
|
||||
|
@ -2720,7 +2720,7 @@ combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op
|
|||
if (instr->operands[!i].isTemp() &&
|
||||
instr->operands[!i].getTemp().type() == RegType::vgpr) {
|
||||
new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2));
|
||||
} else if (ctx.program->chip_class >= GFX10 ||
|
||||
} else if (ctx.program->gfx_level >= GFX10 ||
|
||||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
|
||||
new_instr.reset(
|
||||
create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
|
||||
|
@ -2959,7 +2959,7 @@ apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
operand_mask |= 1u << i;
|
||||
}
|
||||
unsigned max_sgprs = 1;
|
||||
if (ctx.program->chip_class >= GFX10 && !is_shift64)
|
||||
if (ctx.program->gfx_level >= GFX10 && !is_shift64)
|
||||
max_sgprs = 2;
|
||||
if (has_literal)
|
||||
max_sgprs--;
|
||||
|
@ -3066,7 +3066,7 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
return false;
|
||||
|
||||
/* omod flushes -0 to +0 and has no effect if denormals are enabled. SDWA omod is GFX9+. */
|
||||
bool can_use_omod = (can_vop3 || ctx.program->chip_class >= GFX9) && !instr->isVOP3P();
|
||||
bool can_use_omod = (can_vop3 || ctx.program->gfx_level >= GFX9) && !instr->isVOP3P();
|
||||
if (instr->definitions[0].bytes() == 4)
|
||||
can_use_omod =
|
||||
can_use_omod && ctx.fp_mode.denorm32 == 0 && !ctx.fp_mode.preserve_signed_zero_inf_nan32;
|
||||
|
@ -3133,13 +3133,13 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
assert(sel);
|
||||
|
||||
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
|
||||
can_use_opsel(ctx.program->chip_class, instr->opcode, -1)) {
|
||||
can_use_opsel(ctx.program->gfx_level, instr->opcode, -1)) {
|
||||
if (instr->vop3().opsel & (1 << 3))
|
||||
return false;
|
||||
if (sel.offset())
|
||||
instr->vop3().opsel |= 1 << 3;
|
||||
} else {
|
||||
if (!can_use_SDWA(ctx.program->chip_class, instr, true))
|
||||
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
|
||||
return false;
|
||||
|
||||
to_SDWA(ctx, instr);
|
||||
|
@ -3224,7 +3224,7 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->operands[!i].getTemp().type() == RegType::vgpr) {
|
||||
new_instr.reset(
|
||||
create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
|
||||
} else if (ctx.program->chip_class >= GFX10 ||
|
||||
} else if (ctx.program->gfx_level >= GFX10 ||
|
||||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
|
||||
new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32,
|
||||
asVOP3(Format::VOP2), 3, 1));
|
||||
|
@ -3484,11 +3484,11 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
bool
|
||||
can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (ctx.program->chip_class < GFX9)
|
||||
if (ctx.program->gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
/* v_mad_mix* on GFX9 always flushes denormals for 16-bit inputs/outputs */
|
||||
if (ctx.program->chip_class == GFX9 && ctx.fp_mode.denorm16_64)
|
||||
if (ctx.program->gfx_level == GFX9 && ctx.fp_mode.denorm16_64)
|
||||
return false;
|
||||
|
||||
switch (instr->opcode) {
|
||||
|
@ -3808,12 +3808,12 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
bool legacy = info.instr->opcode == aco_opcode::v_mul_legacy_f32;
|
||||
bool mad_mix = is_add_mix || info.instr->isVOP3P();
|
||||
|
||||
bool has_fma = mad16 || mad64 || (legacy && ctx.program->chip_class >= GFX10_3) ||
|
||||
bool has_fma = mad16 || mad64 || (legacy && ctx.program->gfx_level >= GFX10_3) ||
|
||||
(mad32 && !legacy && !mad_mix && ctx.program->dev.has_fast_fma32) ||
|
||||
(mad_mix && ctx.program->dev.fused_mad_mix);
|
||||
bool has_mad = mad_mix ? !ctx.program->dev.fused_mad_mix
|
||||
: ((mad32 && ctx.program->chip_class < GFX10_3) ||
|
||||
(mad16 && ctx.program->chip_class <= GFX9));
|
||||
: ((mad32 && ctx.program->gfx_level < GFX10_3) ||
|
||||
(mad16 && ctx.program->gfx_level <= GFX9));
|
||||
bool can_use_fma = has_fma && !info.instr->definitions[0].isPrecise() &&
|
||||
!instr->definitions[0].isPrecise();
|
||||
bool can_use_mad =
|
||||
|
@ -3938,13 +3938,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
} else {
|
||||
aco_opcode mad_op = emit_fma ? aco_opcode::v_fma_f32 : aco_opcode::v_mad_f32;
|
||||
if (mul_instr->opcode == aco_opcode::v_mul_legacy_f32) {
|
||||
assert(emit_fma == (ctx.program->chip_class >= GFX10_3));
|
||||
assert(emit_fma == (ctx.program->gfx_level >= GFX10_3));
|
||||
mad_op = emit_fma ? aco_opcode::v_fma_legacy_f32 : aco_opcode::v_mad_legacy_f32;
|
||||
} else if (mad16) {
|
||||
mad_op = emit_fma ? (ctx.program->chip_class == GFX8 ? aco_opcode::v_fma_legacy_f16
|
||||
: aco_opcode::v_fma_f16)
|
||||
: (ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_f16
|
||||
: aco_opcode::v_mad_f16);
|
||||
mad_op = emit_fma ? (ctx.program->gfx_level == GFX8 ? aco_opcode::v_fma_legacy_f16
|
||||
: aco_opcode::v_fma_f16)
|
||||
: (ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_f16
|
||||
: aco_opcode::v_mad_f16);
|
||||
} else if (mad64) {
|
||||
mad_op = aco_opcode::v_fma_f64;
|
||||
}
|
||||
|
@ -3992,14 +3992,14 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
return;
|
||||
}
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::v_or_b32 && ctx.program->chip_class >= GFX9) {
|
||||
} else if (instr->opcode == aco_opcode::v_or_b32 && ctx.program->gfx_level >= GFX9) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::s_or_b32, aco_opcode::v_or3_b32, "012",
|
||||
1 | 2)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32,
|
||||
"012", 1 | 2)) {
|
||||
} else if (combine_add_or_then_and_lshl(ctx, instr)) {
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->chip_class >= GFX10) {
|
||||
} else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012",
|
||||
1 | 2)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32,
|
||||
|
@ -4008,7 +4008,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
} else if (instr->opcode == aco_opcode::v_add_u16) {
|
||||
combine_three_valu_op(
|
||||
ctx, instr, aco_opcode::v_mul_lo_u16,
|
||||
ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_u16 : aco_opcode::v_mad_u16,
|
||||
ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_u16 : aco_opcode::v_mad_u16,
|
||||
"120", 1 | 2);
|
||||
} else if (instr->opcode == aco_opcode::v_add_u16_e64) {
|
||||
combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120",
|
||||
|
@ -4018,7 +4018,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
} else if (combine_add_bcnt(ctx, instr)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
|
||||
aco_opcode::v_mad_u32_u24, "120", 1 | 2)) {
|
||||
} else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
|
||||
} else if (ctx.program->gfx_level >= GFX9 && !instr->usesModifiers()) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120",
|
||||
1 | 2)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32,
|
||||
|
@ -4052,11 +4052,11 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->opcode == aco_opcode::v_subrev_co_u32 ||
|
||||
instr->opcode == aco_opcode::v_subrev_co_u32_e64) {
|
||||
combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
|
||||
} else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->chip_class >= GFX9) {
|
||||
} else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
|
||||
combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
|
||||
2);
|
||||
} else if ((instr->opcode == aco_opcode::s_add_u32 || instr->opcode == aco_opcode::s_add_i32) &&
|
||||
ctx.program->chip_class >= GFX9) {
|
||||
ctx.program->gfx_level >= GFX9) {
|
||||
combine_salu_lshl_add(ctx, instr);
|
||||
} else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
|
||||
combine_salu_not_bitwise(ctx, instr);
|
||||
|
@ -4080,7 +4080,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
aco_opcode min, max, min3, max3, med3;
|
||||
bool some_gfx9_only;
|
||||
if (get_minmax_info(instr->opcode, &min, &max, &min3, &max3, &med3, &some_gfx9_only) &&
|
||||
(!some_gfx9_only || ctx.program->chip_class >= GFX9)) {
|
||||
(!some_gfx9_only || ctx.program->gfx_level >= GFX9)) {
|
||||
if (combine_minmax(ctx, instr, instr->opcode == min ? max : min,
|
||||
instr->opcode == min ? min3 : max3)) {
|
||||
} else {
|
||||
|
@ -4236,7 +4236,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->opcode != aco_opcode::v_fma_legacy_f32) {
|
||||
/* FMA can only take literals on GFX10+ */
|
||||
if ((instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) &&
|
||||
ctx.program->chip_class < GFX10)
|
||||
ctx.program->gfx_level < GFX10)
|
||||
return;
|
||||
/* There are no v_fmaak_legacy_f16/v_fmamk_legacy_f16 and on chips where VOP3 can take
|
||||
* literals (GFX10+), these instructions don't exist.
|
||||
|
@ -4261,7 +4261,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
/* Encoding limitations requires a VGPR operand. The constant bus limitations before
|
||||
* GFX10 disallows SGPRs.
|
||||
*/
|
||||
if ((!has_sgpr || ctx.program->chip_class >= GFX10) && has_vgpr) {
|
||||
if ((!has_sgpr || ctx.program->gfx_level >= GFX10) && has_vgpr) {
|
||||
literal_idx = 2;
|
||||
literal_uses = ctx.uses[instr->operands[2].tempId()];
|
||||
}
|
||||
|
@ -4275,7 +4275,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
continue;
|
||||
|
||||
/* The constant bus limitations before GFX10 disallows SGPRs. */
|
||||
if (ctx.program->chip_class < GFX10 && instr->operands[!i].isTemp() &&
|
||||
if (ctx.program->gfx_level < GFX10 && instr->operands[!i].isTemp() &&
|
||||
instr->operands[!i].getTemp().type() == RegType::sgpr)
|
||||
continue;
|
||||
|
||||
|
@ -4385,8 +4385,8 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
}
|
||||
|
||||
if (instr->isSDWA() || (instr->isVOP3() && ctx.program->chip_class < GFX10) ||
|
||||
(instr->isVOP3P() && ctx.program->chip_class < GFX10))
|
||||
if (instr->isSDWA() || (instr->isVOP3() && ctx.program->gfx_level < GFX10) ||
|
||||
(instr->isVOP3P() && ctx.program->gfx_level < GFX10))
|
||||
return; /* some encodings can't ever take literals */
|
||||
|
||||
/* we do not apply the literals yet as we don't know if it is profitable */
|
||||
|
@ -4397,7 +4397,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
Operand literal(s1);
|
||||
unsigned num_operands = 1;
|
||||
if (instr->isSALU() ||
|
||||
(ctx.program->chip_class >= GFX10 && (can_use_VOP3(ctx, instr) || instr->isVOP3P())))
|
||||
(ctx.program->gfx_level >= GFX10 && (can_use_VOP3(ctx, instr) || instr->isVOP3P())))
|
||||
num_operands = instr->operands.size();
|
||||
/* catch VOP2 with a 3rd SGPR operand (e.g. v_cndmask_b32, v_addc_co_u32) */
|
||||
else if (instr->isVALU() && instr->operands.size() >= 3)
|
||||
|
@ -4442,7 +4442,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
instr->opcode == aco_opcode::v_lshrrev_b64 ||
|
||||
instr->opcode == aco_opcode::v_ashrrev_i64;
|
||||
unsigned const_bus_limit = instr->isVALU() ? 1 : UINT32_MAX;
|
||||
if (ctx.program->chip_class >= GFX10 && !is_shift64)
|
||||
if (ctx.program->gfx_level >= GFX10 && !is_shift64)
|
||||
const_bus_limit = 2;
|
||||
|
||||
unsigned num_sgprs = !!sgpr_ids[0] + !!sgpr_ids[1];
|
||||
|
|
|
@ -194,7 +194,7 @@ try_apply_branch_vcc(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
*/
|
||||
|
||||
/* Don't try to optimize this on GFX6-7 because SMEM may corrupt the vccz bit. */
|
||||
if (ctx.program->chip_class < GFX8)
|
||||
if (ctx.program->gfx_level < GFX8)
|
||||
return;
|
||||
|
||||
if (instr->format != Format::PSEUDO_BRANCH || instr->operands.size() == 0 ||
|
||||
|
|
|
@ -100,9 +100,9 @@ print_constant_data(FILE* output, Program* program)
|
|||
* Determines the GPU type to use for CLRXdisasm
|
||||
*/
|
||||
const char*
|
||||
to_clrx_device_name(chip_class cc, radeon_family family)
|
||||
to_clrx_device_name(amd_gfx_level gfx_level, radeon_family family)
|
||||
{
|
||||
switch (cc) {
|
||||
switch (gfx_level) {
|
||||
case GFX6:
|
||||
switch (family) {
|
||||
case CHIP_TAHITI: return "tahiti";
|
||||
|
@ -182,7 +182,7 @@ print_asm_clrx(Program* program, std::vector<uint32_t>& binary, unsigned exec_si
|
|||
FILE* p;
|
||||
int fd;
|
||||
|
||||
const char* gpu_type = to_clrx_device_name(program->chip_class, program->family);
|
||||
const char* gpu_type = to_clrx_device_name(program->gfx_level, program->family);
|
||||
|
||||
/* Dump the binary into a temporary file. */
|
||||
fd = mkstemp(path);
|
||||
|
@ -268,14 +268,14 @@ fail:
|
|||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
std::pair<bool, size_t>
|
||||
disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size,
|
||||
size_t pos, char* outline, unsigned outline_size)
|
||||
disasm_instr(amd_gfx_level gfx_level, LLVMDisasmContextRef disasm, uint32_t* binary,
|
||||
unsigned exec_size, size_t pos, char* outline, unsigned outline_size)
|
||||
{
|
||||
size_t l =
|
||||
LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),
|
||||
pos * 4, outline, outline_size);
|
||||
|
||||
if (chip >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&
|
||||
if (gfx_level >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&
|
||||
((binary[pos + 1] & 0x1ff) == 0xff)) {
|
||||
/* v_writelane with literal uses 3 dwords but llvm consumes only 2 */
|
||||
l += 4;
|
||||
|
@ -284,16 +284,19 @@ disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, uns
|
|||
bool invalid = false;
|
||||
size_t size;
|
||||
if (!l &&
|
||||
((chip >= GFX9 && (binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */
|
||||
(chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */
|
||||
(chip <= GFX9 && (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */
|
||||
(chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
|
||||
(chip == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
|
||||
((gfx_level >= GFX9 &&
|
||||
(binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */
|
||||
(gfx_level >= GFX10 &&
|
||||
(binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */
|
||||
(gfx_level <= GFX9 &&
|
||||
(binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */
|
||||
(gfx_level >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
|
||||
(gfx_level == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
|
||||
strcpy(outline, "\tinteger addition + clamp");
|
||||
bool has_literal = chip >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||
|
||||
(((binary[pos + 1] >> 9) & 0x1ff) == 0xff));
|
||||
bool has_literal = gfx_level >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||
|
||||
(((binary[pos + 1] >> 9) & 0x1ff) == 0xff));
|
||||
size = 2 + has_literal;
|
||||
} else if (chip >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
|
||||
} else if (gfx_level >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
|
||||
strcpy(outline, "\tv_cndmask_b32 + sdwa");
|
||||
size = 2;
|
||||
} else if (!l) {
|
||||
|
@ -308,11 +311,11 @@ disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, uns
|
|||
/* See: https://github.com/GPUOpen-Tools/radeon_gpu_profiler/issues/65 and
|
||||
* https://github.com/llvm/llvm-project/issues/38652
|
||||
*/
|
||||
if (chip == GFX9 && (binary[pos] & 0xfc024000) == 0xc0024000) {
|
||||
if (gfx_level == GFX9 && (binary[pos] & 0xfc024000) == 0xc0024000) {
|
||||
/* SMEM with IMM=1 and SOE=1: LLVM ignores SOFFSET */
|
||||
size_t len = strlen(outline);
|
||||
snprintf(outline + len, outline_size - len, ", s%u", binary[pos + 1] >> 25);
|
||||
} else if (chip >= GFX10 && (binary[pos] & 0xfc000000) == 0xf4000000 &&
|
||||
} else if (gfx_level >= GFX10 && (binary[pos] & 0xfc000000) == 0xf4000000 &&
|
||||
(binary[pos + 1] & 0xfe000000) != 0xfa000000) {
|
||||
/* SMEM non-NULL SOFFSET: LLVM ignores OFFSET */
|
||||
uint32_t offset = binary[pos + 1] & 0x1fffff;
|
||||
|
@ -344,7 +347,7 @@ print_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_si
|
|||
}
|
||||
|
||||
const char* features = "";
|
||||
if (program->chip_class >= GFX10 && program->wave_size == 64) {
|
||||
if (program->gfx_level >= GFX10 && program->wave_size == 64) {
|
||||
features = "+wavefrontsize64";
|
||||
}
|
||||
|
||||
|
@ -376,7 +379,7 @@ print_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_si
|
|||
print_block_markers(output, program, referenced_blocks, &next_block, pos);
|
||||
|
||||
char outline[1024];
|
||||
std::pair<bool, size_t> res = disasm_instr(program->chip_class, disasm, binary.data(),
|
||||
std::pair<bool, size_t> res = disasm_instr(program->gfx_level, disasm, binary.data(),
|
||||
exec_size, pos, outline, sizeof(outline));
|
||||
invalid |= res.first;
|
||||
|
||||
|
@ -402,7 +405,7 @@ bool
|
|||
check_print_asm_support(Program* program)
|
||||
{
|
||||
#ifdef LLVM_AVAILABLE
|
||||
if (program->chip_class >= GFX8) {
|
||||
if (program->gfx_level >= GFX8) {
|
||||
/* LLVM disassembler only supports GFX8+ */
|
||||
return true;
|
||||
}
|
||||
|
@ -410,7 +413,7 @@ check_print_asm_support(Program* program)
|
|||
|
||||
#ifndef _WIN32
|
||||
/* Check if CLRX disassembler binary is available and can disassemble the program */
|
||||
return to_clrx_device_name(program->chip_class, program->family) &&
|
||||
return to_clrx_device_name(program->gfx_level, program->family) &&
|
||||
system("clrxdisasm --version") == 0;
|
||||
#else
|
||||
return false;
|
||||
|
@ -422,7 +425,7 @@ bool
|
|||
print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
|
||||
{
|
||||
#ifdef LLVM_AVAILABLE
|
||||
if (program->chip_class >= GFX8) {
|
||||
if (program->gfx_level >= GFX8) {
|
||||
return print_asm_llvm(program, binary, exec_size, output);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -284,7 +284,7 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
|
|||
uint16_t imm = instr->sopp().imm;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::s_waitcnt: {
|
||||
/* we usually should check the chip class for vmcnt/lgkm, but
|
||||
/* we usually should check the gfx level for vmcnt/lgkm, but
|
||||
* insert_waitcnt() should fill it in regardless. */
|
||||
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
|
||||
if (vmcnt != 63)
|
||||
|
|
|
@ -130,11 +130,11 @@ setup_reduce_temp(Program* program)
|
|||
op == imul16 || op == imax16 || op == imin16 || op == umin16 ||
|
||||
op == iadd64;
|
||||
|
||||
if (program->chip_class >= GFX10 && cluster_size == 64)
|
||||
if (program->gfx_level >= GFX10 && cluster_size == 64)
|
||||
need_vtmp = true;
|
||||
if (program->chip_class >= GFX10 && gfx10_need_vtmp)
|
||||
if (program->gfx_level >= GFX10 && gfx10_need_vtmp)
|
||||
need_vtmp = true;
|
||||
if (program->chip_class <= GFX7)
|
||||
if (program->gfx_level <= GFX7)
|
||||
need_vtmp = true;
|
||||
|
||||
need_vtmp |= cluster_size == 32;
|
||||
|
|
|
@ -37,7 +37,7 @@ namespace {
|
|||
|
||||
struct ra_ctx;
|
||||
|
||||
unsigned get_subdword_operand_stride(chip_class chip, const aco_ptr<Instruction>& instr,
|
||||
unsigned get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
|
||||
unsigned idx, RegClass rc);
|
||||
void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte,
|
||||
RegClass rc);
|
||||
|
@ -214,7 +214,7 @@ struct DefInfo {
|
|||
|
||||
if (rc.is_subdword() && operand >= 0) {
|
||||
/* stride in bytes */
|
||||
stride = get_subdword_operand_stride(ctx.program->chip_class, instr, operand, rc);
|
||||
stride = get_subdword_operand_stride(ctx.program->gfx_level, instr, operand, rc);
|
||||
} else if (rc.is_subdword()) {
|
||||
std::pair<unsigned, unsigned> info = get_subdword_definition_info(ctx.program, instr, rc);
|
||||
stride = info.first;
|
||||
|
@ -229,7 +229,7 @@ struct DefInfo {
|
|||
stride = DIV_ROUND_UP(stride, 4);
|
||||
}
|
||||
assert(stride > 0);
|
||||
} else if (instr->isMIMG() && instr->mimg().d16 && ctx.program->chip_class <= GFX9) {
|
||||
} else if (instr->isMIMG() && instr->mimg().d16 && ctx.program->gfx_level <= GFX9) {
|
||||
/* Workaround GFX9 hardware bug for D16 image instructions: FeatureImageGather4D16Bug
|
||||
*
|
||||
* The register use is not calculated correctly, and the hardware assumes a
|
||||
|
@ -239,7 +239,7 @@ struct DefInfo {
|
|||
* https://reviews.llvm.org/D81172
|
||||
*/
|
||||
bool imageGather4D16Bug = operand == -1 && rc == v2 && instr->mimg().dmask != 0xF;
|
||||
assert(ctx.program->chip_class == GFX9 && "Image D16 on GFX8 not supported.");
|
||||
assert(ctx.program->gfx_level == GFX9 && "Image D16 on GFX8 not supported.");
|
||||
|
||||
if (imageGather4D16Bug)
|
||||
bounds.size -= rc.bytes() / 4;
|
||||
|
@ -490,14 +490,14 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
|
|||
}
|
||||
|
||||
unsigned
|
||||
get_subdword_operand_stride(chip_class chip, const aco_ptr<Instruction>& instr, unsigned idx,
|
||||
RegClass rc)
|
||||
get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
|
||||
unsigned idx, RegClass rc)
|
||||
{
|
||||
if (instr->isPseudo()) {
|
||||
/* v_readfirstlane_b32 cannot use SDWA */
|
||||
if (instr->opcode == aco_opcode::p_as_uniform)
|
||||
return 4;
|
||||
else if (chip >= GFX8)
|
||||
else if (gfx_level >= GFX8)
|
||||
return rc.bytes() % 2 == 0 ? 2 : 1;
|
||||
else
|
||||
return 4;
|
||||
|
@ -505,9 +505,9 @@ get_subdword_operand_stride(chip_class chip, const aco_ptr<Instruction>& instr,
|
|||
|
||||
assert(rc.bytes() <= 2);
|
||||
if (instr->isVALU()) {
|
||||
if (can_use_SDWA(chip, instr, false))
|
||||
if (can_use_SDWA(gfx_level, instr, false))
|
||||
return rc.bytes();
|
||||
if (can_use_opsel(chip, instr->opcode, idx))
|
||||
if (can_use_opsel(gfx_level, instr->opcode, idx))
|
||||
return 2;
|
||||
if (instr->format == Format::VOP3P)
|
||||
return 2;
|
||||
|
@ -516,7 +516,7 @@ get_subdword_operand_stride(chip_class chip, const aco_ptr<Instruction>& instr,
|
|||
switch (instr->opcode) {
|
||||
case aco_opcode::v_cvt_f32_ubyte0: return 1;
|
||||
case aco_opcode::ds_write_b8:
|
||||
case aco_opcode::ds_write_b16: return chip >= GFX9 ? 2 : 4;
|
||||
case aco_opcode::ds_write_b16: return gfx_level >= GFX9 ? 2 : 4;
|
||||
case aco_opcode::buffer_store_byte:
|
||||
case aco_opcode::buffer_store_short:
|
||||
case aco_opcode::buffer_store_format_d16_x:
|
||||
|
@ -525,7 +525,7 @@ get_subdword_operand_stride(chip_class chip, const aco_ptr<Instruction>& instr,
|
|||
case aco_opcode::scratch_store_byte:
|
||||
case aco_opcode::scratch_store_short:
|
||||
case aco_opcode::global_store_byte:
|
||||
case aco_opcode::global_store_short: return chip >= GFX9 ? 2 : 4;
|
||||
case aco_opcode::global_store_short: return gfx_level >= GFX9 ? 2 : 4;
|
||||
default: return 4;
|
||||
}
|
||||
}
|
||||
|
@ -534,7 +534,7 @@ void
|
|||
add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte,
|
||||
RegClass rc)
|
||||
{
|
||||
chip_class chip = ctx.program->chip_class;
|
||||
amd_gfx_level gfx_level = ctx.program->gfx_level;
|
||||
if (instr->isPseudo() || byte == 0)
|
||||
return;
|
||||
|
||||
|
@ -563,8 +563,8 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns
|
|||
}
|
||||
|
||||
/* use SDWA */
|
||||
assert(can_use_SDWA(chip, instr, false));
|
||||
convert_to_SDWA(chip, instr);
|
||||
assert(can_use_SDWA(gfx_level, instr, false));
|
||||
convert_to_SDWA(gfx_level, instr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -600,10 +600,10 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns
|
|||
std::pair<unsigned, unsigned>
|
||||
get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc)
|
||||
{
|
||||
chip_class chip = program->chip_class;
|
||||
amd_gfx_level gfx_level = program->gfx_level;
|
||||
|
||||
if (instr->isPseudo()) {
|
||||
if (chip >= GFX8)
|
||||
if (gfx_level >= GFX8)
|
||||
return std::make_pair(rc.bytes() % 2 == 0 ? 2 : 1, rc.bytes());
|
||||
else
|
||||
return std::make_pair(4, rc.size() * 4u);
|
||||
|
@ -612,16 +612,16 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
|
|||
if (instr->isVALU() || instr->isVINTRP()) {
|
||||
assert(rc.bytes() <= 2);
|
||||
|
||||
if (can_use_SDWA(chip, instr, false))
|
||||
if (can_use_SDWA(gfx_level, instr, false))
|
||||
return std::make_pair(rc.bytes(), rc.bytes());
|
||||
|
||||
unsigned bytes_written = 4u;
|
||||
if (instr_is_16bit(chip, instr->opcode))
|
||||
if (instr_is_16bit(gfx_level, instr->opcode))
|
||||
bytes_written = 2u;
|
||||
|
||||
unsigned stride = 4u;
|
||||
if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
|
||||
can_use_opsel(chip, instr->opcode, -1))
|
||||
can_use_opsel(gfx_level, instr->opcode, -1))
|
||||
stride = 2u;
|
||||
|
||||
return std::make_pair(stride, bytes_written);
|
||||
|
@ -645,7 +645,7 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
|
|||
case aco_opcode::buffer_load_sbyte_d16:
|
||||
case aco_opcode::buffer_load_short_d16:
|
||||
case aco_opcode::buffer_load_format_d16_x: {
|
||||
assert(chip >= GFX9);
|
||||
assert(gfx_level >= GFX9);
|
||||
if (!program->dev.sram_ecc_enabled)
|
||||
return std::make_pair(2u, 2u);
|
||||
else
|
||||
|
@ -654,7 +654,7 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
|
|||
/* 3-component D16 loads */
|
||||
case aco_opcode::buffer_load_format_d16_xyz:
|
||||
case aco_opcode::tbuffer_load_format_d16_xyz: {
|
||||
assert(chip >= GFX9);
|
||||
assert(gfx_level >= GFX9);
|
||||
if (!program->dev.sram_ecc_enabled)
|
||||
return std::make_pair(4u, 6u);
|
||||
break;
|
||||
|
@ -664,7 +664,7 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
|
|||
}
|
||||
|
||||
if (instr->isMIMG() && instr->mimg().d16 && !program->dev.sram_ecc_enabled) {
|
||||
assert(chip >= GFX9);
|
||||
assert(gfx_level >= GFX9);
|
||||
return std::make_pair(4u, rc.bytes());
|
||||
}
|
||||
|
||||
|
@ -678,16 +678,16 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
|
|||
return;
|
||||
|
||||
if (instr->isVALU()) {
|
||||
chip_class chip = program->chip_class;
|
||||
amd_gfx_level gfx_level = program->gfx_level;
|
||||
assert(instr->definitions[0].bytes() <= 2);
|
||||
|
||||
if (reg.byte() == 0 && instr_is_16bit(chip, instr->opcode))
|
||||
if (reg.byte() == 0 && instr_is_16bit(gfx_level, instr->opcode))
|
||||
return;
|
||||
|
||||
/* check if we can use opsel */
|
||||
if (instr->format == Format::VOP3) {
|
||||
assert(reg.byte() == 2);
|
||||
assert(can_use_opsel(chip, instr->opcode, -1));
|
||||
assert(can_use_opsel(gfx_level, instr->opcode, -1));
|
||||
instr->vop3().opsel |= (1 << 3); /* dst in high half */
|
||||
return;
|
||||
}
|
||||
|
@ -698,8 +698,8 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
|
|||
}
|
||||
|
||||
/* use SDWA */
|
||||
assert(can_use_SDWA(chip, instr, false));
|
||||
convert_to_SDWA(chip, instr);
|
||||
assert(can_use_SDWA(gfx_level, instr, false));
|
||||
convert_to_SDWA(gfx_level, instr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1053,7 +1053,7 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
|
|||
reg.reg_b += instr->operands[i].bytes();
|
||||
}
|
||||
|
||||
if (ctx.program->chip_class <= GFX8)
|
||||
if (ctx.program->gfx_level <= GFX8)
|
||||
return {PhysReg(), false};
|
||||
|
||||
/* check if the previous position was in vector */
|
||||
|
@ -1886,7 +1886,7 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
|
|||
reads_subdword = true;
|
||||
}
|
||||
bool needs_scratch_reg = (writes_linear && reads_linear && reg_file[scc]) ||
|
||||
(ctx.program->chip_class <= GFX7 && reads_subdword);
|
||||
(ctx.program->gfx_level <= GFX7 && reads_subdword);
|
||||
if (!needs_scratch_reg)
|
||||
return;
|
||||
|
||||
|
@ -1910,7 +1910,7 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
|
|||
}
|
||||
|
||||
bool
|
||||
operand_can_use_reg(chip_class chip, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
|
||||
operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
|
||||
RegClass rc)
|
||||
{
|
||||
if (instr->operands[idx].isFixed())
|
||||
|
@ -1918,7 +1918,7 @@ operand_can_use_reg(chip_class chip, aco_ptr<Instruction>& instr, unsigned idx,
|
|||
|
||||
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
|
||||
instr->opcode == aco_opcode::v_writelane_b32_e64;
|
||||
if (chip <= GFX9 && is_writelane && idx <= 1) {
|
||||
if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
|
||||
/* v_writelane_b32 can take two sgprs but only if one is m0. */
|
||||
bool is_other_sgpr =
|
||||
instr->operands[!idx].isTemp() &&
|
||||
|
@ -1930,7 +1930,7 @@ operand_can_use_reg(chip_class chip, aco_ptr<Instruction>& instr, unsigned idx,
|
|||
}
|
||||
|
||||
if (reg.byte()) {
|
||||
unsigned stride = get_subdword_operand_stride(chip, instr, idx, rc);
|
||||
unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc);
|
||||
if (reg.byte() % stride)
|
||||
return false;
|
||||
}
|
||||
|
@ -1940,7 +1940,7 @@ operand_can_use_reg(chip_class chip, aco_ptr<Instruction>& instr, unsigned idx,
|
|||
return reg != scc && reg != exec &&
|
||||
(reg != m0 || idx == 1 || idx == 3) && /* offset can be m0 */
|
||||
(reg != vcc || (instr->definitions.empty() && idx == 2) ||
|
||||
chip >= GFX10); /* sdata can be vcc */
|
||||
gfx_level >= GFX10); /* sdata can be vcc */
|
||||
default:
|
||||
// TODO: there are more instructions with restrictions on registers
|
||||
return true;
|
||||
|
@ -2389,7 +2389,7 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
|
|||
instr->operands[0].isFirstKillBeforeDef()) {
|
||||
ctx.split_vectors[instr->operands[0].tempId()] = instr.get();
|
||||
} else if (instr->isVOPC() && !instr->isVOP3()) {
|
||||
if (!instr->isSDWA() || ctx.program->chip_class == GFX8)
|
||||
if (!instr->isSDWA() || ctx.program->gfx_level == GFX8)
|
||||
ctx.assignments[instr->definitions[0].tempId()].vcc = true;
|
||||
} else if (instr->isVOP2() && !instr->isVOP3()) {
|
||||
if (instr->operands.size() == 3 && instr->operands[2].isTemp() &&
|
||||
|
@ -2437,7 +2437,7 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
|
|||
case aco_opcode::v_fma_f32:
|
||||
case aco_opcode::v_fma_f16:
|
||||
case aco_opcode::v_pk_fma_f16:
|
||||
if (ctx.program->chip_class < GFX10)
|
||||
if (ctx.program->gfx_level < GFX10)
|
||||
continue;
|
||||
FALLTHROUGH;
|
||||
case aco_opcode::v_mad_f32:
|
||||
|
@ -2642,12 +2642,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
assert(ctx.assignments[operand.tempId()].assigned);
|
||||
|
||||
PhysReg reg = ctx.assignments[operand.tempId()].reg;
|
||||
if (operand_can_use_reg(program->chip_class, instr, i, reg, operand.regClass()))
|
||||
if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass()))
|
||||
operand.setFixed(reg);
|
||||
else
|
||||
get_reg_for_operand(ctx, register_file, parallelcopy, instr, operand, i);
|
||||
|
||||
if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) ||
|
||||
if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->gfx_level == GFX6) ||
|
||||
(instr->isDS() && instr->ds().gds)) {
|
||||
for (unsigned j = 0; j < operand.size(); j++)
|
||||
ctx.war_hint.set(operand.physReg().reg() + j);
|
||||
|
@ -2662,11 +2662,11 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
|
||||
/* try to optimize v_mad_f32 -> v_mac_f32 */
|
||||
if ((instr->opcode == aco_opcode::v_mad_f32 ||
|
||||
(instr->opcode == aco_opcode::v_fma_f32 && program->chip_class >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_fma_f32 && program->gfx_level >= GFX10) ||
|
||||
instr->opcode == aco_opcode::v_mad_f16 ||
|
||||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
|
||||
(instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_fma_f16 && program->gfx_level >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_pk_fma_f16 && program->gfx_level >= GFX10) ||
|
||||
(instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
|
||||
(instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
|
||||
(instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
|
||||
|
@ -2953,7 +2953,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
|
||||
/* if the first operand is a literal, we have to move it to a reg */
|
||||
if (instr->operands.size() && instr->operands[0].isLiteral() &&
|
||||
program->chip_class < GFX10) {
|
||||
program->gfx_level < GFX10) {
|
||||
bool can_sgpr = true;
|
||||
/* check, if we have to move to vgpr */
|
||||
for (const Operand& op : instr->operands) {
|
||||
|
|
|
@ -1413,17 +1413,17 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
|
|||
uint32_t rsrc_conf =
|
||||
S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx.program->wave_size == 64 ? 3 : 2);
|
||||
|
||||
if (ctx.program->chip_class >= GFX10) {
|
||||
if (ctx.program->gfx_level >= GFX10) {
|
||||
rsrc_conf |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
|
||||
S_008F0C_RESOURCE_LEVEL(ctx.program->chip_class < GFX11);
|
||||
} else if (ctx.program->chip_class <= GFX7) {
|
||||
S_008F0C_RESOURCE_LEVEL(ctx.program->gfx_level < GFX11);
|
||||
} else if (ctx.program->gfx_level <= GFX7) {
|
||||
/* dfmt modifies stride on GFX8/GFX9 when ADD_TID_EN=1 */
|
||||
rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
}
|
||||
/* older generations need element size = 4 bytes. element size removed in GFX9 */
|
||||
if (ctx.program->chip_class <= GFX8)
|
||||
if (ctx.program->gfx_level <= GFX8)
|
||||
rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
|
||||
|
||||
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
|
||||
|
|
|
@ -112,7 +112,7 @@ get_perf_info(Program* program, aco_ptr<Instruction>& instr)
|
|||
#define WAIT(res) BlockCycleEstimator::res, 0
|
||||
#define WAIT_USE(res, cnt) BlockCycleEstimator::res, cnt
|
||||
|
||||
if (program->chip_class >= GFX10) {
|
||||
if (program->gfx_level >= GFX10) {
|
||||
/* fp64 might be incorrect */
|
||||
switch (cls) {
|
||||
case instr_class::valu32:
|
||||
|
@ -265,9 +265,9 @@ get_wait_imm(Program* program, aco_ptr<Instruction>& instr)
|
|||
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
|
||||
return wait_imm(0, 0, 0, instr->sopk().imm);
|
||||
} else {
|
||||
unsigned max_lgkm_cnt = program->chip_class >= GFX10 ? 62 : 14;
|
||||
unsigned max_lgkm_cnt = program->gfx_level >= GFX10 ? 62 : 14;
|
||||
unsigned max_exp_cnt = 6;
|
||||
unsigned max_vm_cnt = program->chip_class >= GFX9 ? 62 : 14;
|
||||
unsigned max_vm_cnt = program->gfx_level >= GFX9 ? 62 : 14;
|
||||
unsigned max_vs_cnt = 62;
|
||||
|
||||
wait_counter_info wait_info = get_wait_counter_info(instr);
|
||||
|
@ -306,7 +306,7 @@ BlockCycleEstimator::get_dependency_cost(aco_ptr<Instruction>& instr)
|
|||
if (instr->opcode == aco_opcode::s_endpgm) {
|
||||
for (unsigned i = 0; i < 512; i++)
|
||||
deps_available = MAX2(deps_available, reg_available[i]);
|
||||
} else if (program->chip_class >= GFX10) {
|
||||
} else if (program->gfx_level >= GFX10) {
|
||||
for (Operand& op : instr->operands) {
|
||||
if (op.isConstant() || op.isUndefined())
|
||||
continue;
|
||||
|
@ -315,7 +315,7 @@ BlockCycleEstimator::get_dependency_cost(aco_ptr<Instruction>& instr)
|
|||
}
|
||||
}
|
||||
|
||||
if (program->chip_class < GFX10)
|
||||
if (program->gfx_level < GFX10)
|
||||
deps_available = align(deps_available, 4);
|
||||
|
||||
return deps_available - cur_cycle;
|
||||
|
@ -357,7 +357,7 @@ BlockCycleEstimator::add(aco_ptr<Instruction>& instr)
|
|||
cur_cycle += get_dependency_cost(instr);
|
||||
|
||||
unsigned start;
|
||||
bool dual_issue = program->chip_class >= GFX10 && program->wave_size == 64 &&
|
||||
bool dual_issue = program->gfx_level >= GFX10 && program->wave_size == 64 &&
|
||||
is_vector(instr->opcode) && program->workgroup_size > 32;
|
||||
for (unsigned i = 0; i < (dual_issue ? 2 : 1); i++) {
|
||||
cur_cycle += cycles_until_res_available(instr);
|
||||
|
@ -366,7 +366,7 @@ BlockCycleEstimator::add(aco_ptr<Instruction>& instr)
|
|||
use_resources(instr);
|
||||
|
||||
/* GCN is in-order and doesn't begin the next instruction until the current one finishes */
|
||||
cur_cycle += program->chip_class >= GFX10 ? 1 : perf.latency;
|
||||
cur_cycle += program->gfx_level >= GFX10 ? 1 : perf.latency;
|
||||
}
|
||||
|
||||
wait_imm imm = get_wait_imm(program, instr);
|
||||
|
|
|
@ -153,16 +153,16 @@ validate_ir(Program* program)
|
|||
base_format == Format::VOPC,
|
||||
"Format cannot have SDWA applied", instr.get());
|
||||
|
||||
check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
|
||||
check(program->gfx_level >= GFX8, "SDWA is GFX8+ only", instr.get());
|
||||
|
||||
SDWA_instruction& sdwa = instr->sdwa();
|
||||
check(sdwa.omod == 0 || program->chip_class >= GFX9,
|
||||
"SDWA omod only supported on GFX9+", instr.get());
|
||||
check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
|
||||
instr.get());
|
||||
if (base_format == Format::VOPC) {
|
||||
check(sdwa.clamp == false || program->chip_class == GFX8,
|
||||
check(sdwa.clamp == false || program->gfx_level == GFX8,
|
||||
"SDWA VOPC clamp only supported on GFX8", instr.get());
|
||||
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
|
||||
program->chip_class >= GFX9,
|
||||
program->gfx_level >= GFX9,
|
||||
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
|
||||
} else {
|
||||
const Definition& def = instr->definitions[0];
|
||||
|
@ -215,7 +215,7 @@ validate_ir(Program* program)
|
|||
instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
|
||||
|
||||
const bool feature_mac =
|
||||
program->chip_class == GFX8 &&
|
||||
program->gfx_level == GFX8 &&
|
||||
(instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
|
||||
|
||||
check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
|
||||
|
@ -224,8 +224,8 @@ validate_ir(Program* program)
|
|||
/* check opsel */
|
||||
if (instr->isVOP3()) {
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
check(vop3.opsel == 0 || program->chip_class >= GFX9,
|
||||
"Opsel is only supported on GFX9+", instr.get());
|
||||
check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
|
||||
instr.get());
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (i >= instr->operands.size() ||
|
||||
|
@ -287,8 +287,8 @@ validate_ir(Program* program)
|
|||
continue;
|
||||
|
||||
check(!instr->isDPP() && !instr->isSDWA() &&
|
||||
(!instr->isVOP3() || program->chip_class >= GFX10) &&
|
||||
(!instr->isVOP3P() || program->chip_class >= GFX10),
|
||||
(!instr->isVOP3() || program->gfx_level >= GFX10) &&
|
||||
(!instr->isVOP3P() || program->gfx_level >= GFX10),
|
||||
"Literal applied on wrong instruction format", instr.get());
|
||||
|
||||
check(literal.isUndefined() || (literal.size() == op.size() &&
|
||||
|
@ -305,12 +305,12 @@ validate_ir(Program* program)
|
|||
instr->opcode == aco_opcode::v_lshrrev_b64 ||
|
||||
instr->opcode == aco_opcode::v_ashrrev_i64;
|
||||
unsigned const_bus_limit = 1;
|
||||
if (program->chip_class >= GFX10 && !is_shift64)
|
||||
if (program->gfx_level >= GFX10 && !is_shift64)
|
||||
const_bus_limit = 2;
|
||||
|
||||
uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
|
||||
if (instr->isSDWA())
|
||||
scalar_mask = program->chip_class >= GFX9 ? 0x7 : 0x4;
|
||||
scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
|
||||
else if (instr->isDPP())
|
||||
scalar_mask = 0x4;
|
||||
|
||||
|
@ -412,7 +412,7 @@ validate_ir(Program* program)
|
|||
check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
|
||||
instr->operands[0].regClass().type() == RegType::sgpr,
|
||||
"Cannot extract SGPR value from VGPR vector", instr.get());
|
||||
check(program->chip_class >= GFX9 ||
|
||||
check(program->gfx_level >= GFX9 ||
|
||||
!instr->definitions[0].regClass().is_subdword() ||
|
||||
instr->operands[0].regClass().type() == RegType::vgpr,
|
||||
"Cannot extract subdword from SGPR before GFX9+", instr.get());
|
||||
|
@ -430,7 +430,7 @@ validate_ir(Program* program)
|
|||
"Wrong Definition type for VGPR split_vector", instr.get());
|
||||
} else {
|
||||
for (const Definition& def : instr->definitions)
|
||||
check(program->chip_class >= GFX9 || !def.regClass().is_subdword(),
|
||||
check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
|
||||
"Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_parallelcopy) {
|
||||
|
@ -610,7 +610,7 @@ validate_ir(Program* program)
|
|||
"TFE/LWE loads",
|
||||
instr.get());
|
||||
}
|
||||
check(instr->operands.size() == 4 || program->chip_class >= GFX10,
|
||||
check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
|
||||
"NSA is only supported on GFX10+", instr.get());
|
||||
for (unsigned i = 3; i < instr->operands.size(); i++) {
|
||||
if (instr->operands.size() == 4) {
|
||||
|
@ -762,14 +762,15 @@ ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
|
|||
}
|
||||
|
||||
bool
|
||||
validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, unsigned index)
|
||||
validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
|
||||
unsigned index)
|
||||
{
|
||||
Operand op = instr->operands[index];
|
||||
unsigned byte = op.physReg().byte();
|
||||
|
||||
if (instr->opcode == aco_opcode::p_as_uniform)
|
||||
return byte == 0;
|
||||
if (instr->isPseudo() && chip >= GFX8)
|
||||
if (instr->isPseudo() && gfx_level >= GFX8)
|
||||
return true;
|
||||
if (instr->isSDWA())
|
||||
return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
|
||||
|
@ -781,7 +782,7 @@ validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, un
|
|||
return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
|
||||
((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
|
||||
}
|
||||
if (byte == 2 && can_use_opsel(chip, instr->opcode, index))
|
||||
if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
|
||||
return true;
|
||||
|
||||
switch (instr->opcode) {
|
||||
|
@ -824,17 +825,17 @@ validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, un
|
|||
}
|
||||
|
||||
bool
|
||||
validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
|
||||
validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
|
||||
{
|
||||
Definition def = instr->definitions[0];
|
||||
unsigned byte = def.physReg().byte();
|
||||
|
||||
if (instr->isPseudo() && chip >= GFX8)
|
||||
if (instr->isPseudo() && gfx_level >= GFX8)
|
||||
return true;
|
||||
if (instr->isSDWA())
|
||||
return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
|
||||
byte % instr->sdwa().dst_sel.size() == 0;
|
||||
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1))
|
||||
if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
|
||||
return true;
|
||||
|
||||
switch (instr->opcode) {
|
||||
|
@ -859,17 +860,17 @@ validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
|
|||
unsigned
|
||||
get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
|
||||
{
|
||||
chip_class chip = program->chip_class;
|
||||
amd_gfx_level gfx_level = program->gfx_level;
|
||||
Definition def = instr->definitions[index];
|
||||
|
||||
if (instr->isPseudo())
|
||||
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
|
||||
return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
|
||||
if (instr->isVALU()) {
|
||||
assert(def.bytes() <= 2);
|
||||
if (instr->isSDWA())
|
||||
return instr->sdwa().dst_sel.size();
|
||||
|
||||
if (instr_is_16bit(chip, instr->opcode))
|
||||
if (instr_is_16bit(gfx_level, instr->opcode))
|
||||
return 2;
|
||||
|
||||
return 4;
|
||||
|
@ -1009,7 +1010,7 @@ validate_ra(Program* program)
|
|||
err |= ra_fail(program, loc, Location(),
|
||||
"Operand %d fixed to vcc but needs_vcc=false", i);
|
||||
if (op.regClass().is_subdword() &&
|
||||
!validate_subdword_operand(program->chip_class, instr, i))
|
||||
!validate_subdword_operand(program->gfx_level, instr, i))
|
||||
err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
|
||||
if (!assignments[op.tempId()].firstloc.block)
|
||||
assignments[op.tempId()].firstloc = loc;
|
||||
|
@ -1040,7 +1041,7 @@ validate_ra(Program* program)
|
|||
err |= ra_fail(program, loc, Location(),
|
||||
"Definition %d fixed to vcc but needs_vcc=false", i);
|
||||
if (def.regClass().is_subdword() &&
|
||||
!validate_subdword_definition(program->chip_class, instr))
|
||||
!validate_subdword_definition(program->gfx_level, instr))
|
||||
err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
|
||||
if (!assignments[def.tempId()].firstloc.block)
|
||||
assignments[def.tempId()].firstloc = loc;
|
||||
|
|
|
@ -43,7 +43,7 @@ extern FILE *output;
|
|||
|
||||
bool set_variant(const char *name);
|
||||
|
||||
inline bool set_variant(chip_class cls, const char *rest="")
|
||||
inline bool set_variant(amd_gfx_level cls, const char *rest="")
|
||||
{
|
||||
char buf[8+strlen(rest)];
|
||||
if (cls != GFX10_3) {
|
||||
|
|
|
@ -72,13 +72,13 @@ static std::mutex create_device_mutex;
|
|||
FUNCTION_LIST
|
||||
#undef ITEM
|
||||
|
||||
void create_program(enum chip_class chip_class, Stage stage, unsigned wave_size, enum radeon_family family)
|
||||
void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
|
||||
{
|
||||
memset(&config, 0, sizeof(config));
|
||||
info.wave_size = wave_size;
|
||||
|
||||
program.reset(new Program);
|
||||
aco::init_program(program.get(), stage, &info, chip_class, family, false, &config);
|
||||
aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
|
||||
program->workgroup_size = UINT_MAX;
|
||||
calc_min_waves(program.get());
|
||||
|
||||
|
@ -98,15 +98,15 @@ void create_program(enum chip_class chip_class, Stage stage, unsigned wave_size,
|
|||
config.float_mode = program->blocks[0].fp_mode.val;
|
||||
}
|
||||
|
||||
bool setup_cs(const char *input_spec, enum chip_class chip_class,
|
||||
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
|
||||
enum radeon_family family, const char* subvariant,
|
||||
unsigned wave_size)
|
||||
{
|
||||
if (!set_variant(chip_class, subvariant))
|
||||
if (!set_variant(gfx_level, subvariant))
|
||||
return false;
|
||||
|
||||
memset(&info, 0, sizeof(info));
|
||||
create_program(chip_class, compute_cs, wave_size, family);
|
||||
create_program(gfx_level, compute_cs, wave_size, family);
|
||||
|
||||
if (input_spec) {
|
||||
std::vector<RegClass> input_classes;
|
||||
|
@ -236,7 +236,7 @@ void finish_assembler_test()
|
|||
|
||||
/* we could use CLRX for disassembly but that would require it to be
|
||||
* installed */
|
||||
if (program->chip_class >= GFX8) {
|
||||
if (program->gfx_level >= GFX8) {
|
||||
print_asm(program.get(), binary, exec_size / 4u, output);
|
||||
} else {
|
||||
//TODO: maybe we should use CLRX and skip this test if it's not available?
|
||||
|
@ -350,10 +350,10 @@ Temp ext_ubyte(Temp src, unsigned idx, Builder b)
|
|||
Operand::c32(8u), Operand::c32(false));
|
||||
}
|
||||
|
||||
VkDevice get_vk_device(enum chip_class chip_class)
|
||||
VkDevice get_vk_device(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
enum radeon_family family;
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX6:
|
||||
family = CHIP_TAHITI;
|
||||
break;
|
||||
|
|
|
@ -70,9 +70,9 @@ namespace aco {
|
|||
struct ra_test_policy;
|
||||
}
|
||||
|
||||
void create_program(enum chip_class chip_class, aco::Stage stage,
|
||||
void create_program(enum amd_gfx_level gfx_level, aco::Stage stage,
|
||||
unsigned wave_size=64, enum radeon_family family=CHIP_UNKNOWN);
|
||||
bool setup_cs(const char *input_spec, enum chip_class chip_class,
|
||||
bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
|
||||
enum radeon_family family=CHIP_UNKNOWN, const char* subvariant = "",
|
||||
unsigned wave_size=64);
|
||||
|
||||
|
@ -104,7 +104,7 @@ aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b=bld);
|
|||
aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b=bld);
|
||||
|
||||
/* vulkan helpers */
|
||||
VkDevice get_vk_device(enum chip_class chip_class);
|
||||
VkDevice get_vk_device(enum amd_gfx_level gfx_level);
|
||||
VkDevice get_vk_device(enum radeon_family family);
|
||||
|
||||
void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
|
||||
|
|
|
@ -27,7 +27,7 @@ using namespace aco;
|
|||
|
||||
BEGIN_TEST(assembler.s_memtime)
|
||||
for (unsigned i = GFX6; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//~gfx[6-7]>> c7800000
|
||||
|
@ -41,7 +41,7 @@ BEGIN_TEST(assembler.s_memtime)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.branch_3f)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//! BB0:
|
||||
|
@ -60,7 +60,7 @@ BEGIN_TEST(assembler.branch_3f)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.unconditional_forwards)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//!BB0:
|
||||
|
@ -90,7 +90,7 @@ BEGIN_TEST(assembler.long_jump.unconditional_forwards)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.conditional_forwards)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//! BB0:
|
||||
|
@ -123,7 +123,7 @@ BEGIN_TEST(assembler.long_jump.conditional_forwards)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.unconditional_backwards)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//!BB0:
|
||||
|
@ -151,7 +151,7 @@ BEGIN_TEST(assembler.long_jump.unconditional_backwards)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.conditional_backwards)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//!BB0:
|
||||
|
@ -180,7 +180,7 @@ BEGIN_TEST(assembler.long_jump.conditional_backwards)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.3f)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//! BB0:
|
||||
|
@ -205,7 +205,7 @@ BEGIN_TEST(assembler.long_jump.3f)
|
|||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.long_jump.constaddr)
|
||||
if (!setup_cs(NULL, (chip_class)GFX10))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//>> s_getpc_b64 s[0:1] ; be801f00
|
||||
|
@ -232,7 +232,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(assembler.v_add3)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
|
||||
|
@ -250,7 +250,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(assembler.v_add3_clamp)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
|
||||
|
@ -269,7 +269,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(assembler.smem_offset)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Definition dst(PhysReg(7), s1);
|
||||
|
|
|
@ -28,7 +28,7 @@ using namespace aco;
|
|||
BEGIN_TEST(builder.v_mul_imm)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
/* simple optimizations */
|
||||
|
|
|
@ -204,7 +204,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(form_hard_clauses.nsa)
|
||||
for (unsigned i = GFX10; i <= GFX10_3; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//>> p_unit_test 0
|
||||
|
|
|
@ -60,7 +60,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(isel.compute.simple)
|
||||
for (unsigned i = GFX7; i <= GFX8; i++) {
|
||||
if (!set_variant((chip_class)i))
|
||||
if (!set_variant((amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
QoShaderModuleCreateInfo cs = qoShaderModuleCreateInfoGLSL(COMPUTE,
|
||||
|
@ -75,7 +75,7 @@ BEGIN_TEST(isel.compute.simple)
|
|||
}
|
||||
);
|
||||
|
||||
PipelineBuilder pbld(get_vk_device((chip_class)i));
|
||||
PipelineBuilder pbld(get_vk_device((amd_gfx_level)i));
|
||||
pbld.add_cs(cs);
|
||||
pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "ACO IR", true);
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(isel.gs.no_outputs)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
if (!set_variant((chip_class)i))
|
||||
if (!set_variant((amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
||||
|
@ -100,7 +100,7 @@ BEGIN_TEST(isel.gs.no_outputs)
|
|||
}
|
||||
);
|
||||
|
||||
PipelineBuilder pbld(get_vk_device((chip_class)i));
|
||||
PipelineBuilder pbld(get_vk_device((amd_gfx_level)i));
|
||||
pbld.add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
|
||||
pbld.add_stage(VK_SHADER_STAGE_GEOMETRY_BIT, gs);
|
||||
pbld.create_pipeline();
|
||||
|
@ -112,7 +112,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(isel.gs.no_verts)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
if (!set_variant((chip_class)i))
|
||||
if (!set_variant((amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
||||
|
@ -126,7 +126,7 @@ BEGIN_TEST(isel.gs.no_verts)
|
|||
void main() {}
|
||||
);
|
||||
|
||||
PipelineBuilder pbld(get_vk_device((chip_class)i));
|
||||
PipelineBuilder pbld(get_vk_device((amd_gfx_level)i));
|
||||
pbld.add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
|
||||
pbld.add_stage(VK_SHADER_STAGE_GEOMETRY_BIT, gs);
|
||||
pbld.create_pipeline();
|
||||
|
@ -138,7 +138,7 @@ END_TEST
|
|||
|
||||
BEGIN_TEST(isel.sparse.clause)
|
||||
for (unsigned i = GFX10_3; i <= GFX10_3; i++) {
|
||||
if (!set_variant((chip_class)i))
|
||||
if (!set_variant((amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
QoShaderModuleCreateInfo cs = qoShaderModuleCreateInfoGLSL(COMPUTE,
|
||||
|
@ -179,7 +179,7 @@ BEGIN_TEST(isel.sparse.clause)
|
|||
|
||||
fprintf(output, "llvm_version: %u\n", LLVM_VERSION_MAJOR);
|
||||
|
||||
PipelineBuilder pbld(get_vk_device((chip_class)i));
|
||||
PipelineBuilder pbld(get_vk_device((amd_gfx_level)i));
|
||||
pbld.add_cs(cs);
|
||||
pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "ACO IR", true);
|
||||
pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "Assembly", true);
|
||||
|
|
|
@ -28,7 +28,7 @@ using namespace aco;
|
|||
BEGIN_TEST(optimize.neg)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//! v1: %res0 = v_mul_f32 %a, -%b
|
||||
|
@ -272,7 +272,7 @@ Temp create_subbrev_co(Operand op0, Operand op1, Operand op2)
|
|||
BEGIN_TEST(optimize.cndmask)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, s1: %b, s2: %c = p_startpgm
|
||||
if (!setup_cs("v1 s1 s2", (chip_class)i))
|
||||
if (!setup_cs("v1 s1 s2", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Temp subbrev;
|
||||
|
@ -316,7 +316,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.add_lshl)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> s1: %a, v1: %b = p_startpgm
|
||||
if (!setup_cs("s1 v1", (chip_class)i))
|
||||
if (!setup_cs("s1 v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Temp shift;
|
||||
|
@ -398,7 +398,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.bcnt)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, s1: %b = p_startpgm
|
||||
if (!setup_cs("v1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Temp bcnt;
|
||||
|
@ -714,7 +714,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.minmax)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a = p_startpgm
|
||||
if (!setup_cs("v1", (chip_class)i))
|
||||
if (!setup_cs("v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//! v1: %res0 = v_max3_f32 0, -0, %a
|
||||
|
@ -737,7 +737,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_32_24)
|
||||
for (unsigned i = GFX8; i <= GFX9; i++) {
|
||||
//>> v1: %a, v1: %b, v1: %c = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//! v1: %res0 = v_mad_u32_u24 %b, %c, %a
|
||||
|
@ -758,7 +758,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.add_lshlrev)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Temp lshl;
|
||||
|
@ -886,7 +886,7 @@ BEGIN_TEST(optimize.denorm_propagation)
|
|||
sprintf(subvariant, "_%s_%s_%s_%s",
|
||||
cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
|
||||
denorm_op_names[(int)cfg.op], srcdest_op_name(cfg.dest));
|
||||
if (!setup_cs("v1 s2", (chip_class)i, CHIP_UNKNOWN, subvariant))
|
||||
if (!setup_cs("v1 s2", (amd_gfx_level)i, CHIP_UNKNOWN, subvariant))
|
||||
continue;
|
||||
|
||||
bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
|
||||
|
@ -1161,7 +1161,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.input_conv.basic)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v2b: %a16 = p_startpgm
|
||||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1196,7 +1196,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.input_conv.precision)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v2b: %a16 = p_startpgm
|
||||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1249,7 +1249,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.input_conv.modifiers)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v2b: %a16 = p_startpgm
|
||||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1344,7 +1344,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.output_conv.basic)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, v1: %c, v2b: %a16, v2b: %b16 = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1 v2b v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 v1 v2b v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1386,7 +1386,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.output_conv.precision)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v2b: %a16 = p_startpgm
|
||||
if (!setup_cs("v2b", (chip_class)i))
|
||||
if (!setup_cs("v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1410,7 +1410,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.output_conv.modifiers)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, v2b: %a16, v2b: %b16 = p_startpgm
|
||||
if (!setup_cs("v1 v1 v2b v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 v2b v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1459,7 +1459,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.fma.basic)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, v1: %c, v2b: %a16, v2b: %c16 = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1 v2b v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 v1 v2b v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1513,7 +1513,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.fma.precision)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, v1: %c, v2b: %a16, v2b: %b16 = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1 v2b v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 v1 v2b v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1575,7 +1575,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.clamp)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v2b: %a16 = p_startpgm
|
||||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
@ -1602,7 +1602,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.mad_mix.cast)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a, v2b: %a16 = p_startpgm
|
||||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
if (!setup_cs("v1 v2b", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
|
|
@ -35,12 +35,12 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
|
|||
* result in v0.
|
||||
*/
|
||||
|
||||
for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) {
|
||||
for (amd_gfx_level cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (amd_gfx_level)((unsigned)cc + 1)) {
|
||||
for (bool pessimistic : { false, true }) {
|
||||
const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
|
||||
|
||||
//>> v1: %_:v[#a] = p_startpgm
|
||||
if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant))
|
||||
if (!setup_cs("v1", (amd_gfx_level)cc, CHIP_UNKNOWN, subvariant))
|
||||
return;
|
||||
|
||||
//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
|
||||
|
|
|
@ -29,7 +29,7 @@ using namespace aco;
|
|||
BEGIN_TEST(validate.sdwa.allow)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
//>> Validation results:
|
||||
//! Validation passed
|
||||
|
@ -50,7 +50,7 @@ END_TEST
|
|||
BEGIN_TEST(validate.sdwa.support)
|
||||
for (unsigned i = GFX7; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
//>> Validation results:
|
||||
|
||||
|
@ -66,7 +66,7 @@ END_TEST
|
|||
BEGIN_TEST(validate.sdwa.operands)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %vgpr0, v1: %vgp1, s1: %sgpr0, s1: %sgpr1 = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
//>> Validation results:
|
||||
|
||||
|
@ -95,7 +95,7 @@ END_TEST
|
|||
BEGIN_TEST(validate.sdwa.vopc)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %vgpr0, v1: %vgp1, s1: %sgpr0, s1: %sgpr1 = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
//>> Validation results:
|
||||
|
||||
|
@ -116,7 +116,7 @@ END_TEST
|
|||
BEGIN_TEST(validate.sdwa.omod)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %vgpr0, v1: %vgp1, s1: %sgpr0, s1: %sgpr1 = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
//>> Validation results:
|
||||
|
||||
|
@ -132,7 +132,7 @@ END_TEST
|
|||
BEGIN_TEST(validate.sdwa.vcc)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %vgpr0, v1: %vgpr1, s2: %sgpr0 = p_startpgm
|
||||
if (!setup_cs("v1 v1 s2", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s2", (amd_gfx_level)i))
|
||||
continue;
|
||||
//>> Validation results:
|
||||
|
||||
|
@ -154,7 +154,7 @@ BEGIN_TEST(optimize.sdwa.extract)
|
|||
for (unsigned i = GFX7; i <= GFX10; i++) {
|
||||
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
continue;
|
||||
|
||||
//; def standard_test(index, sel):
|
||||
|
@ -277,7 +277,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.sdwa.extract_modifiers)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
aco_opcode ext = aco_opcode::p_extract;
|
||||
|
@ -334,7 +334,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.sdwa.extract.sgpr)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
aco_opcode ext = aco_opcode::p_extract;
|
||||
|
@ -378,7 +378,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.sdwa.from_vop3)
|
||||
for (unsigned i = GFX8; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
|
||||
if (!setup_cs("v1 v1 s1 s1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//! v1: %res0 = v_mul_f32 -|%a|, %b dst_sel:dword src0_sel:dword src1_sel:ubyte0
|
||||
|
@ -425,7 +425,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.sdwa.insert)
|
||||
for (unsigned i = GFX7; i <= GFX10; i++) {
|
||||
//>> v1: %a, v1: %b = p_startpgm
|
||||
if (!setup_cs("v1 v1", (chip_class)i))
|
||||
if (!setup_cs("v1 v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
aco_opcode ext = aco_opcode::p_extract;
|
||||
|
@ -523,7 +523,7 @@ END_TEST
|
|||
BEGIN_TEST(optimize.sdwa.insert_modifiers)
|
||||
for (unsigned i = GFX8; i <= GFX9; i++) {
|
||||
//>> v1: %a = p_startpgm
|
||||
if (!setup_cs("v1", (chip_class)i))
|
||||
if (!setup_cs("v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
aco_opcode ins = aco_opcode::p_insert;
|
||||
|
|
|
@ -54,7 +54,7 @@ BEGIN_TEST(simple.2)
|
|||
//~gfx9! test gfx9
|
||||
//! test all
|
||||
for (int cls = GFX6; cls <= GFX7; cls++) {
|
||||
if (!set_variant((enum chip_class)cls))
|
||||
if (!set_variant((enum amd_gfx_level)cls))
|
||||
continue;
|
||||
fprintf(output, "test gfx67\n");
|
||||
fprintf(output, "test all\n");
|
||||
|
|
|
@ -44,7 +44,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
|||
v1_b3.reg_b += 3;
|
||||
|
||||
for (unsigned i = GFX6; i <= GFX7; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//~gfx[67]>> p_unit_test 0
|
||||
|
@ -224,7 +224,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
|
|||
}
|
||||
|
||||
for (unsigned i = GFX8; i <= GFX9; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
//~gfx[89]>> p_unit_test 0
|
||||
|
@ -374,7 +374,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
|
|||
v1_hi.reg_b += 2;
|
||||
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
/* 16-bit pack */
|
||||
|
@ -503,7 +503,7 @@ BEGIN_TEST(to_hw_instr.extract)
|
|||
|
||||
for (unsigned i = GFX7; i <= GFX9; i++) {
|
||||
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
|
||||
if (!setup_cs(NULL, (chip_class)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
|
||||
continue;
|
||||
|
||||
#define EXT(idx, size) \
|
||||
|
@ -591,7 +591,7 @@ BEGIN_TEST(to_hw_instr.insert)
|
|||
PhysReg v1_lo{257};
|
||||
|
||||
for (unsigned i = GFX7; i <= GFX9; i++) {
|
||||
if (!setup_cs(NULL, (chip_class)i))
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
#define INS(idx, size) \
|
||||
|
|
|
@ -56,14 +56,14 @@ struct ac_llvm_flow {
|
|||
* The caller is responsible for initializing ctx::module and ctx::builder.
|
||||
*/
|
||||
void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
|
||||
enum chip_class chip_class, enum radeon_family family,
|
||||
enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
const struct radeon_info *info,
|
||||
enum ac_float_mode float_mode, unsigned wave_size,
|
||||
unsigned ballot_mask_bits)
|
||||
{
|
||||
ctx->context = LLVMContextCreate();
|
||||
|
||||
ctx->chip_class = chip_class;
|
||||
ctx->gfx_level = gfx_level;
|
||||
ctx->family = family;
|
||||
ctx->info = info;
|
||||
ctx->wave_size = wave_size;
|
||||
|
@ -393,7 +393,7 @@ void ac_build_s_barrier(struct ac_llvm_context *ctx, gl_shader_stage stage)
|
|||
/* GFX6 only: s_barrier isn’t needed in TCS because an entire patch always fits into
|
||||
* a single wave due to a bug workaround disallowing multi-wave HS workgroups.
|
||||
*/
|
||||
if (ctx->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL)
|
||||
if (ctx->gfx_level == GFX6 && stage == MESA_SHADER_TESS_CTRL)
|
||||
return;
|
||||
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
|
||||
|
@ -865,7 +865,7 @@ void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_
|
|||
*
|
||||
* Clamp the layer earlier to work around the issue.
|
||||
*/
|
||||
if (ctx->chip_class <= GFX8) {
|
||||
if (ctx->gfx_level <= GFX8) {
|
||||
LLVMValueRef ge0;
|
||||
ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
|
||||
tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
|
||||
|
@ -949,7 +949,7 @@ LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_c
|
|||
{
|
||||
LLVMValueRef args[5];
|
||||
|
||||
if (ctx->chip_class >= GFX11) {
|
||||
if (ctx->gfx_level >= GFX11) {
|
||||
LLVMValueRef p;
|
||||
LLVMValueRef p10;
|
||||
|
||||
|
@ -1002,7 +1002,7 @@ LLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef ll
|
|||
{
|
||||
LLVMValueRef args[6];
|
||||
|
||||
if (ctx->chip_class >= GFX11) {
|
||||
if (ctx->gfx_level >= GFX11) {
|
||||
LLVMValueRef p;
|
||||
LLVMValueRef p10;
|
||||
|
||||
|
@ -1059,7 +1059,7 @@ LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef pa
|
|||
{
|
||||
LLVMValueRef args[4];
|
||||
|
||||
if (ctx->chip_class >= GFX11) {
|
||||
if (ctx->gfx_level >= GFX11) {
|
||||
LLVMValueRef p;
|
||||
|
||||
args[0] = llvm_chan;
|
||||
|
@ -1186,12 +1186,12 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
|
|||
static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
|
||||
{
|
||||
return cache_policy |
|
||||
(ctx->chip_class >= GFX10 && ctx->chip_class < GFX11 && cache_policy & ac_glc ? ac_dlc : 0);
|
||||
(ctx->gfx_level >= GFX10 && ctx->gfx_level < GFX11 && cache_policy & ac_glc ? ac_dlc : 0);
|
||||
}
|
||||
|
||||
static unsigned get_store_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
|
||||
{
|
||||
if (ctx->chip_class >= GFX11)
|
||||
if (ctx->gfx_level >= GFX11)
|
||||
cache_policy &= ~ac_glc; /* GLC has no effect on stores */
|
||||
return cache_policy;
|
||||
}
|
||||
|
@ -1239,7 +1239,7 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
|
|||
unsigned num_channels = ac_get_llvm_num_components(vdata);
|
||||
|
||||
/* Split 3 channel stores if unsupported. */
|
||||
if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
|
||||
if (num_channels == 3 && !ac_has_vec3_support(ctx->gfx_level, false)) {
|
||||
LLVMValueRef v[3], v01, voffset2;
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@ -1275,13 +1275,13 @@ static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLV
|
|||
args[idx++] = soffset ? soffset : ctx->i32_0;
|
||||
args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
|
||||
unsigned func =
|
||||
!ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
|
||||
!ac_has_vec3_support(ctx->gfx_level, use_format) && num_channels == 3 ? 4 : num_channels;
|
||||
const char *indexing_kind = structurized ? "struct" : "raw";
|
||||
char name[256], type_name[8];
|
||||
|
||||
/* D16 is only supported on gfx8+ */
|
||||
assert(!use_format || (channel_type != ctx->f16 && channel_type != ctx->i16) ||
|
||||
ctx->chip_class >= GFX8);
|
||||
ctx->gfx_level >= GFX8);
|
||||
|
||||
LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
|
||||
ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
|
||||
|
@ -1302,7 +1302,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
|
|||
bool can_speculate, bool allow_smem)
|
||||
{
|
||||
if (allow_smem && !(cache_policy & ac_slc) &&
|
||||
(!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
|
||||
(!(cache_policy & ac_glc) || ctx->gfx_level >= GFX8)) {
|
||||
assert(vindex == NULL);
|
||||
|
||||
LLVMValueRef result[8];
|
||||
|
@ -1326,7 +1326,7 @@ LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc
|
|||
if (num_channels == 1)
|
||||
return result[0];
|
||||
|
||||
if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
|
||||
if (num_channels == 3 && !ac_has_vec3_support(ctx->gfx_level, false))
|
||||
result[num_channels++] = LLVMGetUndef(ctx->f32);
|
||||
return ac_build_gather_values(ctx, result, num_channels);
|
||||
}
|
||||
|
@ -1394,10 +1394,10 @@ static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
|
|||
args[idx++] = vindex ? vindex : ctx->i32_0;
|
||||
args[idx++] = voffset ? voffset : ctx->i32_0;
|
||||
args[idx++] = soffset ? soffset : ctx->i32_0;
|
||||
args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
|
||||
args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->gfx_level, dfmt, nfmt), 0);
|
||||
args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
|
||||
unsigned func =
|
||||
!ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
|
||||
!ac_has_vec3_support(ctx->gfx_level, true) && num_channels == 3 ? 4 : num_channels;
|
||||
const char *indexing_kind = structurized ? "struct" : "raw";
|
||||
char name[256], type_name[8];
|
||||
|
||||
|
@ -1538,7 +1538,7 @@ LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigne
|
|||
}
|
||||
|
||||
int log_recombine = 0;
|
||||
if ((ctx->chip_class == GFX6 || ctx->chip_class >= GFX10) && !known_aligned) {
|
||||
if ((ctx->gfx_level == GFX6 || ctx->gfx_level >= GFX10) && !known_aligned) {
|
||||
/* Avoid alignment restrictions by loading one byte at a time. */
|
||||
load_num_channels <<= load_log_size;
|
||||
log_recombine = load_log_size;
|
||||
|
@ -1976,7 +1976,7 @@ void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
|
|||
args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
|
||||
|
||||
if (a->compr) {
|
||||
assert(ctx->chip_class < GFX11);
|
||||
assert(ctx->gfx_level < GFX11);
|
||||
|
||||
args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->v2i16, "");
|
||||
args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->v2i16, "");
|
||||
|
@ -2004,7 +2004,7 @@ void ac_build_export_null(struct ac_llvm_context *ctx)
|
|||
args.valid_mask = 1; /* whether the EXEC mask is valid */
|
||||
args.done = 1; /* DONE bit */
|
||||
/* Gfx11 doesn't support null exports, and mrt0 should be exported instead. */
|
||||
args.target = ctx->chip_class >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
|
||||
args.target = ctx->gfx_level >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
|
||||
args.compr = 0; /* COMPR flag (0 = 32-bit export) */
|
||||
args.out[0] = LLVMGetUndef(ctx->f32); /* R */
|
||||
args.out[1] = LLVMGetUndef(ctx->f32); /* G */
|
||||
|
@ -2108,11 +2108,11 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
|
|||
assert((a->bias ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <=
|
||||
1);
|
||||
assert((a->min_lod ? 1 : 0) + (a->lod ? 1 : 0) + (a->level_zero ? 1 : 0) <= 1);
|
||||
assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic &&
|
||||
assert(!a->d16 || (ctx->gfx_level >= GFX8 && a->opcode != ac_image_atomic &&
|
||||
a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod &&
|
||||
a->opcode != ac_image_get_resinfo));
|
||||
assert(!a->a16 || ctx->chip_class >= GFX9);
|
||||
assert(a->g16 == a->a16 || ctx->chip_class >= GFX10);
|
||||
assert(!a->a16 || ctx->gfx_level >= GFX9);
|
||||
assert(a->g16 == a->a16 || ctx->gfx_level >= GFX10);
|
||||
|
||||
assert(!a->offset ||
|
||||
ac_get_elem_bits(ctx, LLVMTypeOf(a->offset)) == 32);
|
||||
|
@ -2358,7 +2358,7 @@ LLVMValueRef ac_build_cvt_pknorm_i16_f16(struct ac_llvm_context *ctx,
|
|||
LLVMTypeRef param_types[] = {ctx->f16, ctx->f16};
|
||||
LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false);
|
||||
LLVMValueRef code = LLVMConstInlineAsm(calltype,
|
||||
ctx->chip_class >= GFX11 ?
|
||||
ctx->gfx_level >= GFX11 ?
|
||||
"v_cvt_pk_norm_i16_f16 $0, $1, $2" :
|
||||
"v_cvt_pknorm_i16_f16 $0, $1, $2",
|
||||
"=v,v,v", false, false);
|
||||
|
@ -2371,7 +2371,7 @@ LLVMValueRef ac_build_cvt_pknorm_u16_f16(struct ac_llvm_context *ctx,
|
|||
LLVMTypeRef param_types[] = {ctx->f16, ctx->f16};
|
||||
LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false);
|
||||
LLVMValueRef code = LLVMConstInlineAsm(calltype,
|
||||
ctx->chip_class >= GFX11 ?
|
||||
ctx->gfx_level >= GFX11 ?
|
||||
"v_cvt_pk_norm_u16_f16 $0, $1, $2" :
|
||||
"v_cvt_pknorm_u16_f16 $0, $1, $2",
|
||||
"=v,v,v", false, false);
|
||||
|
@ -2458,7 +2458,7 @@ LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMVal
|
|||
LLVMValueRef s2)
|
||||
{
|
||||
/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
|
||||
if (ctx->chip_class >= GFX10) {
|
||||
if (ctx->gfx_level >= GFX10) {
|
||||
return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, (LLVMValueRef[]){s0, s1, s2}, 3,
|
||||
AC_FUNC_ATTR_READNONE);
|
||||
}
|
||||
|
@ -2473,7 +2473,7 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
|
|||
|
||||
unsigned expcnt = 7;
|
||||
unsigned lgkmcnt = 63;
|
||||
unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
|
||||
unsigned vmcnt = ctx->gfx_level >= GFX9 ? 63 : 15;
|
||||
unsigned vscnt = 63;
|
||||
|
||||
if (wait_flags & AC_WAIT_EXP)
|
||||
|
@ -2484,7 +2484,7 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
|
|||
vmcnt = 0;
|
||||
|
||||
if (wait_flags & AC_WAIT_VSTORE) {
|
||||
if (ctx->chip_class >= GFX10)
|
||||
if (ctx->gfx_level >= GFX10)
|
||||
vscnt = 0;
|
||||
else
|
||||
vmcnt = 0;
|
||||
|
@ -2500,7 +2500,7 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
|
|||
|
||||
unsigned simm16;
|
||||
|
||||
if (ctx->chip_class >= GFX11)
|
||||
if (ctx->gfx_level >= GFX11)
|
||||
simm16 = expcnt | (lgkmcnt << 4) | (vmcnt << 10);
|
||||
else
|
||||
simm16 = (lgkmcnt << 8) | (expcnt << 4) | (vmcnt & 0xf) | ((vmcnt >> 4) << 14);
|
||||
|
@ -2519,7 +2519,7 @@ LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
|
|||
LLVMValueRef one = LLVMConstReal(type, 1.0);
|
||||
LLVMValueRef result;
|
||||
|
||||
if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8) || type == ctx->v2f16) {
|
||||
if (bitsize == 64 || (bitsize == 16 && ctx->gfx_level <= GFX8) || type == ctx->v2f16) {
|
||||
/* Use fmin/fmax for 64-bit fsat or 16-bit on GFX6-GFX8 because LLVM
|
||||
* doesn't expose an intrinsic.
|
||||
*/
|
||||
|
@ -2547,7 +2547,7 @@ LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
|
|||
AC_FUNC_ATTR_READNONE);
|
||||
}
|
||||
|
||||
if (ctx->chip_class < GFX9 && bitsize == 32) {
|
||||
if (ctx->gfx_level < GFX9 && bitsize == 32) {
|
||||
/* Only pre-GFX9 chips do not flush denorms. */
|
||||
result = ac_build_canonicalize(ctx, result, bitsize);
|
||||
}
|
||||
|
@ -2741,7 +2741,7 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
|
|||
|
||||
void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
|
||||
{
|
||||
unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768;
|
||||
unsigned lds_size = ctx->gfx_level >= GFX7 ? 65536 : 32768;
|
||||
ctx->lds = LLVMBuildIntToPtr(
|
||||
ctx->builder, ctx->i32_0,
|
||||
LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds");
|
||||
|
@ -3642,7 +3642,7 @@ static LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lh
|
|||
static LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
LLVMValueRef identity, unsigned maxprefix)
|
||||
{
|
||||
if (ctx->chip_class >= GFX10) {
|
||||
if (ctx->gfx_level >= GFX10) {
|
||||
/* wavefront shift_right by 1 on GFX10 (emulate dpp_wf_sr1) */
|
||||
LLVMValueRef active, tmp1, tmp2;
|
||||
LLVMValueRef tid = ac_get_thread_id(ctx);
|
||||
|
@ -3672,7 +3672,7 @@ static LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVM
|
|||
|
||||
return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
|
||||
}
|
||||
} else if (ctx->chip_class >= GFX8) {
|
||||
} else if (ctx->gfx_level >= GFX8) {
|
||||
return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
|
||||
}
|
||||
|
||||
|
@ -3716,7 +3716,7 @@ static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMVa
|
|||
|
||||
result = src;
|
||||
|
||||
if (ctx->chip_class <= GFX7) {
|
||||
if (ctx->gfx_level <= GFX7) {
|
||||
assert(maxprefix == 64);
|
||||
LLVMValueRef tid = ac_get_thread_id(ctx);
|
||||
LLVMValueRef active;
|
||||
|
@ -3781,7 +3781,7 @@ static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMVa
|
|||
if (maxprefix <= 16)
|
||||
return result;
|
||||
|
||||
if (ctx->chip_class >= GFX10) {
|
||||
if (ctx->gfx_level >= GFX10) {
|
||||
LLVMValueRef tid = ac_get_thread_id(ctx);
|
||||
LLVMValueRef active;
|
||||
|
||||
|
@ -3882,7 +3882,7 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_
|
|||
if (cluster_size == 4)
|
||||
return ac_build_wwm(ctx, result);
|
||||
|
||||
if (ctx->chip_class >= GFX8)
|
||||
if (ctx->gfx_level >= GFX8)
|
||||
swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
|
||||
else
|
||||
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
|
||||
|
@ -3890,7 +3890,7 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_
|
|||
if (cluster_size == 8)
|
||||
return ac_build_wwm(ctx, result);
|
||||
|
||||
if (ctx->chip_class >= GFX8)
|
||||
if (ctx->gfx_level >= GFX8)
|
||||
swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
|
||||
else
|
||||
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
|
||||
|
@ -3898,9 +3898,9 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_
|
|||
if (cluster_size == 16)
|
||||
return ac_build_wwm(ctx, result);
|
||||
|
||||
if (ctx->chip_class >= GFX10)
|
||||
if (ctx->gfx_level >= GFX10)
|
||||
swap = ac_build_permlane16(ctx, result, 0, true, false);
|
||||
else if (ctx->chip_class >= GFX8 && cluster_size != 32)
|
||||
else if (ctx->gfx_level >= GFX8 && cluster_size != 32)
|
||||
swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
|
||||
else
|
||||
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
|
||||
|
@ -3908,9 +3908,9 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_
|
|||
if (cluster_size == 32)
|
||||
return ac_build_wwm(ctx, result);
|
||||
|
||||
if (ctx->chip_class >= GFX8) {
|
||||
if (ctx->gfx_level >= GFX8) {
|
||||
if (ctx->wave_size == 64) {
|
||||
if (ctx->chip_class >= GFX10)
|
||||
if (ctx->gfx_level >= GFX10)
|
||||
swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
|
||||
else
|
||||
swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
|
||||
|
@ -4134,7 +4134,7 @@ void ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
|
|||
struct ac_export_args *mrt0,
|
||||
struct ac_export_args *mrt1)
|
||||
{
|
||||
assert(ctx->chip_class >= GFX11);
|
||||
assert(ctx->gfx_level >= GFX11);
|
||||
assert(mrt0->enabled_channels == mrt1->enabled_channels);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
|
@ -4147,7 +4147,7 @@ LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src
|
|||
unsigned lane1, unsigned lane2, unsigned lane3)
|
||||
{
|
||||
unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
|
||||
if (ctx->chip_class >= GFX8) {
|
||||
if (ctx->gfx_level >= GFX8) {
|
||||
return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
|
||||
} else {
|
||||
return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
|
||||
|
@ -4316,23 +4316,23 @@ void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueR
|
|||
|
||||
if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
|
||||
assert(!depth);
|
||||
args->compr = ctx->chip_class < GFX11; /* COMPR flag */
|
||||
args->compr = ctx->gfx_level < GFX11; /* COMPR flag */
|
||||
|
||||
if (stencil) {
|
||||
/* Stencil should be in X[23:16]. */
|
||||
stencil = ac_to_integer(ctx, stencil);
|
||||
stencil = LLVMBuildShl(ctx->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), "");
|
||||
args->out[0] = ac_to_float(ctx, stencil);
|
||||
mask |= ctx->chip_class >= GFX11 ? 0x1 : 0x3;
|
||||
mask |= ctx->gfx_level >= GFX11 ? 0x1 : 0x3;
|
||||
}
|
||||
if (samplemask) {
|
||||
/* SampleMask should be in Y[15:0]. */
|
||||
args->out[1] = samplemask;
|
||||
mask |= ctx->chip_class >= GFX11 ? 0x2 : 0xc;
|
||||
mask |= ctx->gfx_level >= GFX11 ? 0x2 : 0xc;
|
||||
}
|
||||
if (mrtz_alpha) {
|
||||
/* MRT0 alpha should be in Y[31:16] if alpha-to-coverage is enabled and MRTZ is present. */
|
||||
assert(ctx->chip_class >= GFX11);
|
||||
assert(ctx->gfx_level >= GFX11);
|
||||
mrtz_alpha = LLVMBuildFPTrunc(ctx->builder, mrtz_alpha, ctx->f16, "");
|
||||
mrtz_alpha = ac_to_integer(ctx, mrtz_alpha);
|
||||
mrtz_alpha = LLVMBuildZExt(ctx->builder, mrtz_alpha, ctx->i32, "");
|
||||
|
@ -4362,7 +4362,7 @@ void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueR
|
|||
|
||||
/* GFX6 (except OLAND and HAINAN) has a bug that it only looks
|
||||
* at the X writemask component. */
|
||||
if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN)
|
||||
if (ctx->gfx_level == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN)
|
||||
mask |= 0x1;
|
||||
|
||||
/* Specify which components to enable */
|
||||
|
@ -4385,7 +4385,7 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
|
|||
* We always have to export at least 1 primitive.
|
||||
* Export a degenerate triangle using vertex 0 for all 3 vertices.
|
||||
*/
|
||||
if (prim_cnt == ctx->i32_0 && ctx->chip_class == GFX10) {
|
||||
if (prim_cnt == ctx->i32_0 && ctx->gfx_level == GFX10) {
|
||||
assert(vtx_cnt == ctx->i32_0);
|
||||
prim_cnt = ctx->i32_1;
|
||||
vtx_cnt = ctx->i32_1;
|
||||
|
|
|
@ -132,7 +132,7 @@ struct ac_llvm_context {
|
|||
unsigned uniform_md_kind;
|
||||
LLVMValueRef empty_md;
|
||||
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
enum radeon_family family;
|
||||
const struct radeon_info *info;
|
||||
|
||||
|
@ -145,7 +145,7 @@ struct ac_llvm_context {
|
|||
};
|
||||
|
||||
void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
|
||||
enum chip_class chip_class, enum radeon_family family,
|
||||
enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||
const struct radeon_info *info,
|
||||
enum ac_float_mode float_mode, unsigned wave_size,
|
||||
unsigned ballot_mask_bits);
|
||||
|
|
|
@ -328,9 +328,9 @@ void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx)
|
|||
|
||||
snprintf(features, sizeof(features), "+DumpCode%s%s",
|
||||
/* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */
|
||||
ctx->chip_class == GFX9 ? ",-promote-alloca" : "",
|
||||
ctx->gfx_level == GFX9 ? ",-promote-alloca" : "",
|
||||
/* Wave32 is the default. */
|
||||
ctx->chip_class >= GFX10 && ctx->wave_size == 64 ?
|
||||
ctx->gfx_level >= GFX10 && ctx->wave_size == 64 ?
|
||||
",+wavefrontsize64,-wavefrontsize32" : "");
|
||||
|
||||
LLVMAddTargetDependentFunctionAttr(F, "target-features", features);
|
||||
|
|
|
@ -129,7 +129,7 @@ bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module
|
|||
char **pelf_buffer, size_t *pelf_size);
|
||||
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
|
||||
|
||||
static inline bool ac_has_vec3_support(enum chip_class chip, bool use_format)
|
||||
static inline bool ac_has_vec3_support(enum amd_gfx_level chip, bool use_format)
|
||||
{
|
||||
/* GFX6 only supports vec3 with load/store format. */
|
||||
return chip != GFX6 || use_format;
|
||||
|
|
|
@ -339,7 +339,7 @@ static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
|
|||
src0 = ac_to_float(ctx, src0);
|
||||
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
|
||||
|
||||
if (ctx->chip_class >= GFX8) {
|
||||
if (ctx->gfx_level >= GFX8) {
|
||||
LLVMValueRef args[2];
|
||||
/* Check if the result is a denormal - and flush to 0 if so. */
|
||||
args[0] = result;
|
||||
|
@ -351,7 +351,7 @@ static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
|
|||
/* need to convert back up to f32 */
|
||||
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
|
||||
|
||||
if (ctx->chip_class >= GFX8)
|
||||
if (ctx->gfx_level >= GFX8)
|
||||
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
|
||||
else {
|
||||
/* for GFX6-GFX7 */
|
||||
|
@ -901,7 +901,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
case nir_op_fmax:
|
||||
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1]);
|
||||
if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
|
||||
if (ctx->ac.gfx_level < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
|
||||
/* Only pre-GFX9 chips do not flush denorms. */
|
||||
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
|
||||
}
|
||||
|
@ -909,19 +909,19 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
case nir_op_fmin:
|
||||
result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1]);
|
||||
if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
|
||||
if (ctx->ac.gfx_level < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
|
||||
/* Only pre-GFX9 chips do not flush denorms. */
|
||||
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
|
||||
}
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
/* FMA is slow on gfx6-8, so it shouldn't be used. */
|
||||
assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.chip_class >= GFX9);
|
||||
assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.gfx_level >= GFX9);
|
||||
result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1], src[2]);
|
||||
break;
|
||||
case nir_op_ffmaz:
|
||||
assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.chip_class >= GFX10_3);
|
||||
assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.gfx_level >= GFX10_3);
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
src[1] = ac_to_float(&ctx->ac, src[1]);
|
||||
src[2] = ac_to_float(&ctx->ac, src[2]);
|
||||
|
@ -1386,7 +1386,7 @@ static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef des
|
|||
LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), "");
|
||||
|
||||
/* GFX8 only */
|
||||
if (ctx->ac.chip_class == GFX8 && in_elements) {
|
||||
if (ctx->ac.gfx_level == GFX8 && in_elements) {
|
||||
/* On GFX8, the descriptor contains the size in bytes,
|
||||
* but TXQ must return the size in elements.
|
||||
* The stride is always non-zero for resources using TXQ.
|
||||
|
@ -1486,7 +1486,7 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, struct ac
|
|||
}
|
||||
|
||||
/* Query the texture size. */
|
||||
resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
|
||||
resinfo.dim = ac_get_sampler_dim(ctx->gfx_level, instr->sampler_dim, instr->is_array);
|
||||
resinfo.opcode = ac_image_get_resinfo;
|
||||
resinfo.dmask = 0xf;
|
||||
resinfo.lod = ctx->i32_0;
|
||||
|
@ -1611,13 +1611,13 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
|
|||
if (!ctx->ac.info->has_3d_cube_border_color_mipmap)
|
||||
args->level_zero = false;
|
||||
|
||||
if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8 &&
|
||||
if (instr->op == nir_texop_tg4 && ctx->ac.gfx_level <= GFX8 &&
|
||||
(instr->dest_type & (nir_type_int | nir_type_uint))) {
|
||||
return lower_gather4_integer(&ctx->ac, args, instr);
|
||||
}
|
||||
|
||||
/* Fixup for GFX9 which allocates 1D textures as 2D. */
|
||||
if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
|
||||
if (instr->op == nir_texop_lod && ctx->ac.gfx_level == GFX9) {
|
||||
if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
|
||||
args->coords[1] = ctx->ac.i32_0;
|
||||
}
|
||||
|
@ -1777,7 +1777,7 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qual
|
|||
* store opcodes not aligned to a dword are affected. The only way to
|
||||
* get unaligned stores is through shader images.
|
||||
*/
|
||||
if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
|
||||
if (((may_store_unaligned && ctx->ac.gfx_level == GFX6) ||
|
||||
/* If this is write-only, don't keep data in L1 to prevent
|
||||
* evicting L1 cache lines that may be needed by other
|
||||
* instructions.
|
||||
|
@ -1852,7 +1852,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in
|
|||
/* Due to alignment issues, split stores of 8-bit/16-bit
|
||||
* vectors.
|
||||
*/
|
||||
if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
|
||||
if (ctx->ac.gfx_level == GFX6 && count > 1 && elem_size_bytes < 4) {
|
||||
writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
|
||||
count = 1;
|
||||
num_bytes = elem_size_bytes;
|
||||
|
@ -2486,11 +2486,11 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
|
|||
ASSERTED bool add_frag_pos =
|
||||
(dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
|
||||
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
|
||||
bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
|
||||
bool gfx9_1d = ctx->ac.gfx_level == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
|
||||
assert(!add_frag_pos && "Input attachments should be lowered by this point.");
|
||||
count = image_type_to_components_count(dim, is_array);
|
||||
|
||||
if (ctx->ac.chip_class < GFX11 &&
|
||||
if (ctx->ac.gfx_level < GFX11 &&
|
||||
is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
|
||||
instr->intrinsic == nir_intrinsic_bindless_image_load ||
|
||||
instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
|
||||
|
@ -2529,7 +2529,7 @@ static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_ins
|
|||
args->coords[1] = ctx->ac.i32_0;
|
||||
count++;
|
||||
}
|
||||
if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {
|
||||
if (ctx->ac.gfx_level == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {
|
||||
/* The hw can't bind a slice of a 3D image as a 2D
|
||||
* image, because it ignores BASE_ARRAY if the target
|
||||
* is 3D. The workaround is to read BASE_ARRAY and set
|
||||
|
@ -2621,7 +2621,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
|
|||
args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
|
||||
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
|
||||
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
|
||||
if (!level_zero)
|
||||
args.lod = get_src(ctx, instr->src[3]);
|
||||
args.dmask = 15;
|
||||
|
@ -2713,7 +2713,7 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in
|
|||
args.data[0] = src;
|
||||
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
|
||||
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
|
||||
if (!level_zero)
|
||||
args.lod = get_src(ctx, instr->src[4]);
|
||||
args.dmask = 15;
|
||||
|
@ -2872,7 +2872,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
|
|||
args.data[1] = params[1];
|
||||
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
|
||||
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
|
||||
|
||||
result = ac_build_image_opcode(&ctx->ac, &args);
|
||||
}
|
||||
|
@ -2932,7 +2932,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intri
|
|||
|
||||
struct ac_image_args args = {0};
|
||||
|
||||
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
|
||||
args.dim = ac_get_image_dim(ctx->ac.gfx_level, dim, is_array);
|
||||
args.dmask = 0xf;
|
||||
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
|
||||
args.opcode = ac_image_get_resinfo;
|
||||
|
@ -2942,7 +2942,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intri
|
|||
|
||||
res = ac_build_image_opcode(&ctx->ac, &args);
|
||||
|
||||
if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
|
||||
if (ctx->ac.gfx_level == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
|
||||
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
|
||||
LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
|
||||
res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, "");
|
||||
|
@ -3664,7 +3664,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
|
||||
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
|
||||
} else {
|
||||
if (ctx->ac.chip_class >= GFX10) {
|
||||
if (ctx->ac.gfx_level >= GFX10) {
|
||||
result =
|
||||
LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
|
||||
LLVMConstInt(ctx->ac.i32, 127, 0), "");
|
||||
|
@ -4040,8 +4040,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_shuffle:
|
||||
if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
|
||||
(ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
|
||||
if (ctx->ac.gfx_level == GFX8 || ctx->ac.gfx_level == GFX9 ||
|
||||
(ctx->ac.gfx_level >= GFX10 && ctx->ac.wave_size == 32)) {
|
||||
result =
|
||||
ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
|
||||
} else {
|
||||
|
@ -4477,7 +4477,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValue
|
|||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef img7, samp0;
|
||||
|
||||
if (ctx->ac.chip_class >= GFX8)
|
||||
if (ctx->ac.gfx_level >= GFX8)
|
||||
return samp;
|
||||
|
||||
img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
|
||||
|
@ -4550,7 +4550,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
|
|||
/* The fragment mask is fetched from the compressed
|
||||
* multisampled surface.
|
||||
*/
|
||||
assert(ctx->ac.chip_class < GFX11);
|
||||
assert(ctx->ac.gfx_level < GFX11);
|
||||
main_descriptor = AC_DESC_FMASK;
|
||||
}
|
||||
|
||||
|
@ -4593,7 +4593,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
|
|||
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
|
||||
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
|
||||
}
|
||||
if (ctx->ac.chip_class < GFX11 &&
|
||||
if (ctx->ac.gfx_level < GFX11 &&
|
||||
fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical))
|
||||
*fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr,
|
||||
texture_dynamic_index, false, false);
|
||||
|
@ -4747,7 +4747,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
* Z24 anymore. Do it manually here for GFX8-9; GFX10 has
|
||||
* an explicitly clamped 32-bit float format.
|
||||
*/
|
||||
if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 &&
|
||||
if (args.compare && ctx->ac.gfx_level >= GFX8 && ctx->ac.gfx_level <= GFX9 &&
|
||||
ctx->abi->clamp_shadow_reference) {
|
||||
LLVMValueRef upgraded, clamped;
|
||||
|
||||
|
@ -4775,7 +4775,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
break;
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
num_src_deriv_channels = 1;
|
||||
if (ctx->ac.chip_class == GFX9) {
|
||||
if (ctx->ac.gfx_level == GFX9) {
|
||||
num_dest_deriv_channels = 2;
|
||||
} else {
|
||||
num_dest_deriv_channels = 1;
|
||||
|
@ -4819,7 +4819,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
|
||||
}
|
||||
|
||||
if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
|
||||
if (ctx->ac.gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
|
||||
instr->op != nir_texop_lod) {
|
||||
LLVMValueRef filler;
|
||||
if (instr->op == nir_texop_txf)
|
||||
|
@ -4837,7 +4837,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
args.coords[instr->coord_components] = sample_index;
|
||||
|
||||
if (instr->op == nir_texop_samples_identical) {
|
||||
assert(ctx->ac.chip_class < GFX11);
|
||||
assert(ctx->ac.gfx_level < GFX11);
|
||||
struct ac_image_args txf_args = {0};
|
||||
memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
|
||||
|
||||
|
@ -4851,7 +4851,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
goto write_result;
|
||||
}
|
||||
|
||||
if (ctx->ac.chip_class < GFX11 &&
|
||||
if (ctx->ac.gfx_level < GFX11 &&
|
||||
(instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
|
||||
instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
|
||||
instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch_amd &&
|
||||
|
@ -4890,7 +4890,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
}
|
||||
|
||||
if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
|
||||
args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
|
||||
args.dim = ac_get_sampler_dim(ctx->ac.gfx_level, instr->sampler_dim, instr->is_array);
|
||||
args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
|
||||
}
|
||||
|
||||
|
@ -4932,7 +4932,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||
else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&
|
||||
instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
|
||||
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
|
||||
else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
|
||||
else if (ctx->ac.gfx_level == GFX9 && instr->op == nir_texop_txs &&
|
||||
instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
|
||||
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
|
||||
LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
|
||||
|
|
|
@ -5,7 +5,7 @@ from canonicalize import json_canonicalize
|
|||
|
||||
######### BEGIN HARDCODED CONFIGURATION
|
||||
|
||||
gfx_versions = {
|
||||
gfx_levels = {
|
||||
'gfx6': [
|
||||
[],
|
||||
'asic_reg/gca/gfx_6_0_d.h',
|
||||
|
@ -68,7 +68,7 @@ re_shift = re.compile(r'^#define (?P<name>\w+)__(?P<field>\w+)__SHIFT\s+(?P<valu
|
|||
# match: #define SDMA0_DEC_START__START_MASK 0xFFFFFFFF
|
||||
re_mask = re.compile(r'^#define (?P<name>\w+)__(?P<field>\w+)_MASK\s+(?P<value>[0-9a-fA-Fx]+)L?\n')
|
||||
|
||||
def register_filter(gfx_version, name, offset, already_added):
|
||||
def register_filter(gfx_level, name, offset, already_added):
|
||||
# Only accept writeable registers and debug registers
|
||||
return ((offset // 0x1000 in [0xB, 0x28, 0x30, 0x31, 0x34, 0x35, 0x36, 0x37] or
|
||||
# Add SQ_WAVE registers for trap handlers
|
||||
|
@ -81,7 +81,7 @@ def register_filter(gfx_version, name, offset, already_added):
|
|||
name.startswith('GRBM_STATUS') or
|
||||
name.startswith('CP_CP'))) or
|
||||
# Add all registers in the 0x8000 range for gfx6
|
||||
(gfx_version == 'gfx6' and offset // 0x1000 == 0x8) or
|
||||
(gfx_level == 'gfx6' and offset // 0x1000 == 0x8) or
|
||||
# Add registers in the 0x9000 range
|
||||
(offset // 0x1000 == 0x9 and
|
||||
(name in ['TA_CS_BC_BASE_ADDR', 'GB_ADDR_CONFIG', 'SPI_CONFIG_CNTL'] or
|
||||
|
@ -689,11 +689,11 @@ fields_missing = {
|
|||
def bitcount(n):
|
||||
return bin(n).count('1')
|
||||
|
||||
def generate_json(gfx_version, amd_headers_path):
|
||||
gc_base_offsets = gfx_versions[gfx_version][0]
|
||||
def generate_json(gfx_level, amd_headers_path):
|
||||
gc_base_offsets = gfx_levels[gfx_level][0]
|
||||
|
||||
# Add the path to the filenames
|
||||
filenames = [amd_headers_path + '/' + a for a in gfx_versions[gfx_version][1:]]
|
||||
filenames = [amd_headers_path + '/' + a for a in gfx_levels[gfx_level][1:]]
|
||||
|
||||
# Open the files
|
||||
files = [open(a, 'r').readlines() if a is not None else None for a in filenames]
|
||||
|
@ -726,9 +726,9 @@ def generate_json(gfx_version, amd_headers_path):
|
|||
name = name[:-4]
|
||||
|
||||
# Only accept writeable registers and debug registers
|
||||
if register_filter(gfx_version, name, offset, offset in added_offsets):
|
||||
if register_filter(gfx_level, name, offset, offset in added_offsets):
|
||||
regs[name] = {
|
||||
'chips': [gfx_version],
|
||||
'chips': [gfx_level],
|
||||
'map': {'at': offset, 'to': 'mm'},
|
||||
'name': name,
|
||||
}
|
||||
|
@ -765,7 +765,7 @@ def generate_json(gfx_version, amd_headers_path):
|
|||
re_enum_end = re.compile(r'^} \w+;\n')
|
||||
inside_enum = False
|
||||
name = None
|
||||
enums = enums_missing[gfx_version] if gfx_version in enums_missing else {}
|
||||
enums = enums_missing[gfx_level] if gfx_level in enums_missing else {}
|
||||
|
||||
for line in files[2]:
|
||||
r = re_enum_begin.match(line)
|
||||
|
@ -795,7 +795,7 @@ def generate_json(gfx_version, amd_headers_path):
|
|||
# Assemble everything
|
||||
reg_types = {}
|
||||
reg_mappings = []
|
||||
missing_fields = fields_missing[gfx_version] if gfx_version in fields_missing else {}
|
||||
missing_fields = fields_missing[gfx_level] if gfx_level in fields_missing else {}
|
||||
|
||||
for (name, reg) in regs.items():
|
||||
type = {'fields': []}
|
||||
|
@ -823,7 +823,7 @@ def generate_json(gfx_version, amd_headers_path):
|
|||
if type_name is not None:
|
||||
if type_name not in enums:
|
||||
print('{0}: {1} type not found for {2}.{3}'
|
||||
.format(gfx_version, type_name, name, field), file=sys.stderr)
|
||||
.format(gfx_level, type_name, name, field), file=sys.stderr)
|
||||
else:
|
||||
new['enum_ref'] = type_name
|
||||
|
||||
|
@ -868,8 +868,8 @@ def generate_json(gfx_version, amd_headers_path):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) <= 1 or (sys.argv[1] not in gfx_versions and sys.argv[1] != 'all'):
|
||||
print('First parameter should be one of: all, ' + ', '.join(gfx_versions.keys()), file=sys.stderr)
|
||||
if len(sys.argv) <= 1 or (sys.argv[1] not in gfx_levels and sys.argv[1] != 'all'):
|
||||
print('First parameter should be one of: all, ' + ', '.join(gfx_levels.keys()), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if len(sys.argv) <= 2:
|
||||
|
@ -877,8 +877,8 @@ if __name__ == '__main__':
|
|||
sys.exit(1)
|
||||
|
||||
if sys.argv[1] == 'all':
|
||||
for gfx_version in gfx_versions.keys():
|
||||
print(generate_json(gfx_version, sys.argv[2]), file=open(gfx_version + '.json', 'w'))
|
||||
for gfx_level in gfx_levels.keys():
|
||||
print(generate_json(gfx_level, sys.argv[2]), file=open(gfx_level + '.json', 'w'))
|
||||
sys.exit(0)
|
||||
|
||||
print(generate_json(sys.argv[1], sys.argv[2]))
|
||||
|
|
|
@ -353,7 +353,7 @@ bool
|
|||
radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
return cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
|
||||
cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
|
||||
cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
|
||||
}
|
||||
|
||||
enum amd_ip_type
|
||||
|
@ -527,7 +527,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||
cmd_buffer->descriptors[i].push_dirty = false;
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
cmd_buffer->qf == RADV_QUEUE_GENERAL) {
|
||||
unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
|
||||
unsigned fence_offset, eop_bug_offset;
|
||||
|
@ -541,7 +541,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* Allocate a buffer for the EOP bug on GFX9. */
|
||||
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr);
|
||||
memset(fence_ptr, 0, 16 * num_db);
|
||||
|
@ -618,7 +618,7 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
|
|||
* being placed in less of them.
|
||||
*/
|
||||
unsigned offset = cmd_buffer->upload.offset;
|
||||
unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
|
||||
unsigned line_size = rad_info->gfx_level >= GFX10 ? 64 : 32;
|
||||
unsigned gap = align(offset, line_size) - offset;
|
||||
if ((size & (line_size - 1)) > gap)
|
||||
offset = align(offset, line_size);
|
||||
|
@ -686,7 +686,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu
|
|||
|
||||
/* Force wait for graphics or compute engines to be idle. */
|
||||
si_cs_emit_cache_flush(cmd_buffer->cs,
|
||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
|
@ -1058,7 +1058,7 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
|
|||
{
|
||||
const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level < GFX9)
|
||||
return;
|
||||
|
||||
if (old_pipeline &&
|
||||
|
@ -1070,7 +1070,7 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
|
|||
if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 ||
|
||||
cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 ||
|
||||
cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 ||
|
||||
cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
binning_flush = !old_pipeline ||
|
||||
G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) !=
|
||||
G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0);
|
||||
|
@ -1586,7 +1586,7 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
assert(!cmd_buffer->state.mesh_shading);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs,
|
||||
R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology);
|
||||
} else {
|
||||
|
@ -1648,7 +1648,7 @@ radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
|
|||
uint32_t pipeline_comb_mode = d->fragment_shading_rate.combiner_ops[0];
|
||||
uint32_t htile_comb_mode = d->fragment_shading_rate.combiner_ops[1];
|
||||
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3);
|
||||
|
||||
if (subpass && !subpass->vrs_attachment) {
|
||||
/* When the current subpass has no VRS attachment, the VRS rates are expected to be 1x1, so we
|
||||
|
@ -1706,7 +1706,7 @@ radv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer)
|
|||
{
|
||||
struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
|
||||
d->primitive_restart_enable);
|
||||
} else {
|
||||
|
@ -1754,7 +1754,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
|
|||
struct radv_color_buffer_info *cb, struct radv_image_view *iview,
|
||||
VkImageLayout layout, bool in_render_loop)
|
||||
{
|
||||
bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8;
|
||||
bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8;
|
||||
uint32_t cb_color_info = cb->cb_color_info;
|
||||
struct radv_image *image = iview->image;
|
||||
|
||||
|
@ -1780,7 +1780,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
|
|||
cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
|
||||
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
|
||||
radeon_emit(cmd_buffer->cs, 0);
|
||||
|
@ -1808,7 +1808,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
|
|||
cb->cb_color_attrib2);
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
|
||||
cb->cb_color_attrib3);
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
|
||||
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
|
||||
radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
|
||||
|
@ -1884,7 +1884,7 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_
|
|||
|
||||
db_z_info &= C_028040_ZRANGE_PRECISION;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
db_z_info_reg = R_028038_DB_Z_INFO;
|
||||
} else {
|
||||
db_z_info_reg = R_028040_DB_Z_INFO;
|
||||
|
@ -1924,7 +1924,7 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_
|
|||
db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3 &&
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3 &&
|
||||
!cmd_buffer->state.subpass->vrs_attachment) {
|
||||
db_htile_surface &= C_028ABC_VRS_HTILE_ENCODING;
|
||||
}
|
||||
|
@ -1932,7 +1932,7 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_
|
|||
radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
|
||||
|
||||
|
@ -1951,7 +1951,7 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_
|
|||
radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
|
||||
radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
|
||||
radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
|
||||
radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
|
||||
radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
|
||||
|
@ -2420,7 +2420,7 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
|
|||
bool color_mip_changed = false;
|
||||
|
||||
/* Entire workaround is not applicable before GFX9 */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
|
||||
return;
|
||||
|
||||
if (!framebuffer)
|
||||
|
@ -2456,7 +2456,7 @@ static void
|
|||
radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
/* Entire workaround is not applicable before GFX9 */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
|
||||
return;
|
||||
|
||||
bool need_color_mip_flush = false;
|
||||
|
@ -2523,7 +2523,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radv_load_color_clear_metadata(cmd_buffer, iview, i);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
iview->image->dcc_sign_reinterpret) {
|
||||
/* Disable constant encoding with the clear value of "1" with different DCC signedness
|
||||
* because the hardware will fill "1" instead of the clear value.
|
||||
|
@ -2592,7 +2592,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radv_image_view_finish(&iview);
|
||||
} else {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9)
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
|
||||
else
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
|
||||
|
@ -2603,17 +2603,18 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
|
||||
S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height));
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8) {
|
||||
bool disable_constant_encode =
|
||||
cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
|
||||
enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
uint8_t watermark = chip_class >= GFX10 ? 6 : 4;
|
||||
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
uint8_t watermark = gfx_level >= GFX10 ? 6 : 4;
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
|
||||
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) |
|
||||
S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
|
||||
S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
|
||||
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
|
||||
radeon_set_context_reg(
|
||||
cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
|
||||
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) |
|
||||
S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
|
||||
S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
|
||||
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
|
||||
}
|
||||
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
|
||||
|
@ -2649,7 +2650,7 @@ radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlus
|
|||
uint32_t db_count_control;
|
||||
|
||||
if (!enable_occlusion_queries) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
|
||||
pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
|
||||
/* Re-enable out-of-order rasterization if the
|
||||
|
@ -2665,9 +2666,9 @@ radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlus
|
|||
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
|
||||
uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
|
||||
bool gfx10_perfect =
|
||||
cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries;
|
||||
cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && has_perfect_queries;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
/* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
|
||||
* covered tiles, discards, and early depth testing. For more details,
|
||||
* see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
|
||||
|
@ -2786,7 +2787,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
|
|||
uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask;
|
||||
uint32_t zero_divisors = state->zero_divisors & attribute_mask;
|
||||
*nontrivial_divisors = state->nontrivial_divisors & attribute_mask;
|
||||
enum chip_class chip = device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
|
||||
const uint32_t misaligned_mask = chip == GFX6 || chip >= GFX10 ? cmd_buffer->state.vbo_misaligned_mask : 0;
|
||||
|
||||
/* try to use a pre-compiled prolog first */
|
||||
|
@ -2914,7 +2915,7 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
|
|||
if (cmd_buffer->state.emitted_vs_prolog == prolog && !pipeline_is_dirty)
|
||||
return;
|
||||
|
||||
enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||
uint64_t prolog_va = radv_buffer_get_va(prolog->bo) + prolog->alloc->offset;
|
||||
|
||||
|
@ -3369,7 +3370,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_
|
|||
|
||||
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||
struct radv_shader *vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX);
|
||||
enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
unsigned vb_offset;
|
||||
void *vb_ptr;
|
||||
unsigned desc_index = 0;
|
||||
|
@ -3598,7 +3599,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
|
|||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3779,10 +3780,10 @@ si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dr
|
|||
draw_vertex_count, topology, prim_restart_enable);
|
||||
|
||||
if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
|
||||
if (info->chip_class == GFX9) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
|
||||
R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
|
||||
} else if (info->chip_class >= GFX7) {
|
||||
} else if (info->gfx_level >= GFX7) {
|
||||
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
|
||||
} else {
|
||||
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
|
||||
|
@ -3801,7 +3802,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
|
|||
bool disable_instance_packing = false;
|
||||
|
||||
/* Draw state. */
|
||||
if (info->chip_class < GFX10) {
|
||||
if (info->gfx_level < GFX10) {
|
||||
si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
|
||||
!!draw_info->strmout_buffer,
|
||||
draw_info->indirect ? 0 : draw_info->count);
|
||||
|
@ -3838,21 +3839,20 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
|
|||
* topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to
|
||||
* be applied for indexed and non-indexed draws.
|
||||
*/
|
||||
if (info->chip_class == GFX10_3 && state->active_pipeline_queries > 0 &&
|
||||
if (info->gfx_level == GFX10_3 && state->active_pipeline_queries > 0 &&
|
||||
(draw_info->instance_count > 1 || draw_info->indirect) &&
|
||||
(topology == V_008958_DI_PT_LINELIST_ADJ ||
|
||||
topology == V_008958_DI_PT_LINESTRIP_ADJ ||
|
||||
topology == V_008958_DI_PT_TRILIST_ADJ ||
|
||||
topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
|
||||
(topology == V_008958_DI_PT_LINELIST_ADJ || topology == V_008958_DI_PT_LINESTRIP_ADJ ||
|
||||
topology == V_008958_DI_PT_TRILIST_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
|
||||
disable_instance_packing = true;
|
||||
}
|
||||
|
||||
if ((draw_info->indexed && state->index_type != state->last_index_type) ||
|
||||
(info->chip_class == GFX10_3 && (state->last_index_type == -1 ||
|
||||
(info->gfx_level == GFX10_3 &&
|
||||
(state->last_index_type == -1 ||
|
||||
disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) {
|
||||
uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
|
||||
R_03090C_VGT_INDEX_TYPE, 2, index_type);
|
||||
} else {
|
||||
|
@ -3905,8 +3905,8 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_s
|
|||
static bool
|
||||
can_skip_buffer_l2_flushes(struct radv_device *device)
|
||||
{
|
||||
return device->physical_device->rad_info.chip_class == GFX9 ||
|
||||
(device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||
return device->physical_device->rad_info.gfx_level == GFX9 ||
|
||||
(device->physical_device->rad_info.gfx_level >= GFX10 &&
|
||||
!device->physical_device->rad_info.tcc_rb_non_coherent);
|
||||
}
|
||||
|
||||
|
@ -4085,7 +4085,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_fla
|
|||
break;
|
||||
case VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR:
|
||||
flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
|
||||
flush_bits |= RADV_CMD_FLAG_INV_L2;
|
||||
break;
|
||||
case VK_ACCESS_2_SHADER_WRITE_BIT:
|
||||
|
@ -4713,7 +4713,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
|
|||
|
||||
assert(firstBinding + bindingCount <= MAX_VBS);
|
||||
cmd_buffer->state.vbo_misaligned_mask = state->misaligned_mask;
|
||||
enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
|
||||
if (firstBinding + bindingCount > cmd_buffer->used_vertex_bindings)
|
||||
cmd_buffer->used_vertex_bindings = firstBinding + bindingCount;
|
||||
|
@ -4885,7 +4885,7 @@ radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pi
|
|||
dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -5044,7 +5044,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
|||
radv_emit_mip_change_flush_default(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX6)
|
||||
cmd_buffer->state.flush_bits |=
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
|
||||
|
||||
|
@ -5631,7 +5631,7 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
|
|||
|
||||
memset(state, 0, sizeof(*state));
|
||||
|
||||
enum chip_class chip = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) {
|
||||
const VkVertexInputAttributeDescription2EXT *attrib = &pVertexAttributeDescriptions[i];
|
||||
const VkVertexInputBindingDescription2EXT *binding = bindings[attrib->binding];
|
||||
|
@ -5712,7 +5712,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
|||
RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
|
||||
bool allow_ib2 = true;
|
||||
|
||||
if (secondary->device->physical_device->rad_info.chip_class == GFX7 &&
|
||||
if (secondary->device->physical_device->rad_info.gfx_level == GFX7 &&
|
||||
secondary->state.uses_draw_indirect_multi) {
|
||||
/* Do not launch an IB2 for secondary command buffers that contain
|
||||
* DRAW_{INDEX}_INDIRECT_MULTI on GFX7 because it's illegal and hang the GPU.
|
||||
|
@ -6270,7 +6270,8 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
|
|||
const int index_size = radv_get_vgt_index_size(state->index_type);
|
||||
unsigned i = 0;
|
||||
const bool uses_drawid = state->pipeline->graphics.uses_drawid;
|
||||
const bool can_eop = !uses_drawid && cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10;
|
||||
const bool can_eop =
|
||||
!uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10;
|
||||
|
||||
if (uses_drawid) {
|
||||
if (vertexOffset) {
|
||||
|
@ -6333,7 +6334,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
} else {
|
||||
if (vertexOffset) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX10) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10) {
|
||||
/* GFX10 has a bug that consecutive draw packets with NOT_EOP must not have
|
||||
* count == 0 for the last draw that doesn't have NOT_EOP.
|
||||
*/
|
||||
|
@ -6756,7 +6757,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
|
|||
* so the state must be re-emitted before the next indexed
|
||||
* draw.
|
||||
*/
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
cmd_buffer->state.last_index_type = -1;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
|
||||
}
|
||||
|
@ -6791,7 +6792,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
|
|||
ALWAYS_INLINE static bool
|
||||
radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount)
|
||||
{
|
||||
const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
|
||||
const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
|
||||
const bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
|
||||
cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
|
||||
|
||||
|
@ -6871,7 +6872,7 @@ static void
|
|||
radv_after_draw(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
|
||||
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
|
||||
/* Start prefetches after the draw has been started. Both will
|
||||
* run in parallel, but starting the draw first is more
|
||||
* important.
|
||||
|
@ -7311,7 +7312,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel
|
|||
ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
|
||||
|
||||
if (compute_shader->info.wave_size == 32) {
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
dispatch_initiator |= S_00B800_CS_W32_EN(1);
|
||||
}
|
||||
|
||||
|
@ -7332,7 +7333,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_pipel
|
|||
unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4;
|
||||
|
||||
if (cmd_buffer->device->load_grid_size_from_user_sgpr) {
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10_3);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3);
|
||||
radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0));
|
||||
radeon_emit(cs, info->va);
|
||||
radeon_emit(cs, info->va >> 32);
|
||||
|
@ -7452,7 +7453,7 @@ static void
|
|||
radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info,
|
||||
struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point)
|
||||
{
|
||||
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
|
||||
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
|
||||
bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;
|
||||
|
||||
if (pipeline->compute.cs_regalloc_hang_bug)
|
||||
|
@ -8194,7 +8195,7 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
|||
|
||||
flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX8) {
|
||||
/* When DCC is enabled with mipmaps, some levels might not
|
||||
* support fast clears and we have to initialize them as "fully
|
||||
* expanded".
|
||||
|
@ -8243,7 +8244,7 @@ radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i
|
|||
if (radv_image_has_cmask(image)) {
|
||||
uint32_t value;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* TODO: Fix clearing CMASK layers on GFX9. */
|
||||
if (radv_image_is_tc_compat_cmask(image) ||
|
||||
(radv_image_has_fmask(image) &&
|
||||
|
@ -8622,7 +8623,7 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event,
|
|||
event_type = V_028A90_BOTTOM_OF_PIPE_TS;
|
||||
}
|
||||
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0,
|
||||
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
|
@ -8869,14 +8870,14 @@ radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
|
|||
unsigned reg_strmout_cntl;
|
||||
|
||||
/* The register is at different places on different ASICs. */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||
radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME));
|
||||
radeon_emit(cs, R_0300FC_CP_STRMOUT_CNTL >> 2);
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, 0);
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
|
||||
radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
|
||||
} else {
|
||||
|
@ -8973,7 +8974,7 @@ gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCou
|
|||
unsigned last_target = util_last_bit(so->enabled_mask) - 1;
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
|
||||
|
||||
/* Sync because the next streamout operation will overwrite GDS and we
|
||||
|
@ -9097,7 +9098,7 @@ gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCount
|
|||
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
|
||||
|
||||
u_foreach_bit(i, so->enabled_mask)
|
||||
|
@ -9117,7 +9118,7 @@ gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCount
|
|||
|
||||
va += buffer->offset + counter_buffer_offset;
|
||||
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0,
|
||||
EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
|
||||
|
||||
|
@ -9193,7 +9194,7 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
|
|||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
} else {
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
|
||||
0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
|
|
|
@ -120,7 +120,7 @@ radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_
|
|||
assert(idx);
|
||||
|
||||
unsigned opcode = PKT3_SET_SH_REG_INDEX;
|
||||
if (pdevice->rad_info.chip_class < GFX10)
|
||||
if (pdevice->rad_info.gfx_level < GFX10)
|
||||
opcode = PKT3_SET_SH_REG;
|
||||
|
||||
radeon_emit(cs, PKT3(opcode, 1, 0));
|
||||
|
@ -175,8 +175,8 @@ radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct ra
|
|||
assert(idx);
|
||||
|
||||
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
|
||||
if (pdevice->rad_info.chip_class < GFX9 ||
|
||||
(pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
|
||||
if (pdevice->rad_info.gfx_level < GFX9 ||
|
||||
(pdevice->rad_info.gfx_level == GFX9 && pdevice->rad_info.me_fw_version < 26))
|
||||
opcode = PKT3_SET_UCONFIG_REG;
|
||||
|
||||
radeon_emit(cs, PKT3(opcode, 1, 0));
|
||||
|
|
|
@ -83,8 +83,7 @@ radv_init_trace(struct radv_device *device)
|
|||
if (!device->trace_id_ptr)
|
||||
return false;
|
||||
|
||||
ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
|
||||
NULL);
|
||||
ac_vm_fault_occured(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, NULL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -114,7 +113,7 @@ radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
|
|||
uint32_t value;
|
||||
|
||||
if (ws->read_registers(ws, offset, 1, &value))
|
||||
ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
|
||||
ac_dump_reg(f, device->physical_device->rad_info.gfx_level, offset, value, ~0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -132,7 +131,7 @@ radv_dump_debug_registers(struct radv_device *device, FILE *f)
|
|||
radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
|
||||
radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
|
||||
radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
|
||||
if (info->chip_class <= GFX8) {
|
||||
if (info->gfx_level <= GFX8) {
|
||||
radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
|
||||
radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
|
||||
radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
|
||||
|
@ -151,50 +150,50 @@ radv_dump_debug_registers(struct radv_device *device, FILE *f)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
|
||||
radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
|
||||
{
|
||||
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
|
||||
for (unsigned j = 0; j < 4; j++)
|
||||
ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
|
||||
ac_dump_reg(f, gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
|
||||
radv_dump_image_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
|
||||
{
|
||||
unsigned sq_img_rsrc_word0 =
|
||||
chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
|
||||
gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
|
||||
|
||||
fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
|
||||
for (unsigned j = 0; j < 8; j++)
|
||||
ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
|
||||
ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
|
||||
|
||||
fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
|
||||
for (unsigned j = 0; j < 8; j++)
|
||||
ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
|
||||
ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
|
||||
radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
|
||||
{
|
||||
fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
|
||||
ac_dump_reg(f, gfx_level, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
|
||||
radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc,
|
||||
FILE *f)
|
||||
{
|
||||
radv_dump_image_descriptor(chip_class, desc, f);
|
||||
radv_dump_sampler_descriptor(chip_class, desc + 16, f);
|
||||
radv_dump_image_descriptor(gfx_level, desc, f);
|
||||
radv_dump_sampler_descriptor(gfx_level, desc + 16, f);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
|
||||
FILE *f)
|
||||
{
|
||||
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
const struct radv_descriptor_set_layout *layout;
|
||||
int i;
|
||||
|
||||
|
@ -210,18 +209,18 @@ radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set
|
|||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
radv_dump_buffer_descriptor(chip_class, desc, f);
|
||||
radv_dump_buffer_descriptor(gfx_level, desc, f);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
radv_dump_image_descriptor(chip_class, desc, f);
|
||||
radv_dump_image_descriptor(gfx_level, desc, f);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
|
||||
radv_dump_combined_image_sampler_descriptor(gfx_level, desc, f);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
radv_dump_sampler_descriptor(chip_class, desc, f);
|
||||
radv_dump_sampler_descriptor(gfx_level, desc, f);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
|
@ -369,8 +368,8 @@ radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBit
|
|||
FILE *f)
|
||||
{
|
||||
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
|
||||
enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
|
||||
unsigned num_waves = ac_get_wave_info(chip_class, waves);
|
||||
enum amd_gfx_level gfx_level = pipeline->device->physical_device->rad_info.gfx_level;
|
||||
unsigned num_waves = ac_get_wave_info(gfx_level, waves);
|
||||
|
||||
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
|
||||
|
||||
|
@ -640,7 +639,7 @@ radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
|
|||
return;
|
||||
|
||||
sprintf(cmd, "umr -R %s 2>&1",
|
||||
device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
|
||||
device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
|
||||
|
||||
fprintf(f, "\nUMR GFX ring:\n\n");
|
||||
radv_dump_cmd(cmd, f);
|
||||
|
@ -658,7 +657,7 @@ radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
|
|||
return;
|
||||
|
||||
sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
|
||||
device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
|
||||
device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
|
||||
|
||||
fprintf(f, "\nUMR GFX waves:\n\n");
|
||||
radv_dump_cmd(cmd, f);
|
||||
|
@ -687,7 +686,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
|
|||
bool hang_occurred = radv_gpu_hang_occured(queue, ring);
|
||||
bool vm_fault_occurred = false;
|
||||
if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
|
||||
vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
|
||||
vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.gfx_level,
|
||||
&device->dmesg_timestamp, &addr);
|
||||
if (!hang_occurred && !vm_fault_occurred)
|
||||
return;
|
||||
|
@ -964,23 +963,23 @@ radv_dump_sq_hw_regs(struct radv_device *device)
|
|||
struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
|
||||
|
||||
fprintf(stderr, "\nHardware registers:\n");
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000408_SQ_WAVE_STATUS,
|
||||
regs->status, ~0);
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00040C_SQ_WAVE_TRAPSTS,
|
||||
regs->trap_sts, ~0);
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00045C_SQ_WAVE_HW_ID1,
|
||||
regs->hw_id, ~0);
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00041C_SQ_WAVE_IB_STS,
|
||||
regs->ib_sts, ~0);
|
||||
} else {
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000048_SQ_WAVE_STATUS,
|
||||
regs->status, ~0);
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00004C_SQ_WAVE_TRAPSTS,
|
||||
regs->trap_sts, ~0);
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000050_SQ_WAVE_HW_ID,
|
||||
regs->hw_id, ~0);
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
|
||||
ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00005C_SQ_WAVE_IB_STS,
|
||||
regs->ib_sts, ~0);
|
||||
}
|
||||
fprintf(stderr, "\n\n");
|
||||
|
|
|
@ -1080,7 +1080,7 @@ write_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_
|
|||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
|
|
@ -420,7 +420,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
.KHR_external_semaphore = true,
|
||||
.KHR_external_semaphore_fd = true,
|
||||
.KHR_format_feature_flags2 = true,
|
||||
.KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
|
||||
.KHR_fragment_shading_rate = device->rad_info.gfx_level >= GFX10_3,
|
||||
.KHR_get_memory_requirements2 = true,
|
||||
.KHR_image_format_list = true,
|
||||
.KHR_imageless_framebuffer = true,
|
||||
|
@ -469,7 +469,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
.EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
|
||||
.EXT_color_write_enable = true,
|
||||
.EXT_conditional_rendering = true,
|
||||
.EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
|
||||
.EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
|
||||
.EXT_custom_border_color = true,
|
||||
.EXT_debug_marker = radv_thread_trace_enabled(),
|
||||
.EXT_depth_clip_control = true,
|
||||
|
@ -488,10 +488,10 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
.EXT_global_priority_query = true,
|
||||
.EXT_host_query_reset = true,
|
||||
.EXT_image_2d_view_of_3d = true,
|
||||
.EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
|
||||
.EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9,
|
||||
.EXT_image_robustness = true,
|
||||
.EXT_image_view_min_lod = true,
|
||||
.EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
|
||||
.EXT_index_type_uint8 = device->rad_info.gfx_level >= GFX8,
|
||||
.EXT_inline_uniform_block = true,
|
||||
.EXT_line_rasterization = true,
|
||||
.EXT_memory_budget = true,
|
||||
|
@ -503,15 +503,15 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
#endif
|
||||
.EXT_pipeline_creation_cache_control = true,
|
||||
.EXT_pipeline_creation_feedback = true,
|
||||
.EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
|
||||
.EXT_post_depth_coverage = device->rad_info.gfx_level >= GFX10,
|
||||
.EXT_primitive_topology_list_restart = true,
|
||||
.EXT_private_data = true,
|
||||
.EXT_provoking_vertex = true,
|
||||
.EXT_queue_family_foreign = true,
|
||||
.EXT_robustness2 = true,
|
||||
.EXT_sample_locations = device->rad_info.chip_class < GFX10,
|
||||
.EXT_sample_locations = device->rad_info.gfx_level < GFX10,
|
||||
.EXT_sampler_filter_minmax = true,
|
||||
.EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
|
||||
.EXT_scalar_block_layout = device->rad_info.gfx_level >= GFX7,
|
||||
.EXT_separate_stencil_usage = true,
|
||||
.EXT_shader_atomic_float = true,
|
||||
#ifdef LLVM_AVAILABLE
|
||||
|
@ -556,7 +556,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
.GOOGLE_hlsl_functionality1 = true,
|
||||
.GOOGLE_user_type = true,
|
||||
.NV_compute_shader_derivatives = true,
|
||||
.NV_mesh_shader = device->use_ngg && device->rad_info.chip_class >= GFX10_3 &&
|
||||
.NV_mesh_shader = device->use_ngg && device->rad_info.gfx_level >= GFX10_3 &&
|
||||
device->instance->perftest_flags & RADV_PERFTEST_NV_MS && !device->use_llvm,
|
||||
/* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
|
||||
* using it.
|
||||
|
@ -571,7 +571,7 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
|
|||
static bool
|
||||
radv_is_conformant(const struct radv_physical_device *pdevice)
|
||||
{
|
||||
return pdevice->rad_info.chip_class >= GFX8;
|
||||
return pdevice->rad_info.gfx_level >= GFX8;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -743,16 +743,14 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
|||
|
||||
device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
|
||||
|
||||
device->use_ngg = device->rad_info.chip_class >= GFX10 &&
|
||||
device->use_ngg = device->rad_info.gfx_level >= GFX10 &&
|
||||
device->rad_info.family != CHIP_NAVI14 &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
|
||||
|
||||
device->use_ngg_culling =
|
||||
device->use_ngg &&
|
||||
device->rad_info.max_render_backends > 1 &&
|
||||
(device->rad_info.chip_class >= GFX10_3 ||
|
||||
(device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
|
||||
device->use_ngg_culling = device->use_ngg && device->rad_info.max_render_backends > 1 &&
|
||||
(device->rad_info.gfx_level >= GFX10_3 ||
|
||||
(device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
|
||||
|
||||
device->use_ngg_streamout = false;
|
||||
|
||||
|
@ -762,7 +760,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
|||
device->ge_wave_size = 64;
|
||||
device->rt_wave_size = 64;
|
||||
|
||||
if (device->rad_info.chip_class >= GFX10) {
|
||||
if (device->rad_info.gfx_level >= GFX10) {
|
||||
if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
|
||||
device->cs_wave_size = 32;
|
||||
|
||||
|
@ -1314,7 +1312,7 @@ radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
|
|||
f->runtimeDescriptorArray = true;
|
||||
|
||||
f->samplerFilterMinmax = true;
|
||||
f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
|
||||
f->scalarBlockLayout = pdevice->rad_info.gfx_level >= GFX7;
|
||||
f->imagelessFramebuffer = true;
|
||||
f->uniformBufferStandardLayout = true;
|
||||
f->shaderSubgroupExtendedTypes = true;
|
||||
|
@ -1451,7 +1449,7 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
|
||||
VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
|
||||
(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
|
||||
features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
|
||||
features->indexTypeUint8 = pdevice->rad_info.gfx_level >= GFX8;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
|
||||
|
@ -1464,7 +1462,7 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
VkPhysicalDeviceShaderClockFeaturesKHR *features =
|
||||
(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
|
||||
features->shaderSubgroupClock = true;
|
||||
features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
|
||||
features->shaderDeviceClock = pdevice->rad_info.gfx_level >= GFX8;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
|
||||
|
@ -1489,7 +1487,7 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
/* FIXME: Some stippled Bresenham CTS fails on Vega10
|
||||
* but work on Raven.
|
||||
*/
|
||||
features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
|
||||
features->stippledBresenhamLines = pdevice->rad_info.gfx_level != GFX9;
|
||||
features->stippledSmoothLines = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1528,7 +1526,7 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
features->shaderBufferFloat64Atomics = true;
|
||||
features->shaderBufferFloat64AtomicAdd = false;
|
||||
features->shaderSharedFloat32Atomics = true;
|
||||
features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8;
|
||||
features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.gfx_level >= GFX8;
|
||||
features->shaderSharedFloat64Atomics = true;
|
||||
features->shaderSharedFloat64AtomicAdd = false;
|
||||
features->shaderImageFloat32Atomics = true;
|
||||
|
@ -1626,8 +1624,8 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features =
|
||||
(VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *)ext;
|
||||
bool has_shader_buffer_float_minmax = radv_has_shader_buffer_float_minmax(pdevice);
|
||||
bool has_shader_image_float_minmax = pdevice->rad_info.chip_class != GFX8 &&
|
||||
pdevice->rad_info.chip_class != GFX9;
|
||||
bool has_shader_image_float_minmax =
|
||||
pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9;
|
||||
features->shaderBufferFloat16Atomics = false;
|
||||
features->shaderBufferFloat16AtomicAdd = false;
|
||||
features->shaderBufferFloat16AtomicMinMax = false;
|
||||
|
@ -1828,7 +1826,7 @@ radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
|
|||
.maxFragmentOutputAttachments = 8,
|
||||
.maxFragmentDualSrcAttachments = 1,
|
||||
.maxFragmentCombinedOutputResources = max_descriptor_set_size,
|
||||
.maxComputeSharedMemorySize = pdevice->rad_info.chip_class >= GFX7 ? 65536 : 32768,
|
||||
.maxComputeSharedMemorySize = pdevice->rad_info.gfx_level >= GFX7 ? 65536 : 32768,
|
||||
.maxComputeWorkGroupCount = {65535, 65535, 65535},
|
||||
.maxComputeWorkGroupInvocations = 1024,
|
||||
.maxComputeWorkGroupSize = {1024, 1024, 1024},
|
||||
|
@ -1953,7 +1951,7 @@ radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
|
|||
radv_get_compiler_string(pdevice));
|
||||
|
||||
if (radv_is_conformant(pdevice)) {
|
||||
if (pdevice->rad_info.chip_class >= GFX10_3) {
|
||||
if (pdevice->rad_info.gfx_level >= GFX10_3) {
|
||||
p->conformanceVersion = (VkConformanceVersion){
|
||||
.major = 1,
|
||||
.minor = 3,
|
||||
|
@ -2008,11 +2006,11 @@ radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
|
|||
p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
|
||||
p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
|
||||
|
||||
p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
|
||||
p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
|
||||
p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
|
||||
p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
|
||||
p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
|
||||
p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
|
||||
p->shaderDenormPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
|
||||
p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.gfx_level >= GFX8;
|
||||
p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
|
||||
p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
|
||||
|
||||
p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
|
||||
p->shaderUniformBufferArrayNonUniformIndexingNative = false;
|
||||
|
@ -2059,7 +2057,7 @@ radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
|
|||
p->independentResolve = true;
|
||||
|
||||
/* GFX6-8 only support single channel min/max filter. */
|
||||
p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
|
||||
p->filterMinmaxImageComponentMapping = pdevice->rad_info.gfx_level >= GFX9;
|
||||
p->filterMinmaxSingleComponentFormats = true;
|
||||
|
||||
p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
|
||||
|
@ -2077,7 +2075,7 @@ radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice,
|
|||
p->maxSubgroupSize = 64;
|
||||
p->maxComputeWorkgroupSubgroups = UINT32_MAX;
|
||||
p->requiredSubgroupSizeStages = 0;
|
||||
if (pdevice->rad_info.chip_class >= GFX10) {
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
/* Only GFX10+ supports wave32. */
|
||||
p->minSubgroupSize = 32;
|
||||
p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
@ -2739,7 +2737,7 @@ radv_queue_finish(struct radv_queue *queue)
|
|||
static void
|
||||
radv_device_init_gs_info(struct radv_device *device)
|
||||
{
|
||||
device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
|
||||
device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.gfx_level,
|
||||
device->physical_device->rad_info.family);
|
||||
}
|
||||
|
||||
|
@ -3315,7 +3313,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
}
|
||||
device->private_sdma_queue = VK_NULL_HANDLE;
|
||||
|
||||
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
|
||||
|
||||
/* The maximum number of scratch waves. Scratch space isn't divided
|
||||
|
@ -3336,7 +3334,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
|
||||
device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
/* If the KMD allows it (there is a KMD hw register for it),
|
||||
* allow launching waves out-of-order.
|
||||
*/
|
||||
|
@ -3391,8 +3389,8 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
}
|
||||
|
||||
if (radv_thread_trace_enabled()) {
|
||||
if (device->physical_device->rad_info.chip_class < GFX8 ||
|
||||
device->physical_device->rad_info.chip_class > GFX10_3) {
|
||||
if (device->physical_device->rad_info.gfx_level < GFX8 ||
|
||||
device->physical_device->rad_info.gfx_level > GFX10_3) {
|
||||
fprintf(stderr, "GPU hardware not supported: refer to "
|
||||
"the RGP documentation for the list of "
|
||||
"supported GPUs!\n");
|
||||
|
@ -3409,7 +3407,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
radv_spm_trace_enabled() ? "enabled" : "disabled");
|
||||
|
||||
if (radv_spm_trace_enabled()) {
|
||||
if (device->physical_device->rad_info.chip_class < GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level < GFX10) {
|
||||
fprintf(stderr, "SPM isn't supported for this GPU!\n");
|
||||
abort();
|
||||
}
|
||||
|
@ -3421,7 +3419,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
|
||||
if (getenv("RADV_TRAP_HANDLER")) {
|
||||
/* TODO: Add support for more hardware. */
|
||||
assert(device->physical_device->rad_info.chip_class == GFX8);
|
||||
assert(device->physical_device->rad_info.gfx_level == GFX8);
|
||||
|
||||
fprintf(stderr, "**********************************************************************\n");
|
||||
fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
|
||||
|
@ -3436,7 +3434,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10_3) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10_3) {
|
||||
if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
|
||||
const char *file = radv_get_force_vrs_config_file();
|
||||
|
||||
|
@ -3456,7 +3454,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
}
|
||||
|
||||
/* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
|
||||
device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.chip_class >= GFX10_3;
|
||||
device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.gfx_level >= GFX10_3;
|
||||
|
||||
device->keep_shader_info = keep_shader_info;
|
||||
result = radv_device_init_meta(device);
|
||||
|
@ -3478,7 +3476,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX7)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7)
|
||||
cik_create_gfx_config(device);
|
||||
|
||||
VkPipelineCacheCreateInfo ci;
|
||||
|
@ -3638,7 +3636,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
|
|||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3655,7 +3653,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
|
|||
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3678,7 +3676,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
|
|||
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3696,7 +3694,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
|
|||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3717,7 +3715,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
|
|||
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3731,7 +3729,7 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
|
|||
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -3772,7 +3770,7 @@ radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
|||
if (gsvs_ring_bo)
|
||||
radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
|
||||
radeon_emit(cs, esgs_ring_size >> 8);
|
||||
radeon_emit(cs, gsvs_ring_size >> 8);
|
||||
|
@ -3797,14 +3795,14 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
|
|||
|
||||
radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size));
|
||||
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI,
|
||||
S_030984_BASE_HI(tf_va >> 40));
|
||||
} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (queue->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
|
||||
}
|
||||
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, queue->device->hs.hs_offchip_param);
|
||||
|
@ -3868,7 +3866,7 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf
|
|||
|
||||
radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
|
||||
|
||||
if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (queue->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
|
||||
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
|
||||
R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
|
||||
|
@ -3876,7 +3874,7 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf
|
|||
for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
|
||||
radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
|
||||
}
|
||||
} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (queue->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
|
||||
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
|
||||
R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
|
||||
|
@ -4016,7 +4014,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
|
|||
}
|
||||
|
||||
if (add_gds) {
|
||||
assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
|
||||
assert(queue->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
|
||||
/* 4 streamout GDS counters.
|
||||
* We need 256B (64 dw) of GDS, otherwise streamout hangs.
|
||||
|
@ -4029,7 +4027,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
|
|||
}
|
||||
|
||||
if (add_gds_oa) {
|
||||
assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
|
||||
assert(queue->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
|
||||
result =
|
||||
queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
|
||||
|
@ -4082,7 +4080,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
|
|||
if (i == 2) {
|
||||
/* We only need the continue preamble when we can't use indirect buffers. */
|
||||
if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
|
||||
queue->device->physical_device->rad_info.chip_class >= GFX7)
|
||||
queue->device->physical_device->rad_info.gfx_level >= GFX7)
|
||||
continue;
|
||||
/* Continue preamble is unnecessary when no shader rings are used. */
|
||||
if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&
|
||||
|
@ -4143,8 +4141,8 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
|
|||
|
||||
if (i < 2) {
|
||||
/* The two initial preambles have a cache flush at the beginning. */
|
||||
const enum chip_class chip_class = queue->device->physical_device->rad_info.chip_class;
|
||||
const bool is_mec = queue->qf == RADV_QUEUE_COMPUTE && chip_class >= GFX7;
|
||||
const enum amd_gfx_level gfx_level = queue->device->physical_device->rad_info.gfx_level;
|
||||
const bool is_mec = queue->qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7;
|
||||
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
|
||||
RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
|
||||
RADV_CMD_FLAG_START_PIPELINE_STATS;
|
||||
|
@ -4156,7 +4154,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
|
|||
flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
|
||||
}
|
||||
|
||||
si_cs_emit_cache_flush(cs, chip_class, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0);
|
||||
si_cs_emit_cache_flush(cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0);
|
||||
}
|
||||
|
||||
result = queue->device->ws->cs_finalize(cs);
|
||||
|
@ -4330,7 +4328,7 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
|
|||
if (bind->pBinds[i].memory != VK_NULL_HANDLE)
|
||||
mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
|
||||
pitch = surface->u.gfx9.prt_level_pitch[level];
|
||||
} else {
|
||||
|
@ -5421,7 +5419,7 @@ static unsigned
|
|||
get_dcc_max_uncompressed_block_size(const struct radv_device *device,
|
||||
const struct radv_image_view *iview)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {
|
||||
if (device->physical_device->rad_info.gfx_level < GFX10 && iview->image->info.samples > 1) {
|
||||
if (iview->image->planes[0].surface.bpe == 1)
|
||||
return V_028C78_MAX_BLOCK_SIZE_64B;
|
||||
else if (iview->image->planes[0].surface.bpe == 2)
|
||||
|
@ -5460,7 +5458,7 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv
|
|||
|
||||
/* For GFX9+ ac_surface computes values for us (except min_compressed
|
||||
* and max_uncompressed) */
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
max_compressed_block_size =
|
||||
iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
|
||||
independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
|
||||
|
@ -5514,8 +5512,8 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
|
||||
cb->cb_color_base = va >> 8;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
|
||||
S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
|
||||
S_028EE0_CMASK_PIPE_ALIGNED(1) |
|
||||
|
@ -5557,14 +5555,14 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
|
||||
|
||||
if (radv_image_has_fmask(iview->image)) {
|
||||
if (device->physical_device->rad_info.chip_class >= GFX7)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7)
|
||||
cb->cb_color_pitch |=
|
||||
S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
|
||||
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
|
||||
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
|
||||
} else {
|
||||
/* This must be set for fast clear to work without FMASK. */
|
||||
if (device->physical_device->rad_info.chip_class >= GFX7)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7)
|
||||
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
|
||||
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
|
||||
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
|
||||
|
@ -5580,7 +5578,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
va += surf->meta_offset;
|
||||
|
||||
if (radv_dcc_enabled(iview->image, iview->base_mip) &&
|
||||
device->physical_device->rad_info.chip_class <= GFX8)
|
||||
device->physical_device->rad_info.gfx_level <= GFX8)
|
||||
va += plane->surface.u.legacy.color.dcc_level[iview->base_mip].dcc_offset;
|
||||
|
||||
unsigned dcc_tile_swizzle = surf->tile_swizzle;
|
||||
|
@ -5646,7 +5644,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
|
||||
if (radv_image_has_fmask(iview->image)) {
|
||||
cb->cb_color_info |= S_028C70_COMPRESSION(1);
|
||||
if (device->physical_device->rad_info.chip_class == GFX6) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX6) {
|
||||
unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
|
||||
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
|
||||
}
|
||||
|
@ -5659,7 +5657,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
*/
|
||||
cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX8) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX8) {
|
||||
/* Set CMASK into a tiling format that allows
|
||||
* the texture block to read it.
|
||||
*/
|
||||
|
@ -5678,13 +5676,12 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
|
||||
|
||||
/* This must be set for fast clear to work without FMASK. */
|
||||
if (!radv_image_has_fmask(iview->image) &&
|
||||
device->physical_device->rad_info.chip_class == GFX6) {
|
||||
if (!radv_image_has_fmask(iview->image) && device->physical_device->rad_info.gfx_level == GFX6) {
|
||||
unsigned bankh = util_logbase2(surf->u.legacy.bankh);
|
||||
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D
|
||||
? (iview->extent.depth - 1)
|
||||
: (iview->image->info.array_size - 1);
|
||||
|
@ -5693,7 +5690,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
|
|||
unsigned height =
|
||||
vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
|
||||
|
||||
cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
|
||||
|
@ -5717,7 +5714,7 @@ radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_v
|
|||
|
||||
assert(radv_image_is_tc_compat_htile(iview->image));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
/* Default value for 32-bit depth surfaces. */
|
||||
max_zplanes = 4;
|
||||
|
||||
|
@ -5819,7 +5816,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
|
|||
|
||||
uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
|
||||
ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ds->db_depth_view |=
|
||||
S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
|
||||
}
|
||||
|
@ -5830,7 +5827,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
|
|||
va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
|
||||
s_offs = z_offs = va;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
assert(surf->u.gfx9.surf_offset == 0);
|
||||
s_offs += surf->u.gfx9.zs.stencil_offset;
|
||||
|
||||
|
@ -5841,7 +5838,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
|
|||
ds->db_stencil_info =
|
||||
S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
|
||||
ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
|
||||
}
|
||||
|
@ -5858,7 +5855,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
|
|||
|
||||
ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
bool iterate256 = radv_image_get_iterate256(device, iview->image);
|
||||
|
||||
ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
|
||||
|
@ -5879,7 +5876,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
|
|||
ds->db_htile_data_base = va >> 8;
|
||||
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
|
||||
}
|
||||
|
||||
|
@ -5903,7 +5900,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf
|
|||
if (iview->image->info.samples > 1)
|
||||
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
struct radeon_info *info = &device->physical_device->rad_info;
|
||||
unsigned tiling_index = surf->u.legacy.tiling_index[level];
|
||||
unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
|
||||
|
@ -6132,8 +6129,8 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
|
|||
{
|
||||
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
|
||||
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
|
||||
bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
|
||||
device->physical_device->rad_info.chip_class == GFX9;
|
||||
bool compat_mode = device->physical_device->rad_info.gfx_level == GFX8 ||
|
||||
device->physical_device->rad_info.gfx_level == GFX9;
|
||||
unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
|
||||
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
|
||||
bool trunc_coord =
|
||||
|
@ -6196,15 +6193,15 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
|
|||
sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr) |
|
||||
S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
sampler->state[2] |=
|
||||
S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level);
|
||||
} else {
|
||||
sampler->state[2] |=
|
||||
S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
|
||||
S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) |
|
||||
S_008F38_FILTER_PREC_FIX(1) |
|
||||
S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level &&
|
||||
device->physical_device->rad_info.chip_class >= GFX8);
|
||||
device->physical_device->rad_info.gfx_level >= GFX8);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -158,7 +158,7 @@ radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkForma
|
|||
*dfmt = radv_translate_buffer_dataformat(desc, 0);
|
||||
|
||||
*alpha_adjust = ALPHA_ADJUST_NONE;
|
||||
if (pdevice->rad_info.chip_class <= GFX8 && pdevice->rad_info.family != CHIP_STONEY) {
|
||||
if (pdevice->rad_info.gfx_level <= GFX8 && pdevice->rad_info.family != CHIP_STONEY) {
|
||||
switch (format) {
|
||||
case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
|
||||
case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
|
||||
|
@ -599,7 +599,7 @@ radv_is_storage_image_format_supported(struct radv_physical_device *physical_dev
|
|||
/* TODO: FMASK formats. */
|
||||
return true;
|
||||
case V_008F14_IMG_DATA_FORMAT_5_9_9_9:
|
||||
return physical_device->rad_info.chip_class >= GFX10_3;
|
||||
return physical_device->rad_info.gfx_level >= GFX10_3;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -640,7 +640,7 @@ radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
|
|||
} else
|
||||
*blendable = true;
|
||||
|
||||
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 && pdevice->rad_info.chip_class < GFX10_3)
|
||||
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 && pdevice->rad_info.gfx_level < GFX10_3)
|
||||
return false;
|
||||
|
||||
return color_format != V_028C70_COLOR_INVALID && color_swap != ~0U && color_num_format != ~0;
|
||||
|
@ -1471,7 +1471,7 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device,
|
|||
uint32_t maxArraySize;
|
||||
VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
|
||||
const struct util_format_description *desc = vk_format_description(format);
|
||||
enum chip_class chip_class = physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level gfx_level = physical_device->rad_info.gfx_level;
|
||||
VkImageTiling tiling = info->tiling;
|
||||
const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
|
||||
vk_find_struct_const(info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
|
||||
|
@ -1503,17 +1503,17 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device,
|
|||
maxExtent.height = 1;
|
||||
maxExtent.depth = 1;
|
||||
maxMipLevels = 15; /* log2(maxWidth) + 1 */
|
||||
maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
|
||||
maxArraySize = gfx_level >= GFX10 ? 8192 : 2048;
|
||||
break;
|
||||
case VK_IMAGE_TYPE_2D:
|
||||
maxExtent.width = 16384;
|
||||
maxExtent.height = 16384;
|
||||
maxExtent.depth = 1;
|
||||
maxMipLevels = 15; /* log2(maxWidth) + 1 */
|
||||
maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
|
||||
maxArraySize = gfx_level >= GFX10 ? 8192 : 2048;
|
||||
break;
|
||||
case VK_IMAGE_TYPE_3D:
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
maxExtent.width = 8192;
|
||||
maxExtent.height = 8192;
|
||||
maxExtent.depth = 8192;
|
||||
|
@ -1555,7 +1555,7 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device,
|
|||
}
|
||||
|
||||
/* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */
|
||||
if (physical_device->rad_info.chip_class >= GFX9 && info->type == VK_IMAGE_TYPE_3D &&
|
||||
if (physical_device->rad_info.gfx_level >= GFX9 && info->type == VK_IMAGE_TYPE_3D &&
|
||||
vk_format_get_blocksizebits(format) == 128 && vk_format_is_compressed(format) &&
|
||||
(info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
|
||||
((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
|
||||
|
@ -1624,7 +1624,7 @@ radv_get_image_format_properties(struct radv_physical_device *physical_device,
|
|||
|
||||
if (info->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
|
||||
/* Sparse textures are only supported on GFX8+. */
|
||||
if (physical_device->rad_info.chip_class < GFX8)
|
||||
if (physical_device->rad_info.gfx_level < GFX8)
|
||||
goto unsupported;
|
||||
|
||||
if (vk_format_get_plane_count(format) > 1 || info->type != VK_IMAGE_TYPE_2D ||
|
||||
|
@ -1851,7 +1851,7 @@ radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
|
|||
}
|
||||
|
||||
if (texture_lod_props) {
|
||||
if (physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX9) {
|
||||
texture_lod_props->supportsTextureGatherLODBiasAMD = true;
|
||||
} else {
|
||||
texture_lod_props->supportsTextureGatherLODBiasAMD = !vk_format_is_int(format);
|
||||
|
@ -1885,7 +1885,7 @@ fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkFormat
|
|||
/* On GFX8 we first subdivide by level and then layer, leading to a single
|
||||
* miptail. On GFX9+ we first subdivide by layer and then level which results
|
||||
* in a miptail per layer. */
|
||||
if (pdev->rad_info.chip_class < GFX9)
|
||||
if (pdev->rad_info.gfx_level < GFX9)
|
||||
prop->flags |= VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT;
|
||||
|
||||
/* This assumes the sparse image tile size is always 64 KiB (1 << 16) */
|
||||
|
@ -1956,7 +1956,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device,
|
|||
req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
|
||||
|
||||
if (req->memoryRequirements.imageMipTailFirstLod < image->info.levels) {
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
/* The tail is always a single tile per layer. */
|
||||
req->memoryRequirements.imageMipTailSize = 65536;
|
||||
req->memoryRequirements.imageMipTailOffset =
|
||||
|
|
|
@ -52,7 +52,7 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
|
|||
return RADEON_SURF_MODE_2D;
|
||||
|
||||
if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
|
||||
device->physical_device->rad_info.chip_class <= GFX8) {
|
||||
device->physical_device->rad_info.gfx_level <= GFX8) {
|
||||
/* this causes hangs in some VK CTS tests on GFX9. */
|
||||
/* Textures with a very small height are recommended to be linear. */
|
||||
if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
|
||||
|
@ -70,7 +70,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
|
|||
VkFormat format)
|
||||
{
|
||||
/* TC-compat HTILE is only available for GFX8+. */
|
||||
if (device->physical_device->rad_info.chip_class < GFX8)
|
||||
if (device->physical_device->rad_info.gfx_level < GFX8)
|
||||
return false;
|
||||
|
||||
if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
|
||||
|
@ -86,7 +86,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
|
|||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
|
||||
return false;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class < GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level < GFX9) {
|
||||
/* TC-compat HTILE for MSAA depth/stencil images is broken
|
||||
* on GFX8 because the tiling doesn't match.
|
||||
*/
|
||||
|
@ -110,7 +110,7 @@ static bool
|
|||
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
|
||||
{
|
||||
if (info->bo_metadata) {
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9)
|
||||
return info->bo_metadata->u.gfx9.scanout;
|
||||
else
|
||||
return info->bo_metadata->u.legacy.scanout;
|
||||
|
@ -230,7 +230,7 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
|
|||
bool *sign_reinterpret)
|
||||
{
|
||||
/* DCC (Delta Color Compression) is only available for GFX8+. */
|
||||
if (device->physical_device->rad_info.chip_class < GFX8)
|
||||
if (device->physical_device->rad_info.gfx_level < GFX8)
|
||||
return false;
|
||||
|
||||
if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
|
||||
|
@ -247,7 +247,7 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
|
|||
* decompressing a lot anyway we might as well not have DCC.
|
||||
*/
|
||||
if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
|
||||
(device->physical_device->rad_info.chip_class < GFX10 ||
|
||||
(device->physical_device->rad_info.gfx_level < GFX10 ||
|
||||
radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
|
||||
return false;
|
||||
|
||||
|
@ -269,14 +269,14 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
|
|||
if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
|
||||
return false;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class < GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level < GFX10) {
|
||||
/* TODO: Add support for DCC MSAA on GFX8-9. */
|
||||
if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
|
||||
return false;
|
||||
|
||||
/* TODO: Add support for DCC layers/mipmaps on GFX9. */
|
||||
if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
|
||||
device->physical_device->rad_info.chip_class == GFX9)
|
||||
device->physical_device->rad_info.gfx_level == GFX9)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -317,7 +317,7 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image
|
|||
bool
|
||||
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
|
||||
{
|
||||
return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class,
|
||||
return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.gfx_level,
|
||||
&image->planes[0].surface);
|
||||
}
|
||||
|
||||
|
@ -346,10 +346,10 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
|
|||
* - Enable on other gens.
|
||||
*/
|
||||
bool use_htile_for_mips =
|
||||
image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;
|
||||
image->info.array_size == 1 && device->physical_device->rad_info.gfx_level >= GFX10;
|
||||
|
||||
/* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
|
||||
if (device->physical_device->rad_info.chip_class == GFX10 &&
|
||||
if (device->physical_device->rad_info.gfx_level == GFX10 &&
|
||||
image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
|
||||
return false;
|
||||
|
||||
|
@ -371,7 +371,7 @@ static bool
|
|||
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
|
||||
{
|
||||
/* TC-compat CMASK is only available for GFX8+. */
|
||||
if (device->physical_device->rad_info.chip_class < GFX8)
|
||||
if (device->physical_device->rad_info.gfx_level < GFX8)
|
||||
return false;
|
||||
|
||||
if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
|
||||
|
@ -379,7 +379,7 @@ radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image
|
|||
|
||||
/* TC-compat CMASK with storage images is supported on GFX10+. */
|
||||
if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
|
||||
device->physical_device->rad_info.chip_class < GFX10)
|
||||
device->physical_device->rad_info.gfx_level < GFX10)
|
||||
return false;
|
||||
|
||||
/* Do not enable TC-compatible if the image isn't readable by a shader
|
||||
|
@ -420,7 +420,7 @@ radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf
|
|||
{
|
||||
surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
if (md->u.gfx9.swizzle_mode > 0)
|
||||
surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
|
||||
else
|
||||
|
@ -463,7 +463,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
|
|||
radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
|
||||
const struct radeon_bo_metadata *md = create_info->bo_metadata;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
|
||||
height = G_00A008_HEIGHT(md->metadata[4]) + 1;
|
||||
} else {
|
||||
|
@ -483,7 +483,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
|
|||
"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
|
||||
image->info.width, image->info.height, width, height);
|
||||
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
|
||||
} else if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
fprintf(stderr,
|
||||
"Tried to import an image with inconsistent width on GFX10.\n"
|
||||
"As GFX10 has no separate stride fields we cannot cope with\n"
|
||||
|
@ -528,7 +528,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *
|
|||
image->info.surf_index = NULL;
|
||||
}
|
||||
|
||||
if (create_info->prime_blit_src && device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* Older SDMA hw can't handle DCC */
|
||||
image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
|
||||
}
|
||||
|
@ -627,7 +627,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
|
|||
if (is_stencil)
|
||||
flags |= RADEON_SURF_SBUFFER;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
|
||||
vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
|
||||
flags |= RADEON_SURF_NO_RENDER_TARGET;
|
||||
|
@ -726,7 +726,7 @@ radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buff
|
|||
state[0] = va;
|
||||
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
|
||||
if (device->physical_device->rad_info.gfx_level != GFX8 && stride) {
|
||||
range /= stride;
|
||||
}
|
||||
|
||||
|
@ -736,7 +736,7 @@ radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buff
|
|||
S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
|
||||
S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
|
||||
|
||||
/* OOB_SELECT chooses the out-of-bounds check:
|
||||
|
@ -770,9 +770,9 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im
|
|||
struct radv_image_plane *plane = &image->planes[plane_id];
|
||||
uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
|
||||
uint64_t va = gpu_address;
|
||||
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
uint64_t meta_va = 0;
|
||||
if (chip_class >= GFX9) {
|
||||
if (gfx_level >= GFX9) {
|
||||
if (is_stencil)
|
||||
va += plane->surface.u.gfx9.zs.stencil_offset;
|
||||
else
|
||||
|
@ -781,17 +781,17 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im
|
|||
va += (uint64_t)base_level_info->offset_256B * 256;
|
||||
|
||||
state[0] = va >> 8;
|
||||
if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
|
||||
if (gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
|
||||
state[0] |= plane->surface.tile_swizzle;
|
||||
state[1] &= C_008F14_BASE_ADDRESS_HI;
|
||||
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
|
||||
|
||||
if (chip_class >= GFX8) {
|
||||
if (gfx_level >= GFX8) {
|
||||
state[6] &= C_008F28_COMPRESSION_EN;
|
||||
state[7] = 0;
|
||||
if (!disable_compression && radv_dcc_enabled(image, first_level)) {
|
||||
meta_va = gpu_address + plane->surface.meta_offset;
|
||||
if (chip_class <= GFX8)
|
||||
if (gfx_level <= GFX8)
|
||||
meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;
|
||||
|
||||
unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
|
||||
|
@ -803,12 +803,12 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im
|
|||
|
||||
if (meta_va) {
|
||||
state[6] |= S_008F28_COMPRESSION_EN(1);
|
||||
if (chip_class <= GFX9)
|
||||
if (gfx_level <= GFX9)
|
||||
state[7] = meta_va >> 8;
|
||||
}
|
||||
}
|
||||
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
state[3] &= C_00A00C_SW_MODE;
|
||||
|
||||
if (is_stencil) {
|
||||
|
@ -836,7 +836,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im
|
|||
}
|
||||
|
||||
state[7] = meta_va >> 16;
|
||||
} else if (chip_class == GFX9) {
|
||||
} else if (gfx_level == GFX9) {
|
||||
state[3] &= C_008F1C_SW_MODE;
|
||||
state[4] &= C_008F20_PITCH;
|
||||
|
||||
|
@ -938,7 +938,7 @@ vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
|
|||
{
|
||||
const struct util_format_description *desc = vk_format_description(format);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10 && desc->nr_channels == 1)
|
||||
return desc->swizzle[3] == PIPE_SWIZZLE_X;
|
||||
|
||||
return radv_translate_colorswap(format, false) <= 1;
|
||||
|
@ -982,7 +982,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
|
|||
type = V_008F1C_SQ_RSRC_IMG_3D;
|
||||
} else {
|
||||
type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
|
||||
is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
|
||||
is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
|
||||
}
|
||||
|
||||
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
|
||||
|
@ -1141,7 +1141,7 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
|
|||
}
|
||||
|
||||
/* S8 with either Z16 or Z32 HTILE need a special format. */
|
||||
if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
|
||||
radv_image_is_tc_compat_htile(image)) {
|
||||
if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
|
||||
|
@ -1149,13 +1149,13 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
|
|||
data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9 &&
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9 &&
|
||||
img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
|
||||
assert(image->type == VK_IMAGE_TYPE_3D);
|
||||
type = V_008F1C_SQ_RSRC_IMG_3D;
|
||||
} else {
|
||||
type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
|
||||
is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
|
||||
is_storage_image, device->physical_device->rad_info.gfx_level == GFX9);
|
||||
}
|
||||
|
||||
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
|
||||
|
@ -1185,7 +1185,7 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
|
|||
state[6] = 0;
|
||||
state[7] = 0;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
|
||||
|
||||
/* Depth is the last accessible layer on Gfx9.
|
||||
|
@ -1212,7 +1212,7 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
|
|||
/* The last dword is unused by hw. The shader uses it to clear
|
||||
* bits in the first dword of sampler state.
|
||||
*/
|
||||
if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
|
||||
if (device->physical_device->rad_info.gfx_level <= GFX7 && image->info.samples <= 1) {
|
||||
if (first_level == last_level)
|
||||
state[7] = C_008F30_MAX_ANISO_RATIO;
|
||||
else
|
||||
|
@ -1232,7 +1232,7 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
|
|||
|
||||
va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
|
||||
switch (image->info.samples) {
|
||||
case 2:
|
||||
|
@ -1280,7 +1280,7 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
|
|||
fmask_state[6] = 0;
|
||||
fmask_state[7] = 0;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
|
||||
fmask_state[4] |= S_008F20_DEPTH(last_layer) |
|
||||
S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
|
||||
|
@ -1321,7 +1321,7 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
|
|||
unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
|
||||
uint32_t *fmask_state, VkImageCreateFlags img_create_flags)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
|
||||
first_level, last_level, first_layer, last_layer, width, height,
|
||||
depth, min_lod, state, fmask_state, img_create_flags);
|
||||
|
@ -1362,7 +1362,7 @@ radv_init_metadata(struct radv_device *device, struct radv_image *image,
|
|||
|
||||
memset(metadata, 0, sizeof(*metadata));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
uint64_t dcc_offset =
|
||||
image->offset +
|
||||
(surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
|
||||
|
@ -1461,14 +1461,14 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
|
|||
struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||
int log2_samples = util_logbase2(image->info.samples);
|
||||
|
||||
assert(rad_info->chip_class >= GFX10);
|
||||
assert(rad_info->gfx_level >= GFX10);
|
||||
|
||||
for (unsigned i = 0; i < image->plane_count; ++i) {
|
||||
VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
|
||||
int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
|
||||
int log2_bpp_and_samples;
|
||||
|
||||
if (rad_info->chip_class >= GFX10_3) {
|
||||
if (rad_info->gfx_level >= GFX10_3) {
|
||||
log2_bpp_and_samples = log2_bpp + log2_samples;
|
||||
} else {
|
||||
if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
|
||||
|
@ -1506,10 +1506,10 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
|
|||
static bool
|
||||
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
return !device->physical_device->rad_info.tcc_rb_non_coherent &&
|
||||
!radv_image_is_pipe_misaligned(device, image);
|
||||
} else if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
if (image->info.samples == 1 &&
|
||||
(image->usage &
|
||||
(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
|
||||
|
@ -1560,7 +1560,7 @@ static bool
|
|||
radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
|
||||
{
|
||||
/* comp-to-single is only available for GFX10+. */
|
||||
if (device->physical_device->rad_info.chip_class < GFX10)
|
||||
if (device->physical_device->rad_info.gfx_level < GFX10)
|
||||
return false;
|
||||
|
||||
/* If the image can't be fast cleared, comp-to-single can't be used. */
|
||||
|
@ -1694,7 +1694,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
|
|||
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
|
||||
|
||||
for (unsigned i = 1; i < mem_planes; ++i) {
|
||||
if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
|
||||
if (ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
|
||||
&image->planes[plane].surface, i,
|
||||
0) != mod_info->pPlaneLayouts[i].offset)
|
||||
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
|
||||
|
@ -1753,7 +1753,7 @@ radv_image_print_info(struct radv_device *device, struct radv_image *image)
|
|||
const struct radv_image_plane *plane = &image->planes[i];
|
||||
const struct radeon_surf *surf = &plane->surface;
|
||||
const struct util_format_description *desc = vk_format_description(plane->format);
|
||||
uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
|
||||
uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
|
||||
&plane->surface, 0, 0);
|
||||
|
||||
fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
|
||||
|
@ -1947,7 +1947,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
|
|||
blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
|
||||
vk_format_get_blockwidth(vk_format);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9)
|
||||
hw_level = iview->base_mip;
|
||||
radv_make_texture_descriptor(
|
||||
device, image, is_storage_image, iview->type, vk_format, components, hw_level,
|
||||
|
@ -1960,7 +1960,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
|
|||
img_create_flags);
|
||||
|
||||
const struct legacy_surf_level *base_level_info = NULL;
|
||||
if (device->physical_device->rad_info.chip_class <= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level <= GFX9) {
|
||||
if (is_stencil)
|
||||
base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
|
||||
else
|
||||
|
@ -2118,7 +2118,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
|
|||
plane_count = 1;
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
iview->extent = (VkExtent3D){
|
||||
.width = image->info.width,
|
||||
.height = image->info.height,
|
||||
|
@ -2167,7 +2167,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
|
|||
* block compatible format and the compressed format, so even if we take
|
||||
* the plain converted dimensions the physical layout is correct.
|
||||
*/
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
|
||||
/* If we have multiple levels in the view we should ideally take the last level,
|
||||
* but the mip calculation has a max(..., 1) so walking back to the base mip in an
|
||||
|
@ -2306,7 +2306,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
|
|||
(queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
|
||||
return false;
|
||||
|
||||
return device->physical_device->rad_info.chip_class >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
|
||||
return device->physical_device->rad_info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -2402,17 +2402,17 @@ radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
|
|||
assert(level == 0);
|
||||
assert(layer == 0);
|
||||
|
||||
pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
|
||||
pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
|
||||
surface, mem_plane_id, 0);
|
||||
pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
|
||||
pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.gfx_level,
|
||||
surface, mem_plane_id, level);
|
||||
pLayout->arrayPitch = 0;
|
||||
pLayout->depthPitch = 0;
|
||||
pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
|
||||
} else if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
} else if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
|
||||
|
||||
pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
|
||||
pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.gfx_level,
|
||||
&plane->surface, 0, layer) +
|
||||
level_offset;
|
||||
if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
|
||||
|
|
|
@ -235,7 +235,7 @@ radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
|
|||
{
|
||||
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10 &&
|
||||
device->physical_device->rad_info.has_dedicated_vram) {
|
||||
if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
|
||||
!(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
|
||||
|
|
|
@ -1324,7 +1324,7 @@ get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
|
|||
{
|
||||
unsigned stride;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
|
||||
} else {
|
||||
stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
|
||||
|
|
|
@ -1268,7 +1268,7 @@ radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
|||
uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
|
||||
uint64_t size;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* TODO: clear layers. */
|
||||
size = image->planes[0].surface.cmask_size;
|
||||
} else {
|
||||
|
@ -1314,12 +1314,12 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
|||
uint32_t level = range->baseMipLevel + l;
|
||||
uint64_t size;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
/* DCC for mipmaps+layers is currently disabled. */
|
||||
offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
|
||||
image->planes[0].surface.u.gfx9.meta_levels[level].offset;
|
||||
size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* Mipmap levels and layers aren't implemented. */
|
||||
assert(level == 0);
|
||||
size = image->planes[0].surface.meta_size;
|
||||
|
@ -1472,7 +1472,7 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
|
|||
htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
|
||||
|
||||
if (level_count != image->info.levels) {
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
|
||||
|
||||
/* Clear individuals levels separately. */
|
||||
for (uint32_t l = 0; l < level_count; l++) {
|
||||
|
@ -1667,7 +1667,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
&can_avoid_fast_clear_elim);
|
||||
|
||||
if (iview->image->info.levels > 1) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
uint32_t last_level = iview->base_mip + iview->level_count - 1;
|
||||
if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
|
||||
/* Do not fast clears if one level can't be fast cleard. */
|
||||
|
|
|
@ -115,7 +115,7 @@ radv_image_is_renderable(struct radv_device *device, struct radv_image *image)
|
|||
image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)
|
||||
return false;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 && image->type == VK_IMAGE_TYPE_3D &&
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 && image->type == VK_IMAGE_TYPE_3D &&
|
||||
vk_format_get_blocksizebits(image->vk_format) == 128 &&
|
||||
vk_format_is_compressed(image->vk_format))
|
||||
return false;
|
||||
|
|
|
@ -251,7 +251,7 @@ radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer,
|
|||
unsigned num_rects, const struct radv_meta_blit2d_rect *rects)
|
||||
{
|
||||
/* TODO: Test on pre GFX10 chips. */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX10)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX10)
|
||||
return false;
|
||||
|
||||
/* TODO: Add support for layers. */
|
||||
|
|
|
@ -280,7 +280,7 @@ static bool
|
|||
image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image,
|
||||
struct radv_image *dst_image)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
return dst_image->planes[0].surface.u.gfx9.swizzle_mode ==
|
||||
src_image->planes[0].surface.u.gfx9.swizzle_mode;
|
||||
} else {
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "radv_shader_args.h"
|
||||
|
||||
typedef struct {
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint32_t address32_hi;
|
||||
bool disable_aniso_single_level;
|
||||
bool has_image_load_dcc_bug;
|
||||
|
@ -161,7 +161,7 @@ load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa
|
|||
uint32_t desc_type =
|
||||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
if (state->chip_class >= GFX10) {
|
||||
if (state->gfx_level >= GFX10) {
|
||||
desc_type |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
|
@ -481,7 +481,7 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
|
|||
tex->sampler_non_uniform, tex, false);
|
||||
|
||||
if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT &&
|
||||
state->chip_class < GFX8) {
|
||||
state->gfx_level < GFX8) {
|
||||
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
|
||||
*
|
||||
* GFX6-GFX7:
|
||||
|
@ -527,7 +527,7 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
|
|||
const struct radv_shader_args *args)
|
||||
{
|
||||
apply_layout_state state = {
|
||||
.chip_class = device->physical_device->rad_info.chip_class,
|
||||
.gfx_level = device->physical_device->rad_info.gfx_level,
|
||||
.address32_hi = device->physical_device->rad_info.address32_hi,
|
||||
.disable_aniso_single_level = device->instance->disable_aniso_single_level,
|
||||
.has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug,
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include "radv_shader_args.h"
|
||||
|
||||
typedef struct {
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
const struct radv_shader_args *args;
|
||||
const struct radv_shader_info *info;
|
||||
const struct radv_pipeline_key *pl_key;
|
||||
|
@ -221,14 +221,12 @@ filter_abi_instr(const nir_instr *instr,
|
|||
}
|
||||
|
||||
void
|
||||
radv_nir_lower_abi(nir_shader *shader,
|
||||
enum chip_class chip_class,
|
||||
const struct radv_shader_info *info,
|
||||
const struct radv_shader_args *args,
|
||||
radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
|
||||
const struct radv_shader_info *info, const struct radv_shader_args *args,
|
||||
const struct radv_pipeline_key *pl_key)
|
||||
{
|
||||
lower_abi_state state = {
|
||||
.chip_class = chip_class,
|
||||
.gfx_level = gfx_level,
|
||||
.info = info,
|
||||
.args = args,
|
||||
.pl_key = pl_key,
|
||||
|
|
|
@ -169,7 +169,7 @@ is_pre_gs_stage(gl_shader_stage stage)
|
|||
static void
|
||||
create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage)
|
||||
{
|
||||
if (ctx->ac.chip_class >= GFX10) {
|
||||
if (ctx->ac.gfx_level >= GFX10) {
|
||||
if (is_pre_gs_stage(stage) && ctx->shader_info->is_ngg) {
|
||||
/* On GFX10, VS is merged into GS for NGG. */
|
||||
stage = MESA_SHADER_GEOMETRY;
|
||||
|
@ -466,11 +466,11 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
|||
* dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and
|
||||
* VBO offset is 2 for R16G16B16A16_SNORM).
|
||||
*/
|
||||
if (ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) {
|
||||
if (ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) {
|
||||
unsigned chan_format = vtx_info->chan_format;
|
||||
LLVMValueRef values[4];
|
||||
|
||||
assert(ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10);
|
||||
assert(ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10);
|
||||
|
||||
for (unsigned chan = 0; chan < num_channels; chan++) {
|
||||
unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
|
||||
|
@ -645,7 +645,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
|
|||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_32_AR:
|
||||
if (ctx->ac.chip_class >= GFX10) {
|
||||
if (ctx->ac.gfx_level >= GFX10) {
|
||||
args->enabled_channels = 0x3;
|
||||
args->out[0] = values[0];
|
||||
args->out[1] = values[3];
|
||||
|
@ -986,7 +986,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
|
|||
if (outinfo->writes_layer == true)
|
||||
pos_args[1].out[2] = layer_value;
|
||||
if (outinfo->writes_viewport_index == true) {
|
||||
if (ctx->options->chip_class >= GFX9) {
|
||||
if (ctx->options->gfx_level >= GFX9) {
|
||||
/* GFX9 has the layer in out.z[10:0] and the viewport
|
||||
* index in out.z[19:16].
|
||||
*/
|
||||
|
@ -1011,7 +1011,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
|
|||
/* GFX10 skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
|
||||
* Setting valid_mask=1 prevents it and has no other effect.
|
||||
*/
|
||||
if (ctx->ac.chip_class == GFX10)
|
||||
if (ctx->ac.gfx_level == GFX10)
|
||||
pos_args[0].valid_mask = 1;
|
||||
|
||||
pos_idx = 0;
|
||||
|
@ -1822,7 +1822,7 @@ emit_gs_epilogue(struct radv_shader_context *ctx)
|
|||
return;
|
||||
}
|
||||
|
||||
if (ctx->ac.chip_class >= GFX10)
|
||||
if (ctx->ac.gfx_level >= GFX10)
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_VSTORE);
|
||||
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
|
||||
|
@ -1881,7 +1881,7 @@ ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef pass
|
|||
static void
|
||||
ac_setup_rings(struct radv_shader_context *ctx)
|
||||
{
|
||||
if (ctx->options->chip_class <= GFX8 &&
|
||||
if (ctx->options->gfx_level <= GFX8 &&
|
||||
(ctx->stage == MESA_SHADER_GEOMETRY ||
|
||||
(ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_es) ||
|
||||
(ctx->stage == MESA_SHADER_TESS_EVAL && ctx->shader_info->tes.as_es))) {
|
||||
|
@ -2041,13 +2041,13 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||
float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
|
||||
}
|
||||
|
||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, options->family,
|
||||
options->info, float_mode, info->wave_size, info->ballot_bit_size);
|
||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family, options->info,
|
||||
float_mode, info->wave_size, info->ballot_bit_size);
|
||||
ctx.context = ctx.ac.context;
|
||||
|
||||
ctx.max_workgroup_size = info->workgroup_size;
|
||||
|
||||
if (ctx.ac.chip_class >= GFX10) {
|
||||
if (ctx.ac.gfx_level >= GFX10) {
|
||||
if (is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg) {
|
||||
ctx.max_workgroup_size = 128;
|
||||
}
|
||||
|
@ -2091,7 +2091,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||
declare_esgs_ring(&ctx);
|
||||
|
||||
/* GFX10 hang workaround - there needs to be an s_barrier before gs_alloc_req always */
|
||||
if (ctx.ac.chip_class == GFX10 && shader_count == 1)
|
||||
if (ctx.ac.gfx_level == GFX10 && shader_count == 1)
|
||||
ac_build_s_barrier(&ctx.ac, shaders[0]->info.stage);
|
||||
}
|
||||
|
||||
|
@ -2426,8 +2426,8 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
|
|||
|
||||
assert(args->is_gs_copy_shader);
|
||||
|
||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, options->family,
|
||||
options->info, AC_FLOAT_MODE_DEFAULT, 64, 64);
|
||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family, options->info,
|
||||
AC_FLOAT_MODE_DEFAULT, 64, 64);
|
||||
ctx.context = ctx.ac.context;
|
||||
|
||||
ctx.stage = MESA_SHADER_VERTEX;
|
||||
|
|
|
@ -1183,7 +1183,7 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
|
|||
S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
|
||||
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);
|
||||
ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(
|
||||
pipeline->device->physical_device->rad_info.chip_class >= GFX9) |
|
||||
pipeline->device->physical_device->rad_info.gfx_level >= GFX9) |
|
||||
S_028A48_VPORT_SCISSOR_ENABLE(1);
|
||||
|
||||
const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line = vk_find_struct_const(
|
||||
|
@ -1217,7 +1217,7 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
|
|||
S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
|
||||
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
|
||||
S_028BE0_COVERED_CENTROID_IS_CENTER(
|
||||
pipeline->device->physical_device->rad_info.chip_class >= GFX10_3);
|
||||
pipeline->device->physical_device->rad_info.gfx_level >= GFX10_3);
|
||||
ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
|
||||
if (ps_iter_samples > 1)
|
||||
pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
|
||||
|
@ -1500,7 +1500,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
|
|||
|
||||
/* GS requirement. */
|
||||
ia_multi_vgt_param.partial_es_wave = false;
|
||||
if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
|
||||
if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.gfx_level <= GFX8)
|
||||
if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
|
||||
ia_multi_vgt_param.partial_es_wave = true;
|
||||
|
||||
|
@ -1527,7 +1527,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
|
|||
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
|
||||
if (device->physical_device->rad_info.has_distributed_tess) {
|
||||
if (radv_pipeline_has_gs(pipeline)) {
|
||||
if (device->physical_device->rad_info.chip_class <= GFX8)
|
||||
if (device->physical_device->rad_info.gfx_level <= GFX8)
|
||||
ia_multi_vgt_param.partial_es_wave = true;
|
||||
} else {
|
||||
ia_multi_vgt_param.partial_vs_wave = true;
|
||||
|
@ -1558,9 +1558,9 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
|
|||
ia_multi_vgt_param.base =
|
||||
S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
|
||||
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
|
||||
S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
|
||||
S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
|
||||
S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
|
||||
S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.gfx_level == GFX8 ? 2 : 0) |
|
||||
S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.gfx_level >= GFX9) |
|
||||
S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.gfx_level >= GFX9);
|
||||
|
||||
return ia_multi_vgt_param;
|
||||
}
|
||||
|
@ -1963,7 +1963,7 @@ radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
|
|||
S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
|
||||
S_028814_PROVOKING_VTX_LAST(provoking_vtx_last);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
/* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
|
||||
pipeline->graphics.pa_su_sc_mode_cntl |=
|
||||
S_028814_KEEP_TOGETHER_ENABLE(raster_info->polygonMode != VK_POLYGON_MODE_FILL);
|
||||
|
@ -2026,7 +2026,7 @@ radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
|
|||
/* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
|
||||
ds_state.db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(vkms && vkms->rasterizationSamples > 2);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10_3)
|
||||
ds_state.db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);
|
||||
|
||||
db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
|
||||
|
@ -2071,7 +2071,7 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline
|
|||
struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
|
||||
struct radv_es_output_info *es_info;
|
||||
bool has_tess = !!stages[MESA_SHADER_TESS_CTRL].nir;
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9)
|
||||
es_info = has_tess ? &gs_info->tes.es_info : &gs_info->vs.es_info;
|
||||
else
|
||||
es_info = has_tess ? &stages[MESA_SHADER_TESS_EVAL].info.tes.es_info
|
||||
|
@ -2180,10 +2180,9 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline
|
|||
assert(max_prims_per_subgroup <= max_out_prims);
|
||||
|
||||
gl_shader_stage es_stage = has_tess ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||
unsigned workgroup_size =
|
||||
ac_compute_esgs_workgroup_size(
|
||||
pipeline->device->physical_device->rad_info.chip_class, stages[es_stage].info.wave_size,
|
||||
es_verts_per_subgroup, gs_inst_prims_in_subgroup);
|
||||
unsigned workgroup_size = ac_compute_esgs_workgroup_size(
|
||||
pipeline->device->physical_device->rad_info.gfx_level, stages[es_stage].info.wave_size,
|
||||
es_verts_per_subgroup, gs_inst_prims_in_subgroup);
|
||||
stages[es_stage].info.workgroup_size = workgroup_size;
|
||||
stages[MESA_SHADER_GEOMETRY].info.workgroup_size = workgroup_size;
|
||||
}
|
||||
|
@ -2221,7 +2220,8 @@ radv_get_num_input_vertices(const struct radv_pipeline_stage *stages)
|
|||
}
|
||||
|
||||
static void
|
||||
gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t oversub_pc_lines)
|
||||
gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
uint32_t oversub_pc_lines)
|
||||
{
|
||||
radeon_set_uconfig_reg(
|
||||
cs, R_030980_GE_PC_ALLOC,
|
||||
|
@ -2318,7 +2318,7 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pi
|
|||
|
||||
/* All these are per subgroup: */
|
||||
const unsigned min_esverts =
|
||||
pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
|
||||
pipeline->device->physical_device->rad_info.gfx_level >= GFX10_3 ? 29 : 24;
|
||||
bool max_vert_out_per_gs_instance = false;
|
||||
unsigned max_esverts_base = 128;
|
||||
unsigned max_gsprims_base = 128; /* default prim group size clamp */
|
||||
|
@ -2425,7 +2425,7 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pi
|
|||
max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
|
||||
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level == GFX10)
|
||||
max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
|
||||
else
|
||||
max_esverts = MAX2(max_esverts, min_esverts);
|
||||
|
@ -2448,13 +2448,13 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pi
|
|||
} while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
|
||||
|
||||
/* Verify the restriction. */
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level == GFX10)
|
||||
assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
|
||||
else
|
||||
assert(max_esverts >= min_esverts);
|
||||
} else {
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level == GFX10)
|
||||
max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
|
||||
else
|
||||
max_esverts = MAX2(max_esverts, min_esverts);
|
||||
|
@ -2478,7 +2478,7 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pi
|
|||
* whenever this check passes, there is enough space for a full
|
||||
* primitive without vertex reuse.
|
||||
*/
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level == GFX10)
|
||||
ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
|
||||
else
|
||||
ngg->hw_max_esverts = max_esverts;
|
||||
|
@ -2520,7 +2520,7 @@ radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline, const struct gf
|
|||
* On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
|
||||
*/
|
||||
unsigned gs_vertex_reuse =
|
||||
(device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
|
||||
(device->physical_device->rad_info.gfx_level >= GFX8 ? 32 : 16) * num_se;
|
||||
unsigned alignment = 256 * num_se;
|
||||
/* The maximum size is 63.999 MB per SE. */
|
||||
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
|
||||
|
@ -2538,7 +2538,7 @@ radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline, const struct gf
|
|||
esgs_ring_size = align(esgs_ring_size, alignment);
|
||||
gsvs_ring_size = align(gsvs_ring_size, alignment);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level <= GFX8)
|
||||
pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
|
||||
|
||||
pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
|
||||
|
@ -2747,7 +2747,7 @@ radv_link_shaders(struct radv_pipeline *pipeline,
|
|||
|
||||
bool has_geom_tess = stages[MESA_SHADER_GEOMETRY].nir || stages[MESA_SHADER_TESS_CTRL].nir;
|
||||
bool merged_gs = stages[MESA_SHADER_GEOMETRY].nir &&
|
||||
pipeline->device->physical_device->rad_info.chip_class >= GFX9;
|
||||
pipeline->device->physical_device->rad_info.gfx_level >= GFX9;
|
||||
|
||||
if (!optimize_conservatively && shader_count > 1) {
|
||||
unsigned first = ordered_shaders[shader_count - 1]->info.stage;
|
||||
|
@ -2923,7 +2923,7 @@ radv_link_shaders(struct radv_pipeline *pipeline,
|
|||
if (progress) {
|
||||
if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
|
||||
ac_nir_lower_indirect_derefs(ordered_shaders[i],
|
||||
pipeline->device->physical_device->rad_info.chip_class);
|
||||
pipeline->device->physical_device->rad_info.gfx_level);
|
||||
/* remove dead writes, which can remove input loads */
|
||||
nir_lower_vars_to_ssa(ordered_shaders[i]);
|
||||
nir_opt_dce(ordered_shaders[i]);
|
||||
|
@ -2931,7 +2931,7 @@ radv_link_shaders(struct radv_pipeline *pipeline,
|
|||
|
||||
if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
|
||||
ac_nir_lower_indirect_derefs(ordered_shaders[i - 1],
|
||||
pipeline->device->physical_device->rad_info.chip_class);
|
||||
pipeline->device->physical_device->rad_info.gfx_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2991,7 +2991,7 @@ radv_set_driver_locations(struct radv_pipeline *pipeline, struct radv_pipeline_s
|
|||
unsigned vs_info_idx = MESA_SHADER_VERTEX;
|
||||
unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
/* These are merged into the next stage */
|
||||
vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
|
||||
tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
|
||||
|
@ -3059,10 +3059,10 @@ radv_generate_pipeline_key(const struct radv_pipeline *pipeline, VkPipelineCreat
|
|||
key.optimisations_disabled = 1;
|
||||
|
||||
key.disable_aniso_single_level = device->instance->disable_aniso_single_level &&
|
||||
device->physical_device->rad_info.chip_class < GFX8;
|
||||
device->physical_device->rad_info.gfx_level < GFX8;
|
||||
|
||||
key.image_2d_view_of_3d = device->image_2d_view_of_3d &&
|
||||
device->physical_device->rad_info.chip_class == GFX9;
|
||||
device->physical_device->rad_info.gfx_level == GFX9;
|
||||
|
||||
return key;
|
||||
}
|
||||
|
@ -3119,17 +3119,17 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
|
|||
key.ps.col_format = blend->spi_shader_col_format;
|
||||
key.ps.cb_target_mask = blend->cb_target_mask;
|
||||
key.ps.mrt0_is_dual_src = blend->mrt0_is_dual_src;
|
||||
if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level < GFX8) {
|
||||
key.ps.is_int8 = blend->col_format_is_int8;
|
||||
key.ps.is_int10 = blend->col_format_is_int10;
|
||||
}
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX11) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
key.ps.alpha_to_coverage_via_mrtz = G_028B70_ALPHA_TO_MASK_ENABLE(blend->db_alpha_to_mask);
|
||||
}
|
||||
|
||||
key.vs.topology = vi_info->primitive_topology;
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState;
|
||||
const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info =
|
||||
vk_find_struct_const(raster_info->pNext,
|
||||
|
@ -3348,7 +3348,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
|||
filled_stages |= (1 << MESA_SHADER_FRAGMENT);
|
||||
}
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
struct nir_shader *combined_nir[] = {stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_TESS_CTRL].nir};
|
||||
|
||||
|
@ -3366,7 +3366,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
|||
filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
|
||||
}
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
gl_shader_stage pre_stage =
|
||||
stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||
|
@ -3453,7 +3453,7 @@ static void
|
|||
radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stage *stages,
|
||||
const struct radv_pipeline_key *pipeline_key)
|
||||
{
|
||||
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
unsigned active_stages = 0;
|
||||
|
||||
for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
|
||||
|
@ -3468,8 +3468,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
|
|||
stages[i].args.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr;
|
||||
}
|
||||
|
||||
if (chip_class >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
radv_declare_shader_args(chip_class, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info,
|
||||
if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
radv_declare_shader_args(gfx_level, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info,
|
||||
MESA_SHADER_TESS_CTRL, true, MESA_SHADER_VERTEX,
|
||||
&stages[MESA_SHADER_TESS_CTRL].args);
|
||||
stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
|
||||
|
@ -3481,11 +3481,12 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
|
|||
active_stages &= ~(1 << MESA_SHADER_TESS_CTRL);
|
||||
}
|
||||
|
||||
if (chip_class >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
gl_shader_stage pre_stage =
|
||||
stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||
radv_declare_shader_args(chip_class, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info,
|
||||
MESA_SHADER_GEOMETRY, true, pre_stage, &stages[MESA_SHADER_GEOMETRY].args);
|
||||
radv_declare_shader_args(gfx_level, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info,
|
||||
MESA_SHADER_GEOMETRY, true, pre_stage,
|
||||
&stages[MESA_SHADER_GEOMETRY].args);
|
||||
stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
|
||||
stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
|
||||
stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
|
||||
|
@ -3496,8 +3497,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
|
|||
}
|
||||
|
||||
u_foreach_bit(i, active_stages) {
|
||||
radv_declare_shader_args(chip_class, pipeline_key, &stages[i].info, i, false, MESA_SHADER_VERTEX,
|
||||
&stages[i].args);
|
||||
radv_declare_shader_args(gfx_level, pipeline_key, &stages[i].info, i, false,
|
||||
MESA_SHADER_VERTEX, &stages[i].args);
|
||||
stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
|
||||
stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
|
||||
}
|
||||
|
@ -3572,12 +3573,13 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
|
|||
tess_in_patch_size, tess_out_patch_size,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, device->hs.tess_offchip_block_dw_size,
|
||||
device->physical_device->rad_info.chip_class, device->physical_device->rad_info.family);
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs,
|
||||
device->hs.tess_offchip_block_dw_size, device->physical_device->rad_info.gfx_level,
|
||||
device->physical_device->rad_info.family);
|
||||
|
||||
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
|
||||
unsigned tcs_lds_size = calculate_tess_lds_size(
|
||||
device->physical_device->rad_info.chip_class, tess_in_patch_size, tess_out_patch_size,
|
||||
device->physical_device->rad_info.gfx_level, tess_in_patch_size, tess_out_patch_size,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs, num_patches,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs);
|
||||
|
@ -3608,7 +3610,7 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
|
|||
* doesn't handle a instruction dominating another with a different mode.
|
||||
*/
|
||||
stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq =
|
||||
device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
tess_in_patch_size == tess_out_patch_size &&
|
||||
stages[MESA_SHADER_VERTEX].nir->info.float_controls_execution_mode ==
|
||||
stages[MESA_SHADER_TESS_CTRL].nir->info.float_controls_execution_mode;
|
||||
|
@ -3629,9 +3631,8 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
|
|||
|
||||
for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s)
|
||||
stages[s].info.workgroup_size =
|
||||
ac_compute_lshs_workgroup_size(
|
||||
device->physical_device->rad_info.chip_class, s,
|
||||
num_patches, tess_in_patch_size, tess_out_patch_size);
|
||||
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, s, num_patches,
|
||||
tess_in_patch_size, tess_out_patch_size);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -3702,7 +3703,7 @@ static unsigned
|
|||
lower_bit_size_callback(const nir_instr *instr, void *_)
|
||||
{
|
||||
struct radv_device *device = _;
|
||||
enum chip_class chip = device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
|
||||
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return 0;
|
||||
|
@ -4450,7 +4451,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
|
||||
* the final offset is not.
|
||||
*/
|
||||
.has_shared2_amd = device->physical_device->rad_info.chip_class >= GFX7,
|
||||
.has_shared2_amd = device->physical_device->rad_info.gfx_level >= GFX7,
|
||||
};
|
||||
|
||||
if (device->robust_buffer_access2) {
|
||||
|
@ -4467,7 +4468,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
}
|
||||
|
||||
struct radv_shader_info *info = &stages[i].info;
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
if (i == MESA_SHADER_VERTEX && stages[MESA_SHADER_TESS_CTRL].nir)
|
||||
info = &stages[MESA_SHADER_TESS_CTRL].info;
|
||||
else if (i == MESA_SHADER_VERTEX && stages[MESA_SHADER_GEOMETRY].nir)
|
||||
|
@ -4491,7 +4492,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
nir_lower_idiv(stages[i].nir,
|
||||
&(nir_lower_idiv_options){
|
||||
.imprecise_32bit_lowering = false,
|
||||
.allow_fp16 = device->physical_device->rad_info.chip_class >= GFX9,
|
||||
.allow_fp16 = device->physical_device->rad_info.gfx_level >= GFX9,
|
||||
});
|
||||
|
||||
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
|
||||
|
@ -4509,13 +4510,13 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
radv_lower_ngg(device, &stages[i], pipeline_key);
|
||||
|
||||
ac_nir_lower_global_access(stages[i].nir);
|
||||
radv_nir_lower_abi(stages[i].nir, device->physical_device->rad_info.chip_class,
|
||||
radv_nir_lower_abi(stages[i].nir, device->physical_device->rad_info.gfx_level,
|
||||
&stages[i].info, &stages[i].args, pipeline_key);
|
||||
radv_optimize_nir_algebraic(
|
||||
stages[i].nir, io_to_mem || lowered_ngg || i == MESA_SHADER_COMPUTE || i == MESA_SHADER_TASK);
|
||||
|
||||
if (stages[i].nir->info.bit_sizes_int & (8 | 16)) {
|
||||
if (device->physical_device->rad_info.chip_class >= GFX8) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX8) {
|
||||
nir_convert_to_lcssa(stages[i].nir, true, true);
|
||||
nir_divergence_analysis(stages[i].nir);
|
||||
}
|
||||
|
@ -4525,11 +4526,11 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
NIR_PASS_V(stages[i].nir, nir_opt_dce);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX8)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX8)
|
||||
nir_opt_remove_phis(stages[i].nir); /* cleanup LCSSA phis */
|
||||
}
|
||||
if (((stages[i].nir->info.bit_sizes_int | stages[i].nir->info.bit_sizes_float) & 16) &&
|
||||
device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
bool copy_prop = false;
|
||||
uint32_t sampler_dims = UINT32_MAX;
|
||||
/* Skip because AMD doesn't support 16-bit types with these. */
|
||||
|
@ -4547,7 +4548,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
|
||||
|
||||
NIR_PASS_V(stages[i].nir, nir_opt_vectorize, opt_vectorize_callback, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup passes */
|
||||
nir_lower_load_const_to_scalar(stages[i].nir);
|
||||
|
@ -4585,7 +4586,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
struct radv_shader_args gs_copy_args = {0};
|
||||
gs_copy_args.is_gs_copy_shader = true;
|
||||
gs_copy_args.explicit_scratch_args = !radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX);
|
||||
radv_declare_shader_args(device->physical_device->rad_info.chip_class, pipeline_key, &info,
|
||||
radv_declare_shader_args(device->physical_device->rad_info.gfx_level, pipeline_key, &info,
|
||||
MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX, &gs_copy_args);
|
||||
info.user_sgprs_locs = gs_copy_args.user_sgprs_locs;
|
||||
info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask;
|
||||
|
@ -4615,7 +4616,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
active_stages &= ~(1 << MESA_SHADER_FRAGMENT);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
|
||||
struct nir_shader *combined_nir[] = {stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_TESS_CTRL].nir};
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
|
@ -4631,7 +4632,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
active_stages &= ~(1 << MESA_SHADER_TESS_CTRL);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
gl_shader_stage pre_stage =
|
||||
stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||
if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
|
||||
|
@ -4731,7 +4732,7 @@ done:
|
|||
|
||||
static uint32_t
|
||||
radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_stage stage,
|
||||
enum chip_class chip_class)
|
||||
enum amd_gfx_level gfx_level)
|
||||
{
|
||||
bool has_gs = radv_pipeline_has_gs(pipeline);
|
||||
bool has_tess = radv_pipeline_has_tess(pipeline);
|
||||
|
@ -4742,9 +4743,9 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_sta
|
|||
return R_00B030_SPI_SHADER_USER_DATA_PS_0;
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (has_tess) {
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return R_00B430_SPI_SHADER_USER_DATA_HS_0;
|
||||
} else if (chip_class == GFX9) {
|
||||
} else if (gfx_level == GFX9) {
|
||||
return R_00B430_SPI_SHADER_USER_DATA_LS_0;
|
||||
} else {
|
||||
return R_00B530_SPI_SHADER_USER_DATA_LS_0;
|
||||
|
@ -4752,7 +4753,7 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_sta
|
|||
}
|
||||
|
||||
if (has_gs) {
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
return R_00B230_SPI_SHADER_USER_DATA_GS_0;
|
||||
} else {
|
||||
return R_00B330_SPI_SHADER_USER_DATA_ES_0;
|
||||
|
@ -4764,18 +4765,18 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_sta
|
|||
|
||||
return R_00B130_SPI_SHADER_USER_DATA_VS_0;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
return chip_class == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
|
||||
: R_00B230_SPI_SHADER_USER_DATA_GS_0;
|
||||
return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
|
||||
: R_00B230_SPI_SHADER_USER_DATA_GS_0;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TASK:
|
||||
return R_00B900_COMPUTE_USER_DATA_0;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
return chip_class == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
|
||||
: R_00B430_SPI_SHADER_USER_DATA_HS_0;
|
||||
return gfx_level == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
|
||||
: R_00B430_SPI_SHADER_USER_DATA_HS_0;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (has_gs) {
|
||||
return chip_class >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0
|
||||
: R_00B330_SPI_SHADER_USER_DATA_ES_0;
|
||||
return gfx_level >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0
|
||||
: R_00B330_SPI_SHADER_USER_DATA_ES_0;
|
||||
} else if (has_ngg) {
|
||||
return R_00B230_SPI_SHADER_USER_DATA_GS_0;
|
||||
} else {
|
||||
|
@ -5168,7 +5169,7 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
|
|||
uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
|
||||
S_028C44_DISABLE_START_OF_PRIM(1);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
const VkPipelineRenderingCreateInfo *render_create_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO);
|
||||
const VkPipelineColorBlendStateCreateInfo *vkblend =
|
||||
|
@ -5232,13 +5233,13 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
|
|||
const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||
const struct radv_blend_state *blend)
|
||||
{
|
||||
if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level < GFX9)
|
||||
return;
|
||||
|
||||
VkExtent2D bin_size;
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
|
||||
} else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (pipeline->device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
|
||||
} else
|
||||
unreachable("Unhandled generation for binning bin size calculation");
|
||||
|
@ -5302,7 +5303,7 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
|
|||
const VkConservativeRasterizationModeEXT mode = radv_get_conservative_raster_mode(vkraster);
|
||||
uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
/* Conservative rasterization. */
|
||||
if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
|
||||
pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
|
||||
|
@ -5348,7 +5349,7 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
|
|||
* if no sample lies on the pixel boundary (-8 sample offset). It's
|
||||
* currently always TRUE because the driver doesn't support 16 samples.
|
||||
*/
|
||||
bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
|
||||
bool exclusion = pipeline->device->physical_device->rad_info.gfx_level >= GFX7;
|
||||
radeon_set_context_reg(
|
||||
ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
|
||||
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
|
||||
|
@ -5372,7 +5373,7 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
|
|||
const struct radv_shader *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
|
||||
|
||||
vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
|
||||
pipeline->device->physical_device->rad_info.chip_class);
|
||||
pipeline->device->physical_device->rad_info.gfx_level);
|
||||
} else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
|
||||
vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
|
||||
vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
|
||||
|
@ -5409,7 +5410,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
nparams = MAX2(outinfo->param_exports, 1);
|
||||
spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0);
|
||||
}
|
||||
|
||||
|
@ -5436,15 +5437,15 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
|
||||
total_mask << 8 | clip_dist_mask);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level <= GFX8)
|
||||
radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index);
|
||||
|
||||
unsigned late_alloc_wave64, cu_mask;
|
||||
ac_compute_late_alloc(&pipeline->device->physical_device->rad_info, false, false,
|
||||
shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
|
||||
C_00B118_CU_EN, 0, &pipeline->device->physical_device->rad_info,
|
||||
|
@ -5455,9 +5456,10 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
}
|
||||
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
|
||||
}
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
uint32_t oversub_pc_lines = late_alloc_wave64 ? pipeline->device->physical_device->rad_info.pc_lines / 4 : 0;
|
||||
gfx10_emit_ge_pc_alloc(cs, pipeline->device->physical_device->rad_info.chip_class, oversub_pc_lines);
|
||||
gfx10_emit_ge_pc_alloc(cs, pipeline->device->physical_device->rad_info.gfx_level,
|
||||
oversub_pc_lines);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5485,7 +5487,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipelin
|
|||
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
|
||||
rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level == GFX7 &&
|
||||
pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
|
||||
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
|
||||
|
||||
|
@ -5604,7 +5606,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
*
|
||||
* Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
|
||||
*/
|
||||
if (pipeline->device->physical_device->rad_info.chip_class == GFX10 &&
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level == GFX10 &&
|
||||
!radv_pipeline_has_tess(pipeline) && ngg_state->hw_max_esverts != 256) {
|
||||
ge_cntl &= C_03096C_VERT_GRP_SIZE;
|
||||
|
||||
|
@ -5619,7 +5621,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
ac_compute_late_alloc(&pipeline->device->physical_device->rad_info, true, shader->info.has_ngg_culling,
|
||||
shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &pipeline->device->physical_device->rad_info,
|
||||
|
@ -5649,7 +5651,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
oversub_pc_lines *= oversub_factor;
|
||||
}
|
||||
|
||||
gfx10_emit_ge_pc_alloc(cs, pipeline->device->physical_device->rad_info.chip_class, oversub_pc_lines);
|
||||
gfx10_emit_ge_pc_alloc(cs, pipeline->device->physical_device->rad_info.gfx_level,
|
||||
oversub_pc_lines);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -5658,8 +5661,8 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipelin
|
|||
{
|
||||
uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
|
@ -5718,7 +5721,7 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, struct radeon_
|
|||
|
||||
radv_pipeline_generate_hw_hs(cs, pipeline, tcs);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10 &&
|
||||
!radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
|
||||
radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
|
||||
S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
|
||||
|
@ -5744,7 +5747,7 @@ radv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs,
|
|||
ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
|
||||
S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
|
||||
} else {
|
||||
radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
|
||||
|
@ -5857,8 +5860,8 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
|
||||
va = radv_shader_get_va(gs);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
|
@ -5879,7 +5882,7 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
radeon_emit(cs, gs->config.rsrc2);
|
||||
}
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &pipeline->device->physical_device->rad_info,
|
||||
|
@ -5888,12 +5891,12 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
|
||||
C_00B204_CU_EN_GFX10, 16, &pipeline->device->physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
} else if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
} else if (pipeline->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_sh_reg_idx(
|
||||
pipeline->device->physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
|
||||
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg_idx(
|
||||
pipeline->device->physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
|
||||
|
@ -6145,7 +6148,7 @@ radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs,
|
|||
const struct radv_pipeline *pipeline)
|
||||
{
|
||||
if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
|
||||
pipeline->device->physical_device->rad_info.chip_class >= GFX10)
|
||||
pipeline->device->physical_device->rad_info.gfx_level >= GFX10)
|
||||
return;
|
||||
|
||||
unsigned vtx_reuse_depth = 30;
|
||||
|
@ -6191,10 +6194,10 @@ radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
|
|||
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
||||
}
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9)
|
||||
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
|
||||
|
||||
if (radv_pipeline_has_tess(pipeline))
|
||||
|
@ -6414,11 +6417,11 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
|
|||
radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
|
||||
radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, vgt_gs_out_prim_type);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10 &&
|
||||
!radv_pipeline_has_ngg(pipeline))
|
||||
gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) {
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10_3) {
|
||||
gfx103_pipeline_generate_vgt_draw_payload_cntl(ctx_cs, pipeline, pCreateInfo);
|
||||
gfx103_pipeline_generate_vrs_state(ctx_cs, pipeline, pCreateInfo);
|
||||
}
|
||||
|
@ -6501,7 +6504,7 @@ radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline)
|
|||
if (shader_exists || i < MESA_SHADER_COMPUTE) {
|
||||
/* We need this info for some stages even when the shader doesn't exist. */
|
||||
pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0(
|
||||
pipeline, i, device->physical_device->rad_info.chip_class);
|
||||
pipeline, i, device->physical_device->rad_info.gfx_level);
|
||||
|
||||
if (shader_exists)
|
||||
pipeline->need_indirect_descriptor_sets |=
|
||||
|
@ -6663,7 +6666,7 @@ radv_graphics_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *
|
|||
struct radv_depth_stencil_state ds_state =
|
||||
radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
|
||||
if (pipeline->device->physical_device->rad_info.gfx_level >= GFX10_3)
|
||||
gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo);
|
||||
|
||||
/* Ensure that some export memory is always allocated, for two reasons:
|
||||
|
@ -6682,8 +6685,7 @@ radv_graphics_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *
|
|||
* instructions if any are present.
|
||||
*/
|
||||
struct radv_shader *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
|
||||
if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 ||
|
||||
ps->info.ps.can_discard) &&
|
||||
if ((pipeline->device->physical_device->rad_info.gfx_level <= GFX9 || ps->info.ps.can_discard) &&
|
||||
!blend.spi_shader_col_format) {
|
||||
if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask)
|
||||
blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
|
||||
|
@ -6867,7 +6869,7 @@ radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipelin
|
|||
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
|
||||
radeon_emit(cs, shader->config.rsrc1);
|
||||
radeon_emit(cs, shader->config.rsrc2);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
|
||||
}
|
||||
}
|
||||
|
@ -6887,7 +6889,7 @@ radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs, const struct radv
|
|||
shader->info.cs.block_size[0] * shader->info.cs.block_size[1] * shader->info.cs.block_size[2];
|
||||
waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, shader->info.wave_size);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10 && waves_per_threadgroup == 1)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10 && waves_per_threadgroup == 1)
|
||||
threadgroups_per_cu = 2;
|
||||
|
||||
radeon_set_sh_reg(
|
||||
|
@ -6907,7 +6909,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
|
|||
struct radv_device *device = pipeline->device;
|
||||
struct radeon_cmdbuf *cs = &pipeline->cs;
|
||||
|
||||
cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
|
||||
cs->max_dw = device->physical_device->rad_info.gfx_level >= GFX10 ? 19 : 16;
|
||||
cs->buf = malloc(cs->max_dw * 4);
|
||||
|
||||
radv_pipeline_generate_hw_cs(cs, pipeline);
|
||||
|
@ -6979,7 +6981,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
|
|||
}
|
||||
|
||||
pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(
|
||||
pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
|
||||
pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.gfx_level);
|
||||
pipeline->need_indirect_descriptor_sets |=
|
||||
radv_shader_need_indirect_descriptor_sets(pipeline, MESA_SHADER_COMPUTE);
|
||||
radv_pipeline_init_scratch(device, pipeline);
|
||||
|
@ -7184,8 +7186,8 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
|
|||
struct radv_shader *shader =
|
||||
radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
|
||||
|
||||
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
|
||||
unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
|
||||
enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
unsigned lds_increment = gfx_level >= GFX7 ? 512 : 256;
|
||||
unsigned max_waves = radv_get_max_waves(device, shader, stage);
|
||||
|
||||
VkPipelineExecutableStatisticKHR *s = pStatistics;
|
||||
|
|
|
@ -1598,14 +1598,14 @@ uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool inst
|
|||
bool indirect_draw, bool count_from_stream_output,
|
||||
uint32_t draw_vertex_count, unsigned topology,
|
||||
bool prim_restart_enable);
|
||||
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
|
||||
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
|
||||
unsigned event, unsigned event_flags, unsigned dst_sel,
|
||||
unsigned data_sel, uint64_t va, uint32_t new_fence,
|
||||
uint64_t gfx9_eop_bug_va);
|
||||
|
||||
void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
|
||||
uint32_t mask);
|
||||
void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
|
||||
void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
uint32_t *fence_ptr, uint64_t va, bool is_mec,
|
||||
enum radv_cmd_flush_bits flush_bits,
|
||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
|
||||
|
@ -2283,7 +2283,7 @@ radv_image_is_tc_compat_htile(const struct radv_image *image)
|
|||
static inline bool
|
||||
radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
return !vk_format_has_stencil(image->vk_format) && !radv_image_has_vrs_htile(device, image);
|
||||
} else {
|
||||
/* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
|
||||
|
@ -2392,11 +2392,10 @@ static inline bool
|
|||
radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
|
||||
{
|
||||
/* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
|
||||
return device->physical_device->rad_info.chip_class >= GFX10 &&
|
||||
(image->usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
|
||||
radv_image_is_tc_compat_htile(image) &&
|
||||
image->info.samples > 1;
|
||||
return device->physical_device->rad_info.gfx_level >= GFX10 &&
|
||||
(image->usage &
|
||||
(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
|
||||
radv_image_is_tc_compat_htile(image) && image->info.samples > 1;
|
||||
}
|
||||
|
||||
unsigned radv_image_queue_family_mask(const struct radv_image *image,
|
||||
|
@ -2968,8 +2967,8 @@ radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage
|
|||
static inline bool
|
||||
radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice)
|
||||
{
|
||||
return (pdevice->rad_info.chip_class <= GFX7 && !pdevice->use_llvm) ||
|
||||
pdevice->rad_info.chip_class >= GFX10;
|
||||
return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) ||
|
||||
pdevice->rad_info.gfx_level >= GFX10;
|
||||
}
|
||||
|
||||
struct radv_acceleration_structure {
|
||||
|
|
|
@ -1540,7 +1540,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
|||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
|
||||
0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
|
@ -1580,7 +1580,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
|||
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
|
||||
RADV_CMD_FLAG_INV_VCACHE;
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
cmd_buffer->active_query_flush_bits |=
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
|
||||
}
|
||||
|
@ -1681,7 +1681,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
|||
radeon_emit(cs, query_va);
|
||||
radeon_emit(cs, query_va >> 32);
|
||||
} else {
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_TIMESTAMP, query_va, 0,
|
||||
cmd_buffer->gfx9_eop_bug_va);
|
||||
|
@ -1692,7 +1692,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
|
|||
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
|
||||
RADV_CMD_FLAG_INV_VCACHE;
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
cmd_buffer->active_query_flush_bits |=
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ bool
|
|||
radv_emulate_rt(const struct radv_physical_device *pdevice)
|
||||
{
|
||||
assert(radv_enable_rt(pdevice));
|
||||
return pdevice->rad_info.chip_class < GFX10_3 ||
|
||||
return pdevice->rad_info.gfx_level < GFX10_3 ||
|
||||
(pdevice->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT);
|
||||
}
|
||||
|
||||
|
@ -348,7 +348,7 @@ build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node
|
|||
addr = nir_ishl_imm(b, addr, 3);
|
||||
/* Assumes everything is in the top half of address space, which is true in
|
||||
* GFX9+ for now. */
|
||||
return device->physical_device->rad_info.chip_class >= GFX9
|
||||
return device->physical_device->rad_info.gfx_level >= GFX9
|
||||
? nir_ior_imm(b, addr, 0xffffull << 48)
|
||||
: addr;
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct
|
|||
unsigned linear_slice_pitch = region->bufferRowLength * copy_height;
|
||||
uint64_t tiled_address = src_address;
|
||||
uint64_t linear_address = dst_address;
|
||||
bool is_v5 = device->physical_device->rad_info.chip_class >= GFX10;
|
||||
bool is_v5 = device->physical_device->rad_info.gfx_level >= GFX10;
|
||||
/* Only SDMA 5 supports DCC with SDMA */
|
||||
bool dcc = radv_dcc_enabled(image, 0) && is_v5;
|
||||
|
||||
|
@ -191,6 +191,6 @@ bool
|
|||
radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
||||
struct radv_buffer *buffer, const VkBufferImageCopy2 *region)
|
||||
{
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9);
|
||||
assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9);
|
||||
return radv_sdma_v4_v5_copy_image_to_buffer(cmd_buffer, image, buffer, region);
|
||||
}
|
||||
|
|
|
@ -79,13 +79,13 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
|
|||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_ffma16 = split_fma || device->rad_info.chip_class < GFX9,
|
||||
.lower_ffma32 = split_fma || device->rad_info.chip_class < GFX10_3,
|
||||
.lower_ffma16 = split_fma || device->rad_info.gfx_level < GFX9,
|
||||
.lower_ffma32 = split_fma || device->rad_info.gfx_level < GFX10_3,
|
||||
.lower_ffma64 = split_fma,
|
||||
.lower_fpow = true,
|
||||
.lower_mul_2x32_64 = true,
|
||||
.lower_rotate = true,
|
||||
.lower_iadd_sat = device->rad_info.chip_class <= GFX8,
|
||||
.lower_iadd_sat = device->rad_info.gfx_level <= GFX8,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.has_sdot_4x8 = device->rad_info.has_accelerated_dot_product,
|
||||
|
@ -654,7 +654,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct radv_pipelin
|
|||
.variable_pointers = true,
|
||||
.vk_memory_model = true,
|
||||
.vk_memory_model_device_scope = true,
|
||||
.fragment_shading_rate = device->physical_device->rad_info.chip_class >= GFX10_3,
|
||||
.fragment_shading_rate = device->physical_device->rad_info.gfx_level >= GFX10_3,
|
||||
.workgroup_memory_explicit_layout = true,
|
||||
},
|
||||
.ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
||||
|
@ -747,7 +747,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct radv_pipelin
|
|||
|
||||
nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX6) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX6) {
|
||||
/* GFX6 doesn't support v_floor_f64 and the precision
|
||||
* of v_fract_f64 which is used to implement 64-bit
|
||||
* floor is less than what Vulkan requires.
|
||||
|
@ -824,7 +824,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct radv_pipelin
|
|||
|
||||
nir_lower_global_vars_to_local(nir);
|
||||
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
|
||||
bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
|
||||
bool gfx7minus = device->physical_device->rad_info.gfx_level <= GFX7;
|
||||
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options){
|
||||
.subgroup_size = subgroup_size,
|
||||
.ballot_bit_size = ballot_bit_size,
|
||||
|
@ -916,7 +916,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct radv_pipelin
|
|||
* bloat the instruction count of the loop and cause it to be
|
||||
* considered too large for unrolling.
|
||||
*/
|
||||
if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
|
||||
if (ac_nir_lower_indirect_derefs(nir, device->physical_device->rad_info.gfx_level) &&
|
||||
!key->optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) {
|
||||
/* Optimize the lowered code before the linking optimizations. */
|
||||
radv_optimize_nir(nir, false, false);
|
||||
|
@ -1023,14 +1023,14 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
|
|||
info->vs.num_linked_outputs);
|
||||
return true;
|
||||
} else if (info->vs.as_es) {
|
||||
ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.chip_class,
|
||||
ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.gfx_level,
|
||||
info->vs.num_linked_outputs);
|
||||
return true;
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
ac_nir_lower_hs_inputs_to_mem(nir, info->vs.tcs_in_out_eq, info->tcs.num_linked_inputs);
|
||||
ac_nir_lower_hs_outputs_to_mem(
|
||||
nir, device->physical_device->rad_info.chip_class, info->tcs.tes_reads_tess_factors,
|
||||
nir, device->physical_device->rad_info.gfx_level, info->tcs.tes_reads_tess_factors,
|
||||
info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_inputs,
|
||||
info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, true);
|
||||
|
||||
|
@ -1040,13 +1040,13 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
|
|||
info->tes.num_linked_patch_inputs);
|
||||
|
||||
if (info->tes.as_es) {
|
||||
ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.chip_class,
|
||||
ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.gfx_level,
|
||||
info->tes.num_linked_outputs);
|
||||
}
|
||||
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
ac_nir_lower_gs_inputs_to_mem(nir, device->physical_device->rad_info.chip_class,
|
||||
ac_nir_lower_gs_inputs_to_mem(nir, device->physical_device->rad_info.gfx_level,
|
||||
info->gs.num_linked_inputs);
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TASK) {
|
||||
|
@ -1484,7 +1484,7 @@ static bool
|
|||
radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
|
||||
const struct radv_shader_info *info)
|
||||
{
|
||||
enum chip_class chip = device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
|
||||
switch (stage) {
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
|
@ -1520,8 +1520,8 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
unsigned num_sgprs = MAX2(config_in->num_sgprs, args->ac.num_sgprs_used + 2 + 3);
|
||||
unsigned num_shared_vgprs = config_in->num_shared_vgprs;
|
||||
/* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */
|
||||
assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) ||
|
||||
(pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0));
|
||||
assert((pdevice->rad_info.gfx_level >= GFX10 && num_shared_vgprs % 8 == 0) ||
|
||||
(pdevice->rad_info.gfx_level < GFX10 && num_shared_vgprs == 0));
|
||||
unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8;
|
||||
unsigned excp_en = 0;
|
||||
|
||||
|
@ -1550,7 +1550,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) |
|
||||
S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config_out->float_mode);
|
||||
|
||||
if (pdevice->rad_info.chip_class >= GFX10) {
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5);
|
||||
} else {
|
||||
config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
|
||||
|
@ -1562,10 +1562,10 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
switch (stage) {
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
if (info->is_ngg) {
|
||||
config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
|
||||
config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10);
|
||||
config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) | S_00B22C_EXCP_EN(excp_en);
|
||||
} else if (info->tes.as_es) {
|
||||
assert(pdevice->rad_info.chip_class <= GFX8);
|
||||
assert(pdevice->rad_info.gfx_level <= GFX8);
|
||||
vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
|
||||
|
||||
config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
|
||||
|
@ -1573,21 +1573,21 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
bool enable_prim_id = info->tes.outinfo.export_prim_id || info->uses_prim_id;
|
||||
vgpr_comp_cnt = enable_prim_id ? 3 : 2;
|
||||
|
||||
config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
|
||||
config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10);
|
||||
config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
|
||||
}
|
||||
config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (pdevice->rad_info.chip_class >= GFX9) {
|
||||
if (pdevice->rad_info.gfx_level >= GFX9) {
|
||||
/* We need at least 2 components for LS.
|
||||
* VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
|
||||
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
|
||||
*/
|
||||
if (pdevice->rad_info.chip_class >= GFX10) {
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
if (info->vs.needs_instance_id) {
|
||||
vgpr_comp_cnt = 3;
|
||||
} else if (pdevice->rad_info.chip_class <= GFX10_3) {
|
||||
} else if (pdevice->rad_info.gfx_level <= GFX10_3) {
|
||||
vgpr_comp_cnt = 1;
|
||||
}
|
||||
config_out->rsrc2 |=
|
||||
|
@ -1601,21 +1601,21 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
|
||||
}
|
||||
config_out->rsrc1 |=
|
||||
S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B428_WGP_MODE(wgp_mode);
|
||||
S_00B428_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B428_WGP_MODE(wgp_mode);
|
||||
config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
|
||||
break;
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (info->is_ngg) {
|
||||
config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
|
||||
config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10);
|
||||
} else if (info->vs.as_ls) {
|
||||
assert(pdevice->rad_info.chip_class <= GFX8);
|
||||
assert(pdevice->rad_info.gfx_level <= GFX8);
|
||||
/* We need at least 2 components for LS.
|
||||
* VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
|
||||
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
|
||||
*/
|
||||
vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
|
||||
} else if (info->vs.as_es) {
|
||||
assert(pdevice->rad_info.chip_class <= GFX8);
|
||||
assert(pdevice->rad_info.gfx_level <= GFX8);
|
||||
/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
|
||||
vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0;
|
||||
} else {
|
||||
|
@ -1623,7 +1623,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
* If PrimID is disabled. InstanceID / StepRate1 is loaded instead.
|
||||
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
|
||||
*/
|
||||
if (info->vs.needs_instance_id && pdevice->rad_info.chip_class >= GFX10) {
|
||||
if (info->vs.needs_instance_id && pdevice->rad_info.gfx_level >= GFX10) {
|
||||
vgpr_comp_cnt = 3;
|
||||
} else if (info->vs.outinfo.export_prim_id) {
|
||||
vgpr_comp_cnt = 2;
|
||||
|
@ -1633,7 +1633,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
vgpr_comp_cnt = 0;
|
||||
}
|
||||
|
||||
config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
|
||||
config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10);
|
||||
}
|
||||
config_out->rsrc2 |=
|
||||
S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B12C_EXCP_EN(excp_en);
|
||||
|
@ -1644,19 +1644,19 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B12C_EXCP_EN(excp_en);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
|
||||
config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10);
|
||||
config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
|
||||
S_00B02C_EXCP_EN(excp_en);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
|
||||
config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10);
|
||||
config_out->rsrc2 |=
|
||||
S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B22C_EXCP_EN(excp_en);
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TASK:
|
||||
config_out->rsrc1 |=
|
||||
S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B848_WGP_MODE(wgp_mode);
|
||||
S_00B848_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | S_00B848_WGP_MODE(wgp_mode);
|
||||
config_out->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
|
||||
S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
|
||||
S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
|
||||
|
@ -1673,7 +1673,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
break;
|
||||
}
|
||||
|
||||
if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
|
||||
if (pdevice->rad_info.gfx_level >= GFX10 && info->is_ngg &&
|
||||
(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
|
||||
stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH)) {
|
||||
unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
|
||||
|
@ -1727,14 +1727,14 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
|
||||
S_00B22C_LDS_SIZE(config_in->lds_size) |
|
||||
S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
|
||||
} else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_GEOMETRY) {
|
||||
} else if (pdevice->rad_info.gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) {
|
||||
unsigned es_type = info->gs.es_type;
|
||||
unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
|
||||
|
||||
if (es_type == MESA_SHADER_VERTEX) {
|
||||
/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
|
||||
if (info->vs.needs_instance_id) {
|
||||
es_vgpr_comp_cnt = pdevice->rad_info.chip_class >= GFX10 ? 3 : 1;
|
||||
es_vgpr_comp_cnt = pdevice->rad_info.gfx_level >= GFX10 ? 3 : 1;
|
||||
} else {
|
||||
es_vgpr_comp_cnt = 0;
|
||||
}
|
||||
|
@ -1761,7 +1761,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
|
|||
S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
|
||||
config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
|
||||
S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
|
||||
} else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_TESS_CTRL) {
|
||||
} else if (pdevice->rad_info.gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) {
|
||||
config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
|
||||
} else {
|
||||
config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
|
||||
|
@ -1777,9 +1777,9 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader *shad
|
|||
struct ac_rtld_symbol lds_symbols[2];
|
||||
unsigned num_lds_symbols = 0;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
(binary->stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg) &&
|
||||
!binary->is_gs_copy_shader) {
|
||||
!binary->is_gs_copy_shader) {
|
||||
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
|
||||
sym->name = "esgs_ring";
|
||||
sym->size = binary->info.ngg_info.esgs_ring_size;
|
||||
|
@ -1986,7 +1986,7 @@ shader_compile(struct radv_device *device, struct nir_shader *const *shaders, in
|
|||
};
|
||||
|
||||
options->family = chip_family;
|
||||
options->chip_class = device->physical_device->rad_info.chip_class;
|
||||
options->gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
options->info = &device->physical_device->rad_info;
|
||||
options->dump_shader = radv_can_dump_shader(device, shaders[0], gs_copy_shader || trap_handler_shader);
|
||||
options->dump_preoptir =
|
||||
|
@ -2100,7 +2100,7 @@ radv_create_trap_handler_shader(struct radv_device *device)
|
|||
struct radv_shader_args args;
|
||||
args.explicit_scratch_args = true;
|
||||
args.is_trap_handler_shader = true;
|
||||
radv_declare_shader_args(device->physical_device->rad_info.chip_class, &key, &info,
|
||||
radv_declare_shader_args(device->physical_device->rad_info.gfx_level, &key, &info,
|
||||
MESA_SHADER_COMPUTE, false, MESA_SHADER_VERTEX, &args);
|
||||
|
||||
shader = shader_compile(device, &b.shader, 1, MESA_SHADER_COMPUTE, &info, &args, &options,
|
||||
|
@ -2174,7 +2174,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
|
|||
struct radv_shader_args args = {0};
|
||||
struct radv_nir_compiler_options options = {0};
|
||||
options.family = device->physical_device->rad_info.family;
|
||||
options.chip_class = device->physical_device->rad_info.chip_class;
|
||||
options.gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
options.info = &device->physical_device->rad_info;
|
||||
options.address32_hi = device->physical_device->rad_info.address32_hi;
|
||||
options.dump_shader = device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS;
|
||||
|
@ -2194,7 +2194,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
|
|||
struct radv_pipeline_key pipeline_key = {0};
|
||||
|
||||
args.explicit_scratch_args = true;
|
||||
radv_declare_shader_args(options.chip_class, &pipeline_key, &info, key->next_stage,
|
||||
radv_declare_shader_args(options.gfx_level, &pipeline_key, &info, key->next_stage,
|
||||
key->next_stage != MESA_SHADER_VERTEX, MESA_SHADER_VERTEX, &args);
|
||||
|
||||
info.user_sgprs_locs = args.user_sgprs_locs;
|
||||
|
@ -2338,7 +2338,7 @@ radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader,
|
|||
gl_shader_stage stage)
|
||||
{
|
||||
struct radeon_info *info = &device->physical_device->rad_info;
|
||||
enum chip_class chip_class = info->chip_class;
|
||||
enum amd_gfx_level gfx_level = info->gfx_level;
|
||||
uint8_t wave_size = shader->info.wave_size;
|
||||
struct ac_shader_config *conf = &shader->config;
|
||||
unsigned max_simd_waves;
|
||||
|
@ -2357,28 +2357,28 @@ radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader,
|
|||
lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
|
||||
}
|
||||
|
||||
if (conf->num_sgprs && chip_class < GFX10) {
|
||||
unsigned sgprs = align(conf->num_sgprs, chip_class >= GFX8 ? 16 : 8);
|
||||
if (conf->num_sgprs && gfx_level < GFX10) {
|
||||
unsigned sgprs = align(conf->num_sgprs, gfx_level >= GFX8 ? 16 : 8);
|
||||
max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
|
||||
}
|
||||
|
||||
if (conf->num_vgprs) {
|
||||
unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
|
||||
unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
|
||||
if (chip_class >= GFX10_3)
|
||||
if (gfx_level >= GFX10_3)
|
||||
vgprs = align(vgprs, wave_size == 32 ? 16 : 8);
|
||||
max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
|
||||
}
|
||||
|
||||
unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
|
||||
if (chip_class >= GFX10)
|
||||
if (gfx_level >= GFX10)
|
||||
simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
|
||||
|
||||
unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
|
||||
if (lds_per_wave)
|
||||
max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
|
||||
|
||||
return chip_class >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
|
||||
return gfx_level >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
|
@ -129,7 +129,7 @@ struct radv_nir_compiler_options {
|
|||
uint8_t enable_mrt_output_nan_fixup;
|
||||
bool wgp_mode;
|
||||
enum radeon_family family;
|
||||
enum chip_class chip_class;
|
||||
enum amd_gfx_level gfx_level;
|
||||
const struct radeon_info *info;
|
||||
uint32_t address32_hi;
|
||||
|
||||
|
@ -484,7 +484,7 @@ struct radv_shader {
|
|||
uint32_t exec_size;
|
||||
struct radv_shader_info info;
|
||||
|
||||
/* debug only */
|
||||
/* debug only */
|
||||
char *spirv;
|
||||
uint32_t spirv_size;
|
||||
char *nir_string;
|
||||
|
@ -528,9 +528,8 @@ nir_shader *radv_shader_compile_to_nir(struct radv_device *device,
|
|||
const struct radv_pipeline_stage *stage,
|
||||
const struct radv_pipeline_key *key);
|
||||
|
||||
void radv_nir_lower_abi(nir_shader *shader, enum chip_class chip_class,
|
||||
const struct radv_shader_info *info,
|
||||
const struct radv_shader_args *args,
|
||||
void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
|
||||
const struct radv_shader_info *info, const struct radv_shader_args *args,
|
||||
const struct radv_pipeline_key *pl_key);
|
||||
|
||||
void radv_init_shader_arenas(struct radv_device *device);
|
||||
|
@ -601,7 +600,7 @@ VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline
|
|||
gl_shader_stage stage, FILE *output);
|
||||
|
||||
static inline unsigned
|
||||
calculate_tess_lds_size(enum chip_class chip_class, unsigned tcs_num_input_vertices,
|
||||
calculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_vertices,
|
||||
unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
|
||||
unsigned tcs_num_patches, unsigned tcs_num_outputs,
|
||||
unsigned tcs_num_patch_outputs)
|
||||
|
@ -618,7 +617,7 @@ calculate_tess_lds_size(enum chip_class chip_class, unsigned tcs_num_input_verti
|
|||
|
||||
unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
|
||||
|
||||
if (chip_class >= GFX7) {
|
||||
if (gfx_level >= GFX7) {
|
||||
assert(lds_size <= 65536);
|
||||
lds_size = align(lds_size, 512) / 512;
|
||||
} else {
|
||||
|
@ -633,7 +632,7 @@ static inline unsigned
|
|||
get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
|
||||
unsigned tcs_num_inputs, unsigned tcs_num_outputs,
|
||||
unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
|
||||
enum chip_class chip_class, enum radeon_family family)
|
||||
enum amd_gfx_level gfx_level, enum radeon_family family)
|
||||
{
|
||||
uint32_t input_vertex_size = tcs_num_inputs * 16;
|
||||
uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
|
||||
|
@ -656,7 +655,7 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_ver
|
|||
*
|
||||
* Test: dEQP-VK.tessellation.shader_input_output.barrier
|
||||
*/
|
||||
if (chip_class >= GFX7 && family != CHIP_STONEY)
|
||||
if (gfx_level >= GFX7 && family != CHIP_STONEY)
|
||||
hardware_lds_size = 65536;
|
||||
|
||||
if (input_patch_size + output_patch_size)
|
||||
|
@ -670,7 +669,7 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_ver
|
|||
num_patches = MIN2(num_patches, 40);
|
||||
|
||||
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
|
||||
if (chip_class == GFX6) {
|
||||
if (gfx_level == GFX6) {
|
||||
unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
|
||||
num_patches = MIN2(num_patches, one_wave);
|
||||
}
|
||||
|
|
|
@ -148,7 +148,7 @@ allocate_inline_push_consts(const struct radv_shader_info *info,
|
|||
}
|
||||
|
||||
static void
|
||||
allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *info,
|
||||
allocate_user_sgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
|
||||
struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
|
||||
gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs,
|
||||
struct user_sgpr_info *user_sgpr_info)
|
||||
|
@ -224,7 +224,7 @@ allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *i
|
|||
user_sgpr_count++;
|
||||
|
||||
uint32_t available_sgprs =
|
||||
chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
|
||||
gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
|
||||
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
|
||||
uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
|
||||
|
||||
|
@ -295,18 +295,18 @@ declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv
|
|||
}
|
||||
|
||||
static void
|
||||
declare_vs_input_vgprs(enum chip_class chip_class, const struct radv_shader_info *info,
|
||||
declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info *info,
|
||||
struct radv_shader_args *args)
|
||||
{
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
|
||||
if (!args->is_gs_copy_shader) {
|
||||
if (info->vs.as_ls) {
|
||||
|
||||
if (chip_class >= GFX11) {
|
||||
if (gfx_level >= GFX11) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
|
||||
} else if (chip_class >= GFX10) {
|
||||
} else if (gfx_level >= GFX10) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
|
||||
|
@ -316,7 +316,7 @@ declare_vs_input_vgprs(enum chip_class chip_class, const struct radv_shader_info
|
|||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
|
||||
}
|
||||
} else {
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
if (info->is_ngg) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
|
||||
|
@ -524,7 +524,7 @@ set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
|
|||
}
|
||||
|
||||
void
|
||||
radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_key *key,
|
||||
radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipeline_key *key,
|
||||
const struct radv_shader_info *info, gl_shader_stage stage,
|
||||
bool has_previous_stage, gl_shader_stage previous_stage,
|
||||
struct radv_shader_args *args)
|
||||
|
@ -533,7 +533,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
bool needs_view_index = info->uses_view_index;
|
||||
bool has_api_gs = stage == MESA_SHADER_GEOMETRY;
|
||||
|
||||
if (chip_class >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
|
||||
if (gfx_level >= GFX10 && info->is_ngg && stage != MESA_SHADER_GEOMETRY) {
|
||||
/* Handle all NGG shaders as GS to simplify the code here. */
|
||||
previous_stage = stage;
|
||||
stage = MESA_SHADER_GEOMETRY;
|
||||
|
@ -545,7 +545,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
for (int i = 0; i < AC_UD_MAX_UD; i++)
|
||||
args->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
|
||||
|
||||
allocate_user_sgprs(chip_class, info, args, stage, has_previous_stage, previous_stage,
|
||||
allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, previous_stage,
|
||||
needs_view_index, has_api_gs, &user_sgpr_info);
|
||||
|
||||
if (args->explicit_scratch_args) {
|
||||
|
@ -603,7 +603,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
}
|
||||
|
||||
if (chip_class >= GFX11)
|
||||
if (gfx_level >= GFX11)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
else
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
|
||||
|
@ -636,7 +636,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
}
|
||||
|
||||
declare_vs_input_vgprs(chip_class, info, args);
|
||||
declare_vs_input_vgprs(gfx_level, info, args);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (has_previous_stage) {
|
||||
|
@ -645,7 +645,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
|
||||
|
||||
if (chip_class >= GFX11) {
|
||||
if (gfx_level >= GFX11) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
|
@ -665,7 +665,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
|
||||
|
||||
declare_vs_input_vgprs(chip_class, info, args);
|
||||
declare_vs_input_vgprs(gfx_level, info, args);
|
||||
} else {
|
||||
declare_global_input_sgprs(info, &user_sgpr_info, args);
|
||||
|
||||
|
@ -747,7 +747,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
|
|||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
|
||||
|
||||
if (previous_stage == MESA_SHADER_VERTEX) {
|
||||
declare_vs_input_vgprs(chip_class, info, args);
|
||||
declare_vs_input_vgprs(gfx_level, info, args);
|
||||
} else if (previous_stage == MESA_SHADER_TESS_EVAL) {
|
||||
declare_tes_input_vgprs(args);
|
||||
} else if (previous_stage == MESA_SHADER_MESH) {
|
||||
|
|
|
@ -76,7 +76,7 @@ radv_shader_args_from_ac(struct ac_shader_args *args)
|
|||
struct radv_pipeline_key;
|
||||
struct radv_shader_info;
|
||||
|
||||
void radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_key *key,
|
||||
void radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipeline_key *key,
|
||||
const struct radv_shader_info *info, gl_shader_stage stage,
|
||||
bool has_previous_stage, gl_shader_stage previous_stage,
|
||||
struct radv_shader_args *args);
|
||||
|
|
|
@ -200,7 +200,7 @@ radv_spm_init(struct radv_device *device)
|
|||
{GL1C, 0, 0xe}, /* Number of GL1C requests. */
|
||||
{GL1C, 0, 0x12}, /* Number of GL1C misses. */
|
||||
{GL2C, 0, 0x3}, /* Number of GL2C requests. */
|
||||
{GL2C, 0, info->chip_class >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */
|
||||
{GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */
|
||||
};
|
||||
|
||||
if (!ac_init_perfcounters(info, false, false, pc))
|
||||
|
|
|
@ -51,7 +51,7 @@ gfx10_get_thread_trace_ctrl(struct radv_device *device, bool enable)
|
|||
S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) |
|
||||
S_008D1C_REG_DROP_ON_STALL(0);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX10_3)
|
||||
if (device->physical_device->rad_info.gfx_level == GFX10_3)
|
||||
thread_trace_ctrl |= S_008D1C_LOWATER_OFFSET(4);
|
||||
|
||||
if (device->physical_device->rad_info.has_sqtt_auto_flush_mode_bug)
|
||||
|
@ -65,8 +65,8 @@ radv_emit_wait_for_idle(struct radv_device *device, struct radeon_cmdbuf *cs, in
|
|||
{
|
||||
enum rgp_flush_bits sqtt_flush_bits = 0;
|
||||
si_cs_emit_cache_flush(
|
||||
cs, device->physical_device->rad_info.chip_class, NULL, 0,
|
||||
family == AMD_IP_COMPUTE && device->physical_device->rad_info.chip_class >= GFX7,
|
||||
cs, device->physical_device->rad_info.gfx_level, NULL, 0,
|
||||
family == AMD_IP_COMPUTE && device->physical_device->rad_info.gfx_level >= GFX7,
|
||||
(family == RADV_QUEUE_COMPUTE
|
||||
? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
|
||||
: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
|
||||
|
@ -97,7 +97,7 @@ radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *c
|
|||
cs, R_030800_GRBM_GFX_INDEX,
|
||||
S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
/* Order seems important for the following 2 registers. */
|
||||
radeon_set_privileged_config_reg(
|
||||
cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||
|
@ -149,7 +149,7 @@ radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *c
|
|||
S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
|
||||
S_030CC8_SQ_STALL_EN(1);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class < GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level < GFX9) {
|
||||
thread_trace_mask |= S_030CC8_RANDOM_SEED(0xffff);
|
||||
}
|
||||
|
||||
|
@ -168,7 +168,7 @@ radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *c
|
|||
|
||||
radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* Reset thread trace status errors. */
|
||||
radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *c
|
|||
S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
|
||||
S_030CD8_MODE(1);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
/* Count SQTT traffic in TCC perf counters. */
|
||||
thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
|
||||
}
|
||||
|
@ -227,12 +227,12 @@ radv_copy_thread_trace_info_regs(struct radv_device *device, struct radeon_cmdbu
|
|||
{
|
||||
const uint32_t *thread_trace_info_regs = NULL;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
thread_trace_info_regs = gfx10_thread_trace_info_regs;
|
||||
} else if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
thread_trace_info_regs = gfx9_thread_trace_info_regs;
|
||||
} else {
|
||||
assert(device->physical_device->rad_info.chip_class == GFX8);
|
||||
assert(device->physical_device->rad_info.gfx_level == GFX8);
|
||||
thread_trace_info_regs = gfx8_thread_trace_info_regs;
|
||||
}
|
||||
|
||||
|
@ -283,7 +283,7 @@ radv_emit_thread_trace_stop(struct radv_device *device, struct radeon_cmdbuf *cs
|
|||
cs, R_030800_GRBM_GFX_INDEX,
|
||||
S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
if (!device->physical_device->rad_info.has_sqtt_rb_harvest_bug) {
|
||||
/* Make sure to wait for the trace buffer. */
|
||||
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
|
||||
|
@ -347,7 +347,7 @@ radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_
|
|||
|
||||
/* Without the perfctr bit the CP might not always pass the
|
||||
* write on correctly. */
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10)
|
||||
radeon_set_uconfig_reg_seq_perfctr(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
|
||||
else
|
||||
radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
|
||||
|
@ -361,12 +361,12 @@ radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_
|
|||
static void
|
||||
radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
uint32_t spi_config_cntl =
|
||||
S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
|
||||
S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10)
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10)
|
||||
spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
|
||||
|
||||
radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
|
||||
|
@ -381,10 +381,10 @@ radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
static void
|
||||
radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
|
||||
{
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
|
||||
S_037390_PERFMON_CLOCK_STATE(inhibit));
|
||||
} else if (device->physical_device->rad_info.chip_class >= GFX8) {
|
||||
} else if (device->physical_device->rad_info.gfx_level >= GFX8) {
|
||||
radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
|
||||
S_0372FC_PERFMON_CLOCK_STATE(inhibit));
|
||||
}
|
||||
|
@ -704,7 +704,7 @@ radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_t
|
|||
thread_trace_se.shader_engine = se;
|
||||
|
||||
/* RGP seems to expect units of WGP on GFX10+. */
|
||||
thread_trace_se.compute_unit = device->physical_device->rad_info.chip_class >= GFX10
|
||||
thread_trace_se.compute_unit = device->physical_device->rad_info.gfx_level >= GFX10
|
||||
? (first_active_cu / 2)
|
||||
: first_active_cu;
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ radv_wsi_get_prime_blit_queue(VkDevice _device)
|
|||
if (device->private_sdma_queue != VK_NULL_HANDLE)
|
||||
return vk_queue_to_handle(&device->private_sdma_queue->vk);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
!(device->physical_device->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {
|
||||
|
||||
device->physical_device->vk_queue_to_radv[device->physical_device->num_queues++] = RADV_QUEUE_TRANSFER;
|
||||
|
@ -96,7 +96,7 @@ radv_init_wsi(struct radv_physical_device *physical_device)
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
physical_device->wsi_device.supports_modifiers = physical_device->rad_info.chip_class >= GFX9;
|
||||
physical_device->wsi_device.supports_modifiers = physical_device->rad_info.gfx_level >= GFX9;
|
||||
physical_device->wsi_device.set_memory_ownership = radv_wsi_set_memory_ownership;
|
||||
physical_device->wsi_device.get_buffer_blit_queue = radv_wsi_get_prime_blit_queue;
|
||||
physical_device->wsi_device.signal_semaphore_with_memory = true;
|
||||
|
|
|
@ -46,7 +46,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
|
|||
|
||||
for (se = 0; se < num_se; se++) {
|
||||
/* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
|
||||
if (physical_device->rad_info.chip_class < GFX7)
|
||||
if (physical_device->rad_info.gfx_level < GFX7)
|
||||
radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
|
||||
S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) |
|
||||
S_00802C_INSTANCE_BROADCAST_WRITES(1));
|
||||
|
@ -58,7 +58,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
|
|||
}
|
||||
|
||||
/* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
|
||||
if (physical_device->rad_info.chip_class < GFX7)
|
||||
if (physical_device->rad_info.gfx_level < GFX7)
|
||||
radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
|
||||
S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) |
|
||||
S_00802C_INSTANCE_BROADCAST_WRITES(1));
|
||||
|
@ -67,7 +67,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
|
|||
S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
|
||||
S_030800_INSTANCE_BROADCAST_WRITES(1));
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX7)
|
||||
if (physical_device->rad_info.gfx_level >= GFX7)
|
||||
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en));
|
||||
radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en));
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
|
||||
radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
|
||||
radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en));
|
||||
|
@ -105,12 +105,12 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
}
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
|
||||
device->physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
|
||||
device->physical_device->rad_info.gfx_level >= GFX10 ? 0x20 : 0);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 5);
|
||||
radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */
|
||||
radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */
|
||||
|
@ -124,7 +124,7 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
* kernel if we want to use something other than the default value,
|
||||
* which is now 0x22f.
|
||||
*/
|
||||
if (device->physical_device->rad_info.chip_class <= GFX6) {
|
||||
if (device->physical_device->rad_info.gfx_level <= GFX6) {
|
||||
/* XXX: This should be:
|
||||
* (number of compute units) * 4 * (waves per simd) - 1 */
|
||||
|
||||
|
@ -139,7 +139,7 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
if (device->tma_bo) {
|
||||
uint64_t tba_va, tma_va;
|
||||
|
||||
assert(device->physical_device->rad_info.chip_class == GFX8);
|
||||
assert(device->physical_device->rad_info.gfx_level == GFX8);
|
||||
|
||||
tba_va = radv_trap_handler_shader_get_va(device->trap_handler_shader);
|
||||
tma_va = radv_buffer_get_va(device->tma_bo);
|
||||
|
@ -173,7 +173,7 @@ si_set_raster_config(struct radv_physical_device *physical_device, struct radeon
|
|||
*/
|
||||
if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
|
||||
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config);
|
||||
if (physical_device->rad_info.chip_class >= GFX7)
|
||||
if (physical_device->rad_info.gfx_level >= GFX7)
|
||||
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
|
||||
} else {
|
||||
si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
|
||||
|
@ -197,7 +197,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_emit(cs, 0);
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class <= GFX8)
|
||||
if (physical_device->rad_info.gfx_level <= GFX8)
|
||||
si_set_raster_config(physical_device, cs);
|
||||
|
||||
radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
|
||||
|
@ -205,7 +205,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
|
||||
|
||||
/* FIXME calculate these values somehow ??? */
|
||||
if (physical_device->rad_info.chip_class <= GFX8) {
|
||||
if (physical_device->rad_info.gfx_level <= GFX8) {
|
||||
radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
|
||||
radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
|
||||
}
|
||||
|
@ -216,11 +216,11 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class <= GFX9)
|
||||
if (physical_device->rad_info.gfx_level <= GFX9)
|
||||
radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
|
||||
if (!has_clear_state)
|
||||
radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
|
||||
if (physical_device->rad_info.chip_class < GFX7)
|
||||
if (physical_device->rad_info.gfx_level < GFX7)
|
||||
radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE,
|
||||
S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
|
||||
|
||||
|
@ -230,7 +230,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
/* CLEAR_STATE doesn't clear these correctly on certain generations.
|
||||
* I don't know why. Deduced by trial and error.
|
||||
*/
|
||||
if (physical_device->rad_info.chip_class <= GFX7 || !has_clear_state) {
|
||||
if (physical_device->rad_info.gfx_level <= GFX7 || !has_clear_state) {
|
||||
radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
|
||||
radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
|
||||
S_028204_WINDOW_OFFSET_DISABLE(1));
|
||||
|
@ -267,7 +267,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
|
||||
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
|
||||
radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0);
|
||||
radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0);
|
||||
|
@ -278,7 +278,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_set_context_reg(cs, R_028038_DB_DFSM_CONTROL,
|
||||
S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) |
|
||||
S_028038_POPS_DRAIN_PS_ON_OVERLAP(1));
|
||||
} else if (physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (physical_device->rad_info.gfx_level == GFX9) {
|
||||
radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
|
||||
radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
|
||||
radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
|
||||
|
@ -297,12 +297,12 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
|
||||
S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
|
||||
S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
} else if (device->physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (device->physical_device->rad_info.gfx_level == GFX9) {
|
||||
radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
|
||||
S_00B414_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8));
|
||||
radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
|
||||
|
@ -327,11 +327,11 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
* CUs. In the future, we might disable or enable this tweak only for
|
||||
* certain apps.
|
||||
*/
|
||||
if (physical_device->rad_info.chip_class >= GFX10_3)
|
||||
if (physical_device->rad_info.gfx_level >= GFX10_3)
|
||||
cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX7) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
/* Logical CUs 16 - 31 */
|
||||
ac_set_reg_cu_en(cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &physical_device->rad_info,
|
||||
|
@ -344,12 +344,12 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
(void*)gfx10_set_sh_reg_idx3);
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
|
||||
C_00B41C_CU_EN, 0, &physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
} else if (physical_device->rad_info.chip_class == GFX9) {
|
||||
} else if (physical_device->rad_info.gfx_level == GFX9) {
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
|
||||
} else {
|
||||
|
@ -366,7 +366,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F),
|
||||
C_00B01C_CU_EN, 0, &physical_device->rad_info,
|
||||
|
@ -377,7 +377,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
}
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
/* Break up a pixel wave if it contains deallocs for more than
|
||||
* half the parameter cache.
|
||||
*
|
||||
|
@ -394,7 +394,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
* need to prevent drawing lines on internal edges of
|
||||
* decomposed primitives (such as quads) with polygon mode = lines.
|
||||
*/
|
||||
unsigned vertex_reuse_depth = physical_device->rad_info.chip_class >= GFX10_3 ? 30 : 0;
|
||||
unsigned vertex_reuse_depth = physical_device->rad_info.gfx_level >= GFX10_3 ? 30 : 0;
|
||||
radeon_set_context_reg(cs, R_028838_PA_CL_NGG_CNTL,
|
||||
S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) |
|
||||
S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth));
|
||||
|
@ -454,7 +454,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
|
||||
radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10_3) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX10_3) {
|
||||
radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
|
||||
/* This allows sample shading. */
|
||||
radeon_set_context_reg(
|
||||
|
@ -463,12 +463,12 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
}
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
|
||||
S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) |
|
||||
S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT_GFX9(24) |
|
||||
S_028B50_TRAP_SPLIT(6));
|
||||
} else if (physical_device->rad_info.chip_class >= GFX8) {
|
||||
} else if (physical_device->rad_info.gfx_level >= GFX8) {
|
||||
uint32_t vgt_tess_distribution;
|
||||
|
||||
vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |
|
||||
|
@ -488,13 +488,13 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
uint64_t border_color_va = radv_buffer_get_va(device->border_color_data.bo);
|
||||
|
||||
radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
|
||||
if (physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI,
|
||||
S_028084_ADDRESS(border_color_va >> 40));
|
||||
}
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_set_context_reg(
|
||||
cs, R_028C48_PA_SC_BINNER_CNTL_1,
|
||||
S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
|
||||
|
@ -551,7 +551,7 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
if (device->tma_bo) {
|
||||
uint64_t tba_va, tma_va;
|
||||
|
||||
assert(device->physical_device->rad_info.chip_class == GFX8);
|
||||
assert(device->physical_device->rad_info.gfx_level == GFX8);
|
||||
|
||||
tba_va = radv_trap_handler_shader_get_va(device->trap_handler_shader);
|
||||
tma_va = radv_buffer_get_va(device->tma_bo);
|
||||
|
@ -738,7 +738,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
|||
bool indirect_draw, bool count_from_stream_output,
|
||||
uint32_t draw_vertex_count, unsigned topology, bool prim_restart_enable)
|
||||
{
|
||||
enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
|
||||
struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const unsigned max_primgroup_in_wave = 2;
|
||||
|
@ -768,7 +768,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
|||
ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi;
|
||||
partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave;
|
||||
|
||||
if (chip_class >= GFX7) {
|
||||
if (gfx_level >= GFX7) {
|
||||
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
|
||||
* 4 shader engines. Set 1 to pass the assertion below.
|
||||
* The other cases are hardware requirements. */
|
||||
|
@ -791,7 +791,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
|||
* Assume indirect draws always use small instances.
|
||||
* This is needed for good VS wave utilization.
|
||||
*/
|
||||
if (chip_class <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup)
|
||||
if (gfx_level <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup)
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* Required on GFX7 and later. */
|
||||
|
@ -801,7 +801,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
|||
/* Required by Hawaii and, for some special cases, by GFX8. */
|
||||
if (ia_switch_on_eoi &&
|
||||
(family == CHIP_HAWAII ||
|
||||
(chip_class == GFX8 &&
|
||||
(gfx_level == GFX8 &&
|
||||
/* max primgroup in wave is always 2 - leave this for documentation */
|
||||
(radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
|
||||
partial_vs_wave = true;
|
||||
|
@ -820,7 +820,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
|||
assert(wd_switch_on_eop || !ia_switch_on_eop);
|
||||
}
|
||||
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
|
||||
if (chip_class <= GFX8 && ia_switch_on_eoi)
|
||||
if (gfx_level <= GFX8 && ia_switch_on_eoi)
|
||||
partial_es_wave = true;
|
||||
|
||||
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
|
||||
|
@ -853,11 +853,11 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
|
|||
S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
|
||||
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
|
||||
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
|
||||
S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0);
|
||||
S_028AA8_WD_SWITCH_ON_EOP(gfx_level >= GFX7 ? wd_switch_on_eop : 0);
|
||||
}
|
||||
|
||||
void
|
||||
si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
|
||||
si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
|
||||
unsigned event, unsigned event_flags, unsigned dst_sel,
|
||||
unsigned data_sel, uint64_t va, uint32_t new_fence,
|
||||
uint64_t gfx9_eop_bug_va)
|
||||
|
@ -865,7 +865,7 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class,
|
|||
unsigned op = EVENT_TYPE(event) |
|
||||
EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
|
||||
event_flags;
|
||||
unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
|
||||
unsigned is_gfx8_mec = is_mec && gfx_level < GFX9;
|
||||
unsigned sel = EOP_DST_SEL(dst_sel) | EOP_DATA_SEL(data_sel);
|
||||
|
||||
/* Wait for write confirmation before writing data, but don't send
|
||||
|
@ -873,12 +873,12 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class,
|
|||
if (data_sel != EOP_DATA_SEL_DISCARD)
|
||||
sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
|
||||
|
||||
if (chip_class >= GFX9 || is_gfx8_mec) {
|
||||
if (gfx_level >= GFX9 || is_gfx8_mec) {
|
||||
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
|
||||
* counters) must immediately precede every timestamp event to
|
||||
* prevent a GPU hang on GFX9.
|
||||
*/
|
||||
if (chip_class == GFX9 && !is_mec) {
|
||||
if (gfx_level == GFX9 && !is_mec) {
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(cs, gfx9_eop_bug_va);
|
||||
|
@ -920,7 +920,7 @@ si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class,
|
|||
radeon_emit(cs, new_fence);
|
||||
}
|
||||
} else {
|
||||
if (chip_class == GFX7 || chip_class == GFX8) {
|
||||
if (gfx_level == GFX7 || gfx_level == GFX8) {
|
||||
/* Two EOP events are required to make all
|
||||
* engines go idle (and optional cache flushes
|
||||
* executed) before the timestamp is written.
|
||||
|
@ -981,9 +981,10 @@ si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigne
|
|||
}
|
||||
|
||||
static void
|
||||
gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *flush_cnt,
|
||||
uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
|
||||
gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
||||
uint32_t *flush_cnt, uint64_t flush_va, bool is_mec,
|
||||
enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
|
||||
uint64_t gfx9_eop_bug_va)
|
||||
{
|
||||
uint32_t gcr_cntl = 0;
|
||||
unsigned cb_db_event = 0;
|
||||
|
@ -1107,7 +1108,7 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
|
|||
(*flush_cnt)++;
|
||||
|
||||
si_cs_emit_write_event_eop(
|
||||
cs, chip_class, false, cb_db_event,
|
||||
cs, gfx_level, false, cb_db_event,
|
||||
S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
|
||||
S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
|
||||
S_490_SEQ(gcr_seq),
|
||||
|
@ -1157,7 +1158,7 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
|
|||
}
|
||||
|
||||
void
|
||||
si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *flush_cnt,
|
||||
si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt,
|
||||
uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
|
||||
enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
|
||||
{
|
||||
|
@ -1165,9 +1166,9 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
uint32_t flush_cb_db =
|
||||
flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
|
||||
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
/* GFX10 cache flush handling is quite different. */
|
||||
gfx10_cs_emit_cache_flush(cs, chip_class, flush_cnt, flush_va, is_mec, flush_bits,
|
||||
gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits,
|
||||
sqtt_flush_bits, gfx9_eop_bug_va);
|
||||
return;
|
||||
}
|
||||
|
@ -1181,7 +1182,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
|
||||
}
|
||||
|
||||
if (chip_class <= GFX8) {
|
||||
if (gfx_level <= GFX8) {
|
||||
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
|
||||
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) |
|
||||
S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) |
|
||||
|
@ -1190,8 +1191,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
S_0085F0_CB7_DEST_BASE_ENA(1);
|
||||
|
||||
/* Necessary for DCC */
|
||||
if (chip_class >= GFX8) {
|
||||
si_cs_emit_write_event_eop(cs, chip_class, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0,
|
||||
if (gfx_level >= GFX8) {
|
||||
si_cs_emit_write_event_eop(cs, gfx_level, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0,
|
||||
EOP_DST_SEL_MEM, EOP_DATA_SEL_DISCARD, 0, 0,
|
||||
gfx9_eop_bug_va);
|
||||
}
|
||||
|
@ -1238,7 +1239,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
*sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
|
||||
}
|
||||
|
||||
if (chip_class == GFX9 && flush_cb_db) {
|
||||
if (gfx_level == GFX9 && flush_cb_db) {
|
||||
unsigned cb_db_event, tc_flags;
|
||||
|
||||
/* Set the CB/DB flush event. */
|
||||
|
@ -1275,7 +1276,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
assert(flush_cnt);
|
||||
(*flush_cnt)++;
|
||||
|
||||
si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM,
|
||||
si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
|
||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
||||
}
|
||||
|
@ -1305,10 +1306,10 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
}
|
||||
|
||||
if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
|
||||
(chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
|
||||
si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
|
||||
(gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
|
||||
si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
|
||||
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
|
||||
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0;
|
||||
|
@ -1321,14 +1322,14 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
* WB doesn't work without NC.
|
||||
*/
|
||||
si_emit_acquire_mem(
|
||||
cs, is_mec, chip_class == GFX9,
|
||||
cs, is_mec, gfx_level == GFX9,
|
||||
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
|
||||
}
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
|
||||
si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
|
||||
si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9,
|
||||
cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
|
@ -1340,7 +1341,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
|
|||
* Therefore, it should be last. Done in PFP.
|
||||
*/
|
||||
if (cp_coher_cntl)
|
||||
si_emit_acquire_mem(cs, is_mec, chip_class == GFX9, cp_coher_cntl);
|
||||
si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl);
|
||||
|
||||
if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
|
@ -1371,7 +1372,7 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
|
||||
|
||||
si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
|
||||
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
|
||||
&cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
|
||||
|
@ -1415,7 +1416,7 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visi
|
|||
*/
|
||||
op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE;
|
||||
}
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
|
||||
radeon_emit(cmd_buffer->cs, op);
|
||||
radeon_emit(cmd_buffer->cs, va);
|
||||
|
@ -1445,7 +1446,7 @@ si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visi
|
|||
static inline unsigned
|
||||
cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9
|
||||
unsigned max = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9
|
||||
? S_415_BYTE_COUNT_GFX9(~0u)
|
||||
: S_415_BYTE_COUNT_GFX6(~0u);
|
||||
|
||||
|
@ -1467,7 +1468,7 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src
|
|||
assert(size <= cp_dma_max_byte_count(cmd_buffer));
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9)
|
||||
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||
else
|
||||
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||
|
@ -1476,7 +1477,7 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src
|
|||
if (flags & CP_DMA_SYNC)
|
||||
header |= S_411_CP_SYNC(1);
|
||||
else {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9)
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
else
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
|
@ -1486,8 +1487,8 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src
|
|||
command |= S_415_RAW_WAIT(1);
|
||||
|
||||
/* Src and dst flags. */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
!(flags & CP_DMA_CLEAR) && src_va == dst_va)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) &&
|
||||
src_va == dst_va)
|
||||
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
|
||||
else if (flags & CP_DMA_USE_L2)
|
||||
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
||||
|
@ -1497,7 +1498,7 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src
|
|||
else if (flags & CP_DMA_USE_L2)
|
||||
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating));
|
||||
radeon_emit(cs, header);
|
||||
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
|
||||
|
@ -1548,7 +1549,7 @@ si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned siz
|
|||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
command |= S_415_BYTE_COUNT_GFX9(aligned_size) |
|
||||
S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
header |= S_411_DST_SEL(V_411_NOWHERE);
|
||||
|
@ -1650,7 +1651,7 @@ si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint6
|
|||
unsigned dma_flags = 0;
|
||||
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
/* DMA operations via L2 are coherent and faster.
|
||||
* TODO: GFX7-GFX8 should also support this but it
|
||||
* requires tests/benchmarks.
|
||||
|
@ -1704,7 +1705,7 @@ si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
|
|||
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
|
||||
unsigned dma_flags = CP_DMA_CLEAR;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
/* DMA operations via L2 are coherent and faster.
|
||||
* TODO: GFX7-GFX8 should also support this but it
|
||||
* requires tests/benchmarks.
|
||||
|
@ -1727,7 +1728,7 @@ si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
|
|||
void
|
||||
si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7)
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX7)
|
||||
return;
|
||||
|
||||
if (!cmd_buffer->state.dma_is_busy)
|
||||
|
|
|
@ -53,7 +53,7 @@ radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_
|
|||
if (bo) {
|
||||
flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
|
||||
|
||||
if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
|
||||
if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.gfx_level >= GFX9)
|
||||
flags |= AMDGPU_VM_MTYPE_UC;
|
||||
|
||||
if (!(bo_flags & RADEON_FLAG_READ_ONLY))
|
||||
|
@ -573,7 +573,7 @@ radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t siz
|
|||
/* Gfx9: Increase the VM alignment to the most significant bit set
|
||||
* in the size for faster address translation.
|
||||
*/
|
||||
if (ws->info.chip_class >= GFX9) {
|
||||
if (ws->info.gfx_level >= GFX9) {
|
||||
unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
|
||||
uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
|
||||
|
||||
|
@ -870,7 +870,7 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_wins
|
|||
struct amdgpu_bo_metadata metadata = {0};
|
||||
uint64_t tiling_flags = 0;
|
||||
|
||||
if (ws->info.chip_class >= GFX9) {
|
||||
if (ws->info.gfx_level >= GFX9) {
|
||||
tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
|
||||
tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
|
||||
tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
|
||||
|
@ -924,7 +924,7 @@ radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_wins
|
|||
|
||||
uint64_t tiling_flags = info.metadata.tiling_info;
|
||||
|
||||
if (ws->info.chip_class >= GFX9) {
|
||||
if (ws->info.gfx_level >= GFX9) {
|
||||
md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
|
||||
md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
|
||||
} else {
|
||||
|
|
|
@ -285,7 +285,7 @@ static uint32_t get_nop_packet(struct radv_amdgpu_cs *cs)
|
|||
case AMDGPU_HW_IP_COMPUTE:
|
||||
return cs->ws->info.gfx_ib_pad_with_type2 ? PKT2_NOP_PAD : PKT3_NOP_PAD;
|
||||
case AMDGPU_HW_IP_DMA:
|
||||
return cs->ws->info.chip_class <= GFX6 ? 0xF0000000 : SDMA_NOP_PAD;
|
||||
return cs->ws->info.gfx_level <= GFX6 ? 0xF0000000 : SDMA_NOP_PAD;
|
||||
case AMDGPU_HW_IP_UVD:
|
||||
case AMDGPU_HW_IP_UVD_ENC:
|
||||
return PKT2_NOP_PAD;
|
||||
|
@ -1492,7 +1492,7 @@ radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *tra
|
|||
num_dw = cs->ib.size;
|
||||
}
|
||||
assert(ib);
|
||||
ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.chip_class,
|
||||
ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.gfx_level,
|
||||
radv_amdgpu_winsys_get_cpu_addr, cs);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
|
|||
ws->info.ip[AMD_IP_SDMA].num_queues = MIN2(ws->info.ip[AMD_IP_SDMA].num_queues, MAX_RINGS_PER_TYPE);
|
||||
ws->info.ip[AMD_IP_COMPUTE].num_queues = MIN2(ws->info.ip[AMD_IP_COMPUTE].num_queues, MAX_RINGS_PER_TYPE);
|
||||
|
||||
ws->use_ib_bos = ws->info.chip_class >= GFX7;
|
||||
ws->use_ib_bos = ws->info.gfx_level >= GFX7;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -77,29 +77,29 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
|
|||
const char *family = getenv("RADV_FORCE_FAMILY");
|
||||
unsigned i;
|
||||
|
||||
info->chip_class = CLASS_UNKNOWN;
|
||||
info->gfx_level = CLASS_UNKNOWN;
|
||||
info->family = CHIP_UNKNOWN;
|
||||
|
||||
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
|
||||
if (!strcasecmp(family, ac_get_family_name(i))) {
|
||||
/* Override family and chip_class. */
|
||||
/* Override family and gfx_level. */
|
||||
info->family = i;
|
||||
info->name = ac_get_family_name(i);
|
||||
|
||||
if (info->family >= CHIP_GFX1100)
|
||||
info->chip_class = GFX11;
|
||||
info->gfx_level = GFX11;
|
||||
else if (i >= CHIP_SIENNA_CICHLID)
|
||||
info->chip_class = GFX10_3;
|
||||
info->gfx_level = GFX10_3;
|
||||
else if (i >= CHIP_NAVI10)
|
||||
info->chip_class = GFX10;
|
||||
info->gfx_level = GFX10;
|
||||
else if (i >= CHIP_VEGA10)
|
||||
info->chip_class = GFX9;
|
||||
info->gfx_level = GFX9;
|
||||
else if (i >= CHIP_TONGA)
|
||||
info->chip_class = GFX8;
|
||||
info->gfx_level = GFX8;
|
||||
else if (i >= CHIP_BONAIRE)
|
||||
info->chip_class = GFX7;
|
||||
info->gfx_level = GFX7;
|
||||
else
|
||||
info->chip_class = GFX6;
|
||||
info->gfx_level = GFX6;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,32 +111,32 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
|
|||
info->pci_id = gpu_info[info->family].pci_id;
|
||||
info->max_se = 4;
|
||||
info->num_se = 4;
|
||||
if (info->chip_class >= GFX10_3)
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
info->max_wave64_per_simd = 16;
|
||||
else if (info->chip_class >= GFX10)
|
||||
else if (info->gfx_level >= GFX10)
|
||||
info->max_wave64_per_simd = 20;
|
||||
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
||||
info->max_wave64_per_simd = 8;
|
||||
else
|
||||
info->max_wave64_per_simd = 10;
|
||||
|
||||
if (info->chip_class >= GFX10)
|
||||
if (info->gfx_level >= GFX10)
|
||||
info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
|
||||
else if (info->chip_class >= GFX8)
|
||||
else if (info->gfx_level >= GFX8)
|
||||
info->num_physical_sgprs_per_simd = 800;
|
||||
else
|
||||
info->num_physical_sgprs_per_simd = 512;
|
||||
|
||||
info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
|
||||
info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
|
||||
info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||
info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
info->num_physical_wave64_vgprs_per_simd = info->gfx_level >= GFX10 ? 512 : 256;
|
||||
info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||
info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
info->lds_alloc_granularity =
|
||||
info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||
info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||
info->max_render_backends = gpu_info[info->family].num_render_backends;
|
||||
|
||||
info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
|
||||
info->has_packed_math_16bit = info->chip_class >= GFX9;
|
||||
info->has_packed_math_16bit = info->gfx_level >= GFX9;
|
||||
|
||||
info->has_image_load_dcc_bug =
|
||||
info->family == CHIP_DIMGREY_CAVEFISH || info->family == CHIP_VANGOGH;
|
||||
|
@ -145,13 +145,13 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
|
|||
info->family == CHIP_ARCTURUS || info->family == CHIP_ALDEBARAN ||
|
||||
info->family == CHIP_VEGA20 || info->family >= CHIP_NAVI12;
|
||||
|
||||
info->address32_hi = info->chip_class >= GFX9 ? 0xffff8000u : 0x0;
|
||||
info->address32_hi = info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;
|
||||
|
||||
info->has_rbplus = info->family == CHIP_STONEY || info->chip_class >= GFX9;
|
||||
info->has_rbplus = info->family == CHIP_STONEY || info->gfx_level >= GFX9;
|
||||
info->rbplus_allowed =
|
||||
info->has_rbplus &&
|
||||
(info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->chip_class >= GFX10_3);
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
|||
S_SQ_CF_WORD1_BARRIER(1) |
|
||||
S_SQ_CF_WORD1_VALID_PIXEL_MODE(cf->vpm) |
|
||||
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
if (bc->gfx_level == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
} else if (cfop->flags & CF_EXP) {
|
||||
|
@ -95,7 +95,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
|||
S_SQ_CF_ALLOC_EXPORT_WORD1_MARK(cf->mark) |
|
||||
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode);
|
||||
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
if (bc->gfx_level == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
} else if (cfop->flags & CF_RAT) {
|
||||
|
@ -114,7 +114,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
|||
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
|
||||
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
|
||||
S_SQ_CF_ALLOC_EXPORT_WORD1_MARK(cf->output.mark);
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
if (bc->gfx_level == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
|
||||
|
@ -131,7 +131,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
|||
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
|
||||
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
|
||||
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
if (bc->gfx_level == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
} else {
|
||||
|
@ -143,7 +143,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
|||
S_SQ_CF_WORD1_COND(cf->cond) |
|
||||
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
|
||||
S_SQ_CF_WORD1_COUNT(cf->count);
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
if (bc->gfx_level == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
}
|
||||
|
@ -181,7 +181,7 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
|
|||
unsigned type;
|
||||
|
||||
assert(id < 2);
|
||||
assert(bc->chip_class >= EVERGREEN);
|
||||
assert(bc->gfx_level >= EVERGREEN);
|
||||
|
||||
if (bc->index_loaded[id])
|
||||
return 0;
|
||||
|
@ -190,7 +190,7 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
|
|||
alu.op = ALU_OP1_MOVA_INT;
|
||||
alu.src[0].sel = bc->index_reg[id];
|
||||
alu.src[0].chan = bc->index_reg_chan[id];
|
||||
if (bc->chip_class == CAYMAN)
|
||||
if (bc->gfx_level == CAYMAN)
|
||||
alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
|
||||
|
||||
alu.last = 1;
|
||||
|
@ -200,7 +200,7 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
|
|||
|
||||
bc->ar_loaded = 0; /* clobbered */
|
||||
|
||||
if (bc->chip_class == EVERGREEN) {
|
||||
if (bc->gfx_level == EVERGREEN) {
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
|
||||
alu.last = 1;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue