tu: ir3: Emit push constants directly
Carve out some space at the beginning for push constants, and push them directly, rather than remapping them to a UBO and then relying on the UBO pushing code. Remapping to a UBO is easy now, while there's a single table of UBOs, but with the bindless model it will be a lot harder. I haven't removed all the code that moves the remaining UBOs over by 1, though, because it is all going to be rewritten with bindless anyway. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
This commit is contained in:
parent
63c2e8137d
commit
d3b7681df2
|
@ -235,7 +235,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
|
|||
* first.
|
||||
*/
|
||||
const uint32_t max_upload = 16 * 1024;
|
||||
uint32_t offset = 0;
|
||||
uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
|
||||
uint32_t range_size = state->range[i].end - state->range[i].start;
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ enum ir3_driver_param {
|
|||
|
||||
/**
|
||||
* Describes the layout of shader consts. This includes:
|
||||
* + Driver lowered UBO ranges
|
||||
* + User consts + driver lowered UBO ranges
|
||||
* + SSBO sizes
|
||||
* + Image sizes/dimensions
|
||||
* + Driver params (ie. IR3_DP_*)
|
||||
|
@ -114,6 +114,7 @@ enum ir3_driver_param {
|
|||
*/
|
||||
struct ir3_const_state {
|
||||
unsigned num_ubos;
|
||||
unsigned num_reserved_user_consts;
|
||||
unsigned num_driver_params; /* scalar */
|
||||
|
||||
struct {
|
||||
|
|
|
@ -2676,6 +2676,21 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
|
|||
&pipeline->program.link[type];
|
||||
const struct ir3_ubo_analysis_state *state = &link->ubo_state;
|
||||
|
||||
if (link->push_consts.count > 0) {
|
||||
unsigned num_units = link->push_consts.count;
|
||||
unsigned offset = link->push_consts.lo;
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units * 4);
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(num_units));
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
for (unsigned i = 0; i < num_units * 4; i++)
|
||||
tu_cs_emit(cs, push_constants[i + offset * 4]);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
|
||||
if (state->range[i].start < state->range[i].end) {
|
||||
uint32_t size = state->range[i].end - state->range[i].start;
|
||||
|
@ -2694,21 +2709,6 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
|
|||
debug_assert((size % 16) == 0);
|
||||
debug_assert((offset % 16) == 0);
|
||||
|
||||
if (i == 0) {
|
||||
/* push constants */
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4));
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit(cs, 0);
|
||||
for (unsigned i = 0; i < size / 4; i++)
|
||||
tu_cs_emit(cs, push_constants[i + offset / 4]);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Look through the UBO map to find our UBO index, and get the VA for
|
||||
* that UBO.
|
||||
*/
|
||||
|
|
|
@ -1948,6 +1948,7 @@ tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
|
|||
link->ubo_state = v->shader->ubo_state;
|
||||
link->const_state = v->shader->const_state;
|
||||
link->constlen = v->constlen;
|
||||
link->push_consts = shader->push_consts;
|
||||
link->texture_map = shader->texture_map;
|
||||
link->sampler_map = shader->sampler_map;
|
||||
link->ubo_map = shader->ubo_map;
|
||||
|
|
|
@ -1129,10 +1129,17 @@ struct tu_descriptor_map
|
|||
int array_size[128];
|
||||
};
|
||||
|
||||
struct tu_push_constant_range
|
||||
{
|
||||
uint32_t lo;
|
||||
uint32_t count;
|
||||
};
|
||||
|
||||
struct tu_shader
|
||||
{
|
||||
struct ir3_shader ir3_shader;
|
||||
|
||||
struct tu_push_constant_range push_consts;
|
||||
struct tu_descriptor_map texture_map;
|
||||
struct tu_descriptor_map sampler_map;
|
||||
struct tu_descriptor_map ubo_map;
|
||||
|
@ -1181,6 +1188,7 @@ struct tu_program_descriptor_linkage
|
|||
|
||||
uint32_t constlen;
|
||||
|
||||
struct tu_push_constant_range push_consts;
|
||||
struct tu_descriptor_map texture_map;
|
||||
struct tu_descriptor_map sampler_map;
|
||||
struct tu_descriptor_map ubo_map;
|
||||
|
|
|
@ -210,17 +210,16 @@ static void
|
|||
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
|
||||
struct tu_shader *shader)
|
||||
{
|
||||
/* note: ir3 wants load_ubo, not load_uniform */
|
||||
assert(nir_intrinsic_base(instr) == 0);
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
|
||||
|
||||
nir_intrinsic_set_align(load, 4, 0);
|
||||
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
|
||||
load->num_components = instr->num_components;
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
load->src[1] = instr->src[0];
|
||||
uint32_t base = nir_intrinsic_base(instr);
|
||||
assert(base % 4 == 0);
|
||||
assert(base >= shader->push_consts.lo * 16);
|
||||
base -= shader->push_consts.lo * 16;
|
||||
nir_intrinsic_set_base(load, base / 4);
|
||||
load->src[0] =
|
||||
nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
load->num_components, instr->dest.ssa.bit_size,
|
||||
instr->dest.ssa.name);
|
||||
|
@ -343,6 +342,55 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
}
|
||||
}
|
||||
|
||||
/* Figure out the range of push constants that we're actually going to push to
|
||||
* the shader, and tell the backend to reserve this range when pushing UBO
|
||||
* constants.
|
||||
*/
|
||||
|
||||
static void
|
||||
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
|
||||
{
|
||||
uint32_t min = UINT32_MAX, max = 0;
|
||||
nir_foreach_function(function, shader) {
|
||||
if (!function->impl)
|
||||
continue;
|
||||
|
||||
nir_foreach_block(block, function->impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
|
||||
continue;
|
||||
|
||||
uint32_t base = nir_intrinsic_base(intrin);
|
||||
uint32_t range = nir_intrinsic_range(intrin);
|
||||
min = MIN2(min, base);
|
||||
max = MAX2(max, base + range);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (min >= max) {
|
||||
tu_shader->push_consts.lo = 0;
|
||||
tu_shader->push_consts.count = 0;
|
||||
tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
|
||||
* however there's an alignment requirement of 4 on OFFSET. Expand the
|
||||
* range and change units accordingly.
|
||||
*/
|
||||
tu_shader->push_consts.lo = (min / 16) / 4 * 4;
|
||||
tu_shader->push_consts.count =
|
||||
align(max, 16) / 16 - tu_shader->push_consts.lo;
|
||||
tu_shader->ir3_shader.const_state.num_reserved_user_consts =
|
||||
align(tu_shader->push_consts.count, 4);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_impl(nir_function_impl *impl, struct tu_shader *shader,
|
||||
const struct tu_pipeline_layout *layout)
|
||||
|
@ -376,6 +424,8 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
|
|||
{
|
||||
bool progress = false;
|
||||
|
||||
gather_push_constants(shader, tu_shader);
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl)
|
||||
progress |= lower_impl(function->impl, tu_shader, layout);
|
||||
|
|
Loading…
Reference in New Issue