iris: Enable nir_opt_large_constants
Shader-db results on Kaby Lake:

total instructions in shared programs: 15306230 -> 15304726 (<.01%)
instructions in affected programs: 4570 -> 3066 (-32.91%)
helped: 16
HURT: 0

total cycles in shared programs: 361703436 -> 361680041 (<.01%)
cycles in affected programs: 129388 -> 105993 (-18.08%)
helped: 16
HURT: 0

LOST: 0
GAINED: 2

The helped programs were in XCom 2, Deus Ex: Mankind Divided, and Kerbal Space Program.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
9dc57eebd5
commit
e459d6d6df
|
@ -273,6 +273,12 @@ struct iris_uncompiled_shader {
|
|||
|
||||
/** Should we use ALT mode for math? Useful for ARB programs. */
|
||||
bool use_alt_mode;
|
||||
|
||||
/** Constant data scraped from the shader by nir_opt_large_constants */
|
||||
struct pipe_resource *const_data;
|
||||
|
||||
/** Surface state for const_data */
|
||||
struct iris_state_ref const_data_state;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -273,6 +273,14 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
|
|||
prog_data->binding_table.image_start = 0xd0d0d0d0;
|
||||
}
|
||||
|
||||
/* Allocate a slot in the UBO section for NIR constants if present.
|
||||
* We don't include them in iris_compiled_shader::num_cbufs because
|
||||
* they are uploaded separately from shs->constbuf[], but from a shader
|
||||
* point of view, they're another UBO (at the end of the section).
|
||||
*/
|
||||
if (nir->constant_data_size > 0)
|
||||
num_cbufs++;
|
||||
|
||||
if (num_cbufs) {
|
||||
//assert(info->num_ubos <= BRW_MAX_UBO);
|
||||
prog_data->binding_table.ubo_start = next_binding_table_offset;
|
||||
|
@ -361,6 +369,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
|
|||
|
||||
b.cursor = nir_before_block(nir_start_block(impl));
|
||||
nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
|
||||
nir_ssa_def *temp_const_ubo_name = NULL;
|
||||
|
||||
/* Turn system value intrinsics into uniforms */
|
||||
nir_foreach_block(block, impl) {
|
||||
|
@ -372,6 +381,34 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
|
|||
nir_ssa_def *offset;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_constant: {
|
||||
/* This one is special because it reads from the shader constant
|
||||
* data and not cbuf0 which gallium uploads for us.
|
||||
*/
|
||||
b.cursor = nir_before_instr(instr);
|
||||
nir_ssa_def *offset =
|
||||
nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
|
||||
nir_intrinsic_base(intrin));
|
||||
|
||||
if (temp_const_ubo_name == NULL)
|
||||
temp_const_ubo_name = nir_imm_int(&b, 0);
|
||||
|
||||
nir_intrinsic_instr *load_ubo =
|
||||
nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
|
||||
load_ubo->num_components = intrin->num_components;
|
||||
load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
|
||||
load_ubo->src[1] = nir_src_for_ssa(offset);
|
||||
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
|
||||
intrin->dest.ssa.num_components,
|
||||
intrin->dest.ssa.bit_size,
|
||||
intrin->dest.ssa.name);
|
||||
nir_builder_instr_insert(&b, &load_ubo->instr);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
|
||||
nir_src_for_ssa(&load_ubo->dest.ssa));
|
||||
nir_instr_remove(&intrin->instr);
|
||||
continue;
|
||||
}
|
||||
case nir_intrinsic_load_user_clip_plane: {
|
||||
unsigned ucp = nir_intrinsic_ucp_id(intrin);
|
||||
|
||||
|
@ -529,6 +566,16 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
|
|||
if (num_cbufs || num_system_values || nir->num_uniforms)
|
||||
num_cbufs++;
|
||||
|
||||
/* Constant loads (if any) need to go at the end of the constant buffers so
|
||||
* we need to know num_cbufs before we can lower to them.
|
||||
*/
|
||||
if (temp_const_ubo_name != NULL) {
|
||||
nir_load_const_instr *const_ubo_index =
|
||||
nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
|
||||
assert(const_ubo_index->def.bit_size == 32);
|
||||
const_ubo_index->value[0].u32 = num_cbufs;
|
||||
}
|
||||
|
||||
*out_system_values = system_values;
|
||||
*out_num_system_values = num_system_values;
|
||||
*out_num_cbufs = num_cbufs;
|
||||
|
@ -1514,6 +1561,7 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
|
|||
nir_shader *nir,
|
||||
const struct pipe_stream_output_info *so_info)
|
||||
{
|
||||
struct iris_context *ice = (void *)ctx;
|
||||
struct iris_screen *screen = (struct iris_screen *)ctx->screen;
|
||||
const struct gen_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
|
@ -1527,6 +1575,19 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
|
|||
NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
|
||||
NIR_PASS_V(nir, iris_lower_storage_image_derefs);
|
||||
|
||||
if (nir->constant_data_size > 0) {
|
||||
unsigned data_offset;
|
||||
u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
|
||||
32, nir->constant_data, &data_offset, &ish->const_data);
|
||||
|
||||
struct pipe_shader_buffer psb = {
|
||||
.buffer = ish->const_data,
|
||||
.buffer_offset = data_offset,
|
||||
.buffer_size = nir->constant_data_size,
|
||||
};
|
||||
iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
|
||||
}
|
||||
|
||||
ish->program_id = get_new_program_id(screen);
|
||||
ish->nir = nir;
|
||||
if (so_info) {
|
||||
|
@ -1771,6 +1832,11 @@ iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage
|
|||
ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
|
||||
}
|
||||
|
||||
if (ish->const_data) {
|
||||
pipe_resource_reference(&ish->const_data, NULL);
|
||||
pipe_resource_reference(&ish->const_data_state.res, NULL);
|
||||
}
|
||||
|
||||
ralloc_free(ish->nir);
|
||||
free(ish);
|
||||
}
|
||||
|
|
|
@ -647,6 +647,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
|||
screen->compiler->shader_debug_log = iris_shader_debug_log;
|
||||
screen->compiler->shader_perf_log = iris_shader_perf_log;
|
||||
screen->compiler->supports_pull_constants = false;
|
||||
screen->compiler->supports_shader_constants = true;
|
||||
|
||||
iris_disk_cache_init(screen);
|
||||
|
||||
|
|
|
@ -4120,6 +4120,7 @@ iris_populate_binding_table(struct iris_context *ice,
|
|||
bool pin_only)
|
||||
{
|
||||
const struct iris_binder *binder = &ice->state.binder;
|
||||
struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
|
||||
struct iris_compiled_shader *shader = ice->shaders.prog[stage];
|
||||
if (!shader)
|
||||
return;
|
||||
|
@ -4194,6 +4195,14 @@ iris_populate_binding_table(struct iris_context *ice,
|
|||
push_bt_entry(addr);
|
||||
}
|
||||
|
||||
if (ish->const_data) {
|
||||
iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data), false);
|
||||
iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data_state.res),
|
||||
false);
|
||||
uint32_t addr = ish->const_data_state.offset;
|
||||
push_bt_entry(addr);
|
||||
}
|
||||
|
||||
bt_assert(ssbo_start, info->num_abos + info->num_ssbos > 0);
|
||||
|
||||
/* XXX: st is wasting 16 binding table slots for ABOs. Should add a cap
|
||||
|
|
Loading…
Reference in New Issue