From b15c46e6bf4cd375ae0b580bd1a0ec139e8dd3ef Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 7 May 2019 06:38:01 -0700 Subject: [PATCH] freedreno/ir3: move const_state to ir3_shader For a6xx, we construct/emit a single VS const state used for both binning pass and draw pass. So far we were mostly getting lucky that there were not (obvious) mismatches in the const_state (like different lowered immediates) between the binning and draw pass VS ir3_shader_variant. And I guess this situation will come up more as GS and tess are added into the equation. Since almost nothing about the const state is specific to the variant, move it to ir3_shader. The main exception is lowered immediates, but these are the last to appear in the layout, and it doesn't hurt for each new shader variant to just append any immediates it lowers to the end of the immediate state. Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_a4xx.c | 2 +- src/freedreno/ir3/ir3_compiler_nir.c | 10 +++++----- src/freedreno/ir3/ir3_context.c | 2 -- src/freedreno/ir3/ir3_cp.c | 2 +- src/freedreno/ir3/ir3_nir.c | 18 +++++++++++++----- src/freedreno/ir3/ir3_nir.h | 2 -- src/freedreno/ir3/ir3_shader.c | 5 ++--- src/freedreno/ir3/ir3_shader.h | 2 +- src/freedreno/vulkan/tu_shader.c | 4 ++-- .../drivers/freedreno/ir3/ir3_gallium.c | 16 ++++++++-------- 10 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 5fe15cf8e27..30e452540bf 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -217,7 +217,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var, /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned cb = regid(const_state->offsets.image_dims, 0) + 
const_state->image_dims.off[var->data.driver_location]; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 3eb34f44b14..a35a1518398 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -107,7 +107,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned n = const_state->offsets.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx->block, r); @@ -684,7 +684,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, /* UBO addresses are the first driver params, but subtract 2 here to * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0 * is the uniforms: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned ubo = regid(const_state->offsets.ubo, 0) - 2; const unsigned ptrsz = ir3_pointer_size(ctx->compiler); @@ -753,7 +753,7 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { /* SSBO size stored as a const starting at ssbo_sizes: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned blk_idx = nir_src_as_uint(intr->src[0]); unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) + const_state->ssbo_size.off[blk_idx]; @@ -1009,7 +1009,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, * bytes-per-pixel should have been emitted in 2nd slot of * image_dims. See ir3_shader::emit_image_dims(). 
*/ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned cb = regid(const_state->offsets.image_dims, 0) + const_state->image_dims.off[var->data.driver_location]; struct ir3_instruction *aux = create_uniform(b, cb + 1); @@ -2286,7 +2286,7 @@ emit_stream_out(struct ir3_context *ctx) * stripped out in the backend. */ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 5b95373b675..7cd87de0f29 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -101,8 +101,6 @@ ir3_context_init(struct ir3_compiler *compiler, ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); - ir3_setup_const_state(so); - return ctx; } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 5d46b19d6e5..dedbd8dbb1d 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -298,7 +298,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags } /* Reallocate for 4 more elements whenever it's necessary */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; if (const_state->immediate_idx == const_state->immediates_size * 4) { const_state->immediates_size += 4; const_state->immediates = realloc (const_state->immediates, diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index edb5490d664..c692274d8e3 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -32,6 +32,8 @@ #include "ir3_compiler.h" #include "ir3_shader.h" +static void ir3_setup_const_state(struct ir3_shader *shader, nir_shader 
*nir); + static const nir_shader_compiler_options options = { .lower_fpow = true, .lower_scmp = true, @@ -274,6 +276,14 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, nir_sweep(s); + /* The first time thru, when not creating variant, do the one-time + * const_state layout setup. This should be done after ubo range + * analysis. + */ + if (!key) { + ir3_setup_const_state(shader, s); + } + return s; } @@ -330,13 +340,11 @@ ir3_nir_scan_driver_consts(nir_shader *shader, } } -void -ir3_setup_const_state(struct ir3_shader_variant *v) +static void +ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir) { - struct ir3_shader *shader = v->shader; struct ir3_compiler *compiler = shader->compiler; - struct ir3_const_state *const_state = &v->const_state; - nir_shader *nir = shader->nir; + struct ir3_const_state *const_state = &shader->const_state; memset(&const_state->offsets, ~0, sizeof(const_state->offsets)); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 6cfe27a56b2..84c09b073f0 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -50,6 +50,4 @@ bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader); nir_ssa_def * ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift); -void ir3_setup_const_state(struct ir3_shader_variant *v); - #endif /* IR3_NIR_H_ */ diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 29f7fa05f0c..dacccc1329e 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -47,8 +47,6 @@ delete_variant(struct ir3_shader_variant *v) ir3_destroy(v->ir); if (v->bo) fd_bo_del(v->bo); - if (v->const_state.immediates) - free(v->const_state.immediates); free(v); } @@ -262,6 +260,7 @@ ir3_shader_destroy(struct ir3_shader *shader) v = v->next; delete_variant(t); } + free(shader->const_state.immediates); ralloc_free(shader->nir); free(shader); } @@ -350,7 +349,7 @@ 
ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) (regid >> 2), "xyzw"[regid & 0x3], i); } - struct ir3_const_state *const_state = &so->const_state; + struct ir3_const_state *const_state = &so->shader->const_state; for (i = 0; i < const_state->immediates_count; i++) { fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i); fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index a4386d7762d..c13cf1df0bb 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -393,7 +393,6 @@ struct ir3_shader_variant { bool binning_pass; struct ir3_shader_variant *binning; - struct ir3_const_state const_state; struct ir3_info info; struct ir3 *ir; @@ -539,6 +538,7 @@ struct ir3_shader { struct ir3_compiler *compiler; struct ir3_ubo_analysis_state ubo_state; + struct ir3_const_state const_state; struct nir_shader *nir; struct ir3_stream_output_info stream_output; diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 62f8f91c7f3..8d6ccecdd9c 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -196,10 +196,10 @@ tu_shader_destroy(struct tu_device *dev, for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) { if (shader->variants[i].ir) ir3_destroy(shader->variants[i].ir); - if (shader->variants[i].const_state.immediates) - free(shader->variants[i].const_state.immediates); } + if (shader->ir3_shader.const_state.immediates) + free(shader->ir3_shader.const_state.immediates); if (shader->binary) free(shader->binary); if (shader->binning_binary) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 08a7c90aab3..e605e531ecb 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -241,7 +241,7 @@ emit_user_consts(struct fd_context *ctx, const struct 
ir3_shader_variant *v, * the user consts early to avoid HLSQ lockup caused by * writing too many consts */ - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen); /* and even if the start of the const buffer is before @@ -281,7 +281,7 @@ static void emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.ubo; if (v->constlen > offset) { uint32_t params = const_state->num_ubos; @@ -311,7 +311,7 @@ static void emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.ssbo_sizes; if (v->constlen > offset) { uint32_t sizes[align(const_state->ssbo_size.count, 4)]; @@ -333,7 +333,7 @@ static void emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.image_dims; if (v->constlen > offset) { uint32_t dims[align(const_state->image_dims.count, 4)]; @@ -386,7 +386,7 @@ static void emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t base = const_state->offsets.immediate; int size = 
const_state->immediates_count; @@ -412,7 +412,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { /* streamout addresses after driver-params: */ - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.tfbo; if (v->constlen > offset) { struct fd_streamout_stateobj *so = &ctx->streamout; @@ -540,7 +540,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin /* emit driver params every time: */ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ if (info) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { uint32_t vertex_params[IR3_DP_VS_COUNT] = { @@ -635,7 +635,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); /* emit compute-shader driver-params: */ - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { ring_wfi(ctx->batch, ring);