diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 5fe15cf8e27..30e452540bf 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -217,7 +217,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var, /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned cb = regid(const_state->offsets.image_dims, 0) + const_state->image_dims.off[var->data.driver_location]; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 3eb34f44b14..a35a1518398 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -107,7 +107,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned n = const_state->offsets.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx->block, r); @@ -684,7 +684,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, /* UBO addresses are the first driver params, but subtract 2 here to * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0 * is the uniforms: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned ubo = regid(const_state->offsets.ubo, 0) - 2; const unsigned ptrsz = ir3_pointer_size(ctx->compiler); @@ -753,7 +753,7 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { /* SSBO size stored as a const starting at ssbo_sizes: */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned blk_idx = nir_src_as_uint(intr->src[0]); unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) + const_state->ssbo_size.off[blk_idx]; @@ -1009,7 +1009,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, * bytes-per-pixel should have been emitted in 2nd slot of * image_dims. See ir3_shader::emit_image_dims(). */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned cb = regid(const_state->offsets.image_dims, 0) + const_state->image_dims.off[var->data.driver_location]; struct ir3_instruction *aux = create_uniform(b, cb + 1); @@ -2286,7 +2286,7 @@ emit_stream_out(struct ir3_context *ctx) * stripped out in the backend. */ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 5b95373b675..7cd87de0f29 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -101,8 +101,6 @@ ir3_context_init(struct ir3_compiler *compiler, ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); - ir3_setup_const_state(so); - return ctx; } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 5d46b19d6e5..dedbd8dbb1d 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -298,7 +298,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags } /* Reallocate for 4 more elements whenever it's necessary */ - struct ir3_const_state *const_state = &ctx->so->const_state; + struct ir3_const_state *const_state = &ctx->so->shader->const_state; if (const_state->immediate_idx == const_state->immediates_size * 4) { const_state->immediates_size += 4; const_state->immediates = realloc (const_state->immediates, diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index edb5490d664..c692274d8e3 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -32,6 +32,8 @@ #include "ir3_compiler.h" #include "ir3_shader.h" +static void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir); + static const nir_shader_compiler_options options = { .lower_fpow = true, .lower_scmp = true, @@ -274,6 +276,14 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, nir_sweep(s); + /* The first time thru, when not creating variant, do the one-time + * const_state layout setup. This should be done after ubo range + * analysis. + */ + if (!key) { + ir3_setup_const_state(shader, s); + } + return s; } @@ -330,13 +340,11 @@ ir3_nir_scan_driver_consts(nir_shader *shader, } } -void -ir3_setup_const_state(struct ir3_shader_variant *v) +static void +ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir) { - struct ir3_shader *shader = v->shader; struct ir3_compiler *compiler = shader->compiler; - struct ir3_const_state *const_state = &v->const_state; - nir_shader *nir = shader->nir; + struct ir3_const_state *const_state = &shader->const_state; memset(&const_state->offsets, ~0, sizeof(const_state->offsets)); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 6cfe27a56b2..84c09b073f0 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -50,6 +50,4 @@ bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader); nir_ssa_def * ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift); -void ir3_setup_const_state(struct ir3_shader_variant *v); - #endif /* IR3_NIR_H_ */ diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 29f7fa05f0c..dacccc1329e 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -47,8 +47,6 @@ delete_variant(struct ir3_shader_variant *v) ir3_destroy(v->ir); if (v->bo) fd_bo_del(v->bo); - if (v->const_state.immediates) - free(v->const_state.immediates); free(v); } @@ -262,6 +260,7 @@ ir3_shader_destroy(struct ir3_shader *shader) v = v->next; delete_variant(t); } + free(shader->const_state.immediates); ralloc_free(shader->nir); free(shader); } @@ -350,7 +349,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) (regid >> 2), "xyzw"[regid & 0x3], i); } - struct ir3_const_state *const_state = &so->const_state; + struct ir3_const_state *const_state = &so->shader->const_state; for (i = 0; i < const_state->immediates_count; i++) { fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i); fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index a4386d7762d..c13cf1df0bb 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -393,7 +393,6 @@ struct ir3_shader_variant { bool binning_pass; struct ir3_shader_variant *binning; - struct ir3_const_state const_state; struct ir3_info info; struct ir3 *ir; @@ -539,6 +538,7 @@ struct ir3_shader { struct ir3_compiler *compiler; struct ir3_ubo_analysis_state ubo_state; + struct ir3_const_state const_state; struct nir_shader *nir; struct ir3_stream_output_info stream_output; diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 62f8f91c7f3..8d6ccecdd9c 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -196,10 +196,10 @@ tu_shader_destroy(struct tu_device *dev, for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) { if (shader->variants[i].ir) ir3_destroy(shader->variants[i].ir); - if (shader->variants[i].const_state.immediates) - free(shader->variants[i].const_state.immediates); } + if (shader->ir3_shader.const_state.immediates) + free(shader->ir3_shader.const_state.immediates); if (shader->binary) free(shader->binary); if (shader->binning_binary) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 08a7c90aab3..e605e531ecb 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -241,7 +241,7 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, * the user consts early to avoid HLSQ lockup caused by * writing too many consts */ - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen); /* and even if the start of the const buffer is before @@ -281,7 +281,7 @@ static void emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.ubo; if (v->constlen > offset) { uint32_t params = const_state->num_ubos; @@ -311,7 +311,7 @@ static void emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.ssbo_sizes; if (v->constlen > offset) { uint32_t sizes[align(const_state->ssbo_size.count, 4)]; @@ -333,7 +333,7 @@ static void emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.image_dims; if (v->constlen > offset) { uint32_t dims[align(const_state->image_dims.count, 4)]; @@ -386,7 +386,7 @@ static void emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t base = const_state->offsets.immediate; int size = const_state->immediates_count; @@ -412,7 +412,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { /* streamout addresses after driver-params: */ - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.tfbo; if (v->constlen > offset) { struct fd_streamout_stateobj *so = &ctx->streamout; @@ -540,7 +540,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin /* emit driver params every time: */ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ if (info) { - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { uint32_t vertex_params[IR3_DP_VS_COUNT] = { @@ -635,7 +635,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); /* emit compute-shader driver-params: */ - const struct ir3_const_state *const_state = &v->const_state; + const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { ring_wfi(ctx->batch, ring);