freedreno/ir3: move const_state to ir3_shader

For a6xx, we construct/emit a single VS const state used for both the
binning pass and the draw pass.  So far we were mostly getting lucky that
there were no (obvious) mismatches in the const_state (like different
lowered immediates) between the binning and draw pass VS
ir3_shader_variants.

And I guess this situation will come up more as GS and tess are added
into the equation.

Since almost nothing about the const state is specific to the variant,
move it to ir3_shader.  The main exception is lowered immediates, but
these are the last to appear in the layout, and it doesn't hurt for each
new shader variant to just append any immediates it lowers to the end of
the immediate state.

Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
Rob Clark 2019-05-07 06:38:01 -07:00
parent 5690f83bb5
commit b15c46e6bf
10 changed files with 33 additions and 30 deletions

View File

@ -217,7 +217,7 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
/* to calculate the byte offset (yes, uggg) we need (up to) three
* const values to know the bytes per pixel, and y and z stride:
*/
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
const_state->image_dims.off[var->data.driver_location];

View File

@ -107,7 +107,7 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned n = const_state->offsets.driver_param;
unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx->block, r);
@ -684,7 +684,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
/* UBO addresses are the first driver params, but subtract 2 here to
* account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
* is the uniforms: */
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
@ -753,7 +753,7 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
/* SSBO size stored as a const starting at ssbo_sizes: */
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned blk_idx = nir_src_as_uint(intr->src[0]);
unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
const_state->ssbo_size.off[blk_idx];
@ -1009,7 +1009,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
* bytes-per-pixel should have been emitted in 2nd slot of
* image_dims. See ir3_shader::emit_image_dims().
*/
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
const_state->image_dims.off[var->data.driver_location];
struct ir3_instruction *aux = create_uniform(b, cb + 1);
@ -2286,7 +2286,7 @@ emit_stream_out(struct ir3_context *ctx)
* stripped out in the backend.
*/
for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;

View File

@ -101,8 +101,6 @@ ir3_context_init(struct ir3_compiler *compiler,
ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
ir3_setup_const_state(so);
return ctx;
}

View File

@ -298,7 +298,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
}
/* Reallocate for 4 more elements whenever it's necessary */
struct ir3_const_state *const_state = &ctx->so->const_state;
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
if (const_state->immediate_idx == const_state->immediates_size * 4) {
const_state->immediates_size += 4;
const_state->immediates = realloc (const_state->immediates,

View File

@ -32,6 +32,8 @@
#include "ir3_compiler.h"
#include "ir3_shader.h"
static void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir);
static const nir_shader_compiler_options options = {
.lower_fpow = true,
.lower_scmp = true,
@ -274,6 +276,14 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
nir_sweep(s);
/* The first time thru, when not creating variant, do the one-time
* const_state layout setup. This should be done after ubo range
* analysis.
*/
if (!key) {
ir3_setup_const_state(shader, s);
}
return s;
}
@ -330,13 +340,11 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
}
}
void
ir3_setup_const_state(struct ir3_shader_variant *v)
static void
ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir)
{
struct ir3_shader *shader = v->shader;
struct ir3_compiler *compiler = shader->compiler;
struct ir3_const_state *const_state = &v->const_state;
nir_shader *nir = shader->nir;
struct ir3_const_state *const_state = &shader->const_state;
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));

View File

@ -50,6 +50,4 @@ bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader);
nir_ssa_def *
ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
void ir3_setup_const_state(struct ir3_shader_variant *v);
#endif /* IR3_NIR_H_ */

View File

@ -47,8 +47,6 @@ delete_variant(struct ir3_shader_variant *v)
ir3_destroy(v->ir);
if (v->bo)
fd_bo_del(v->bo);
if (v->const_state.immediates)
free(v->const_state.immediates);
free(v);
}
@ -262,6 +260,7 @@ ir3_shader_destroy(struct ir3_shader *shader)
v = v->next;
delete_variant(t);
}
free(shader->const_state.immediates);
ralloc_free(shader->nir);
free(shader);
}
@ -350,7 +349,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
(regid >> 2), "xyzw"[regid & 0x3], i);
}
struct ir3_const_state *const_state = &so->const_state;
struct ir3_const_state *const_state = &so->shader->const_state;
for (i = 0; i < const_state->immediates_count; i++) {
fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",

View File

@ -393,7 +393,6 @@ struct ir3_shader_variant {
bool binning_pass;
struct ir3_shader_variant *binning;
struct ir3_const_state const_state;
struct ir3_info info;
struct ir3 *ir;
@ -539,6 +538,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
struct ir3_ubo_analysis_state ubo_state;
struct ir3_const_state const_state;
struct nir_shader *nir;
struct ir3_stream_output_info stream_output;

View File

@ -196,10 +196,10 @@ tu_shader_destroy(struct tu_device *dev,
for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) {
if (shader->variants[i].ir)
ir3_destroy(shader->variants[i].ir);
if (shader->variants[i].const_state.immediates)
free(shader->variants[i].const_state.immediates);
}
if (shader->ir3_shader.const_state.immediates)
free(shader->ir3_shader.const_state.immediates);
if (shader->binary)
free(shader->binary);
if (shader->binning_binary)

View File

@ -241,7 +241,7 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v,
* the user consts early to avoid HLSQ lockup caused by
* writing too many consts
*/
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen);
/* and even if the start of the const buffer is before
@ -281,7 +281,7 @@ static void
emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.ubo;
if (v->constlen > offset) {
uint32_t params = const_state->num_ubos;
@ -311,7 +311,7 @@ static void
emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
{
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.ssbo_sizes;
if (v->constlen > offset) {
uint32_t sizes[align(const_state->ssbo_size.count, 4)];
@ -333,7 +333,7 @@ static void
emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
{
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.image_dims;
if (v->constlen > offset) {
uint32_t dims[align(const_state->image_dims.count, 4)];
@ -386,7 +386,7 @@ static void
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t base = const_state->offsets.immediate;
int size = const_state->immediates_count;
@ -412,7 +412,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
/* streamout addresses after driver-params: */
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.tfbo;
if (v->constlen > offset) {
struct fd_streamout_stateobj *so = &ctx->streamout;
@ -540,7 +540,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info) {
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
@ -635,7 +635,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
/* emit compute-shader driver-params: */
const struct ir3_const_state *const_state = &v->const_state;
const struct ir3_const_state *const_state = &v->shader->const_state;
uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
ring_wfi(ctx->batch, ring);