diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index b27253afd058d..7478ec14d0051 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2644,6 +2644,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                                    src, 0);
       dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
       dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE;
+      /* Work around a bug with half-register shared -> non-shared moves by
+       * adding an extra mov here so that the original destination stays full.
+       */
+      if (src->dsts[0]->flags & IR3_REG_HALF) {
+         dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
+         if (!ctx->compiler->has_scalar_alu)
+            dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
+      }
       break;
    }
 
@@ -2651,6 +2659,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
       dst[0] = ir3_READ_FIRST_MACRO(ctx->block, src, 0);
       dst[0]->dsts[0]->flags |= IR3_REG_SHARED;
+      /* See above. */
+      if (src->dsts[0]->flags & IR3_REG_HALF) {
+         dst[0] = ir3_MOV(b, dst[0], TYPE_U32);
+         if (!ctx->compiler->has_scalar_alu)
+            dst[0]->dsts[0]->flags &= ~IR3_REG_SHARED;
+      }
       break;
    }
 
@@ -3586,7 +3600,12 @@ read_phi_src(struct ir3_context *ctx, struct ir3_block *blk,
             /* Create an ir3 undef */
             return NULL;
          } else {
-            return ir3_get_src(ctx, &nsrc->src)[0];
+            /* We need to insert the move at the end of the block */
+            struct ir3_block *old_block = ctx->block;
+            ctx->block = blk;
+            struct ir3_instruction *src = ir3_get_src(ctx, &nsrc->src)[0];
+            ctx->block = old_block;
+            return src;
          }
       }
    }
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 423bb1fbaa04e..65db920e42981 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -227,7 +227,7 @@ ir3_get_def(struct ir3_context *ctx, nir_def *def, unsigned n)
 }
 
 struct ir3_instruction *const *
-ir3_get_src(struct ir3_context *ctx, nir_src *src)
+ir3_get_src_maybe_shared(struct ir3_context *ctx, nir_src *src)
 {
    struct hash_entry *entry;
    entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
@@ -235,24 +235,61 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
    return entry->data;
 }
 
+/* Return a value equivalent to src whose destination has IR3_REG_SHARED set
+ * or cleared as requested by shared. If the flag already matches, src itself
+ * is returned; otherwise a mov of src is created in block (TYPE_U16 when src
+ * is a half register, TYPE_U32 otherwise) and that mov is returned.
+ */
+static struct ir3_instruction *
+get_shared(struct ir3_block *block, struct ir3_instruction *src, bool shared)
+{
+   if (!!(src->dsts[0]->flags & IR3_REG_SHARED) != shared) {
+      struct ir3_instruction *mov =
+         ir3_MOV(block, src, (src->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
+      mov->dsts[0]->flags &= ~IR3_REG_SHARED;
+      mov->dsts[0]->flags |= COND(shared, IR3_REG_SHARED);
+      return mov;
+   }
+
+   return src;
+}
+
+/* Fetch the ir3 values for a NIR source and make sure the IR3_REG_SHARED
+ * flag of every component matches shared. If all components already match,
+ * the array from ir3_get_src_maybe_shared() is returned untouched. Otherwise
+ * a new array is allocated with ralloc_array() on ctx and filled through
+ * get_shared(), which emits any needed movs in ctx->block; callers that want
+ * the movs elsewhere must point ctx->block at that block before calling.
+ */
+struct ir3_instruction *const *
+ir3_get_src_shared(struct ir3_context *ctx, nir_src *src, bool shared)
+{
+   unsigned num_components = nir_src_num_components(*src);
+   struct ir3_instruction *const *value = ir3_get_src_maybe_shared(ctx, src);
+   bool mismatch = false;
+   for (unsigned i = 0; i < num_components; i++) {
+      if (!!(value[i]->dsts[0]->flags & IR3_REG_SHARED) != shared) {
+         mismatch = true;
+         break;
+      }
+   }
+
+   if (!mismatch)
+      return value;
+
+   struct ir3_instruction **new_value =
+      ralloc_array(ctx, struct ir3_instruction *, num_components);
+   for (unsigned i = 0; i < num_components; i++)
+      new_value[i] = get_shared(ctx->block, value[i], shared);
+
+   return new_value;
+}
+
 void
 ir3_put_def(struct ir3_context *ctx, nir_def *def)
 {
    unsigned bit_size = ir3_bitsize(ctx, def->bit_size);
 
-   /* add extra mov if dst value is shared reg.. in some cases not all
-    * instructions can read from shared regs, in cases where they can
-    * ir3_cp will clean up the extra mov:
-    */
-   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
-      if (!ctx->last_dst[i])
-         continue;
-      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
-         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
-         ctx->last_dst[i]->dsts[0]->flags &= ~IR3_REG_SHARED;
-      }
-   }
-
    if (bit_size <= 16) {
       for (unsigned i = 0; i < ctx->last_dst_n; i++) {
          struct ir3_instruction *dst = ctx->last_dst[i];
@@ -627,7 +664,8 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
    dst->array.id = arr->id;
    dst->array.offset = n;
    dst->array.base = INVALID_REG;
-   ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags)->def = src->dsts[0];
+   ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | flags |
+                  (src->dsts[0]->flags & IR3_REG_SHARED))->def = src->dsts[0];
 
    if (arr->last_write && arr->last_write->instr->block == block)
       ir3_reg_set_last_array(mov, dst, arr->last_write);
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index c0274deeb92be..fbecb2c95e587 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -201,8 +201,20 @@ struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx, nir_def *dst,
                                          unsigned n);
 struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def,
                                      unsigned n);
-struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,
-                                           nir_src *src);
+struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx,
+                                                        nir_src *src);
+struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx,
+                                                  nir_src *src, bool shared);
+
+/* Default lookup: ir3_get_src_shared() with shared == false, so any value
+ * flagged IR3_REG_SHARED is returned through a non-shared mov.
+ */
+static inline struct ir3_instruction *const *
+ir3_get_src(struct ir3_context *ctx, nir_src *src)
+{
+   return ir3_get_src_shared(ctx, src, false);
+}
+
 void ir3_put_def(struct ir3_context *ctx, nir_def *def);
 struct ir3_instruction *ir3_create_collect(struct ir3_block *block,
                                            struct ir3_instruction *const *arr,