From 300f8dec76f846f2c49bafd3bd3d76b7ffc0923b Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 15 Jan 2020 14:44:26 +0100 Subject: [PATCH] aco: implement stream output with vec3 on GFX6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GFX6 doesn't support vec3. Signed-off-by: Samuel Pitoiset Reviewed-by: Daniel Schürmann Tested-by: Marge Bot Part-of: --- .../compiler/aco_instruction_selection.cpp | 90 +++++++++++-------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index aeec2f981e7..6d6d806a2d0 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -7844,9 +7844,9 @@ static void emit_stream_output(isel_context *ctx, const struct radv_stream_output *output) { unsigned num_comps = util_bitcount(output->component_mask); + unsigned writemask = (1 << num_comps) - 1; unsigned loc = output->location; unsigned buf = output->buffer; - unsigned offset = output->offset; assert(num_comps && num_comps <= 4); if (!num_comps || num_comps > 4) @@ -7864,47 +7864,59 @@ static void emit_stream_output(isel_context *ctx, if (all_undef) return; - Temp write_data = {ctx->program->allocateId(), RegClass(RegType::vgpr, num_comps)}; - aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, num_comps, 1)}; - for (unsigned i = 0; i < num_comps; ++i) - vec->operands[i] = (ctx->vs_output.mask[loc] & 1 << i) ? Operand(out[i]) : Operand(0u); - vec->definitions[0] = Definition(write_data); - ctx->block->instructions.emplace_back(std::move(vec)); + while (writemask) { + int start, count; + u_bit_scan_consecutive_range(&writemask, &start, &count); + if (count == 3 && ctx->options->chip_class == GFX6) { + /* GFX6 doesn't support storing vec3, split it. */ + writemask |= 1u << (start + 2); + count = 2; + } - aco_opcode opcode; - switch (num_comps) { - case 1: - opcode = aco_opcode::buffer_store_dword; - break; - case 2: - opcode = aco_opcode::buffer_store_dwordx2; - break; - case 3: - opcode = aco_opcode::buffer_store_dwordx3; - break; - case 4: - opcode = aco_opcode::buffer_store_dwordx4; - break; - } + unsigned offset = output->offset + start * 4; - aco_ptr store{create_instruction(opcode, Format::MUBUF, 4, 0)}; - store->operands[0] = Operand(so_write_offset[buf]); - store->operands[1] = Operand(so_buffers[buf]); - store->operands[2] = Operand((uint32_t) 0); - store->operands[3] = Operand(write_data); - if (offset > 4095) { - /* Don't think this can happen in RADV, but maybe GL? It's easy to do this anyway. */ - Builder bld(ctx->program, ctx->block); - store->operands[0] = bld.vadd32(bld.def(v1), Operand(offset), Operand(so_write_offset[buf])); - } else { - store->offset = offset; + Temp write_data = {ctx->program->allocateId(), RegClass(RegType::vgpr, count)}; + aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, count, 1)}; + for (int i = 0; i < count; ++i) + vec->operands[i] = (ctx->vs_output.mask[loc] & 1 << (start + i)) ? Operand(out[start + i]) : Operand(0u); + vec->definitions[0] = Definition(write_data); + ctx->block->instructions.emplace_back(std::move(vec)); + + aco_opcode opcode; + switch (count) { + case 1: + opcode = aco_opcode::buffer_store_dword; + break; + case 2: + opcode = aco_opcode::buffer_store_dwordx2; + break; + case 3: + opcode = aco_opcode::buffer_store_dwordx3; + break; + case 4: + opcode = aco_opcode::buffer_store_dwordx4; + break; + } + + aco_ptr store{create_instruction(opcode, Format::MUBUF, 4, 0)}; + store->operands[0] = Operand(so_write_offset[buf]); + store->operands[1] = Operand(so_buffers[buf]); + store->operands[2] = Operand((uint32_t) 0); + store->operands[3] = Operand(write_data); + if (offset > 4095) { + /* Don't think this can happen in RADV, but maybe GL? It's easy to do this anyway. */ + Builder bld(ctx->program, ctx->block); + store->operands[0] = bld.vadd32(bld.def(v1), Operand(offset), Operand(so_write_offset[buf])); + } else { + store->offset = offset; + } + store->offen = true; + store->glc = true; + store->dlc = false; + store->slc = true; + store->can_reorder = true; + ctx->block->instructions.emplace_back(std::move(store)); } - store->offen = true; - store->glc = true; - store->dlc = false; - store->slc = true; - store->can_reorder = true; - ctx->block->instructions.emplace_back(std::move(store)); } static void emit_streamout(isel_context *ctx, unsigned stream)