diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp index ddeb3a9e0e6..92003070d7c 100644 --- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp @@ -100,6 +100,11 @@ bool EmitSSBOInstruction::do_emit(nir_instr* instr) return emit_image_size(intr); case nir_intrinsic_get_ssbo_size: return emit_buffer_size(intr); + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_group_memory_barrier: + return make_stores_ack_and_waitack(); default: return false; } @@ -352,17 +357,21 @@ bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr) auto values = vec_from_nir_with_fetch_constant(instr->src[0], (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true); - emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, - values, addr_vec, m_ssbo_image_offset, rat_id, 1, - 1, 0, false)); + auto store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, + values, addr_vec, m_ssbo_image_offset, rat_id, 1, + 1, 0, false); + emit_instruction(store); + m_store_ops.push_back(store); for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) { emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write)); emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0), {addr_vec.reg_i(0), Value::one_i}, last_write)); - emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, - temp2, addr_vec, 0, rat_id, 1, - 1, 0, false)); + store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, + temp2, addr_vec, 0, rat_id, 1, + 1, 0, false); + emit_instruction(store); + m_store_ops.push_back(store); } #endif return true; @@ -392,6 +401,8 @@ EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin) auto store = new 
RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, value, coord, imageid, image_offset, 1, 0xf, 0, false); + + m_store_ops.push_back(store); emit_instruction(store); return true; } @@ -617,6 +628,19 @@ bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr) return true; } +bool EmitSSBOInstruction::make_stores_ack_and_waitack() +{ + for (auto&& store: m_store_ops) + store->set_ack(); + + if (!m_store_ops.empty()) + emit_instruction(new WaitAck(0)); + + m_store_ops.clear(); + + return true; +} + GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir) { GPRVector::Values v; diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h index f289c5d5a50..56e0e31f112 100644 --- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h +++ b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h @@ -37,6 +37,8 @@ private: bool fetch_return_value(const nir_intrinsic_instr *intrin); + bool make_stores_ack_and_waitack(); + ESDOp get_opcode(nir_intrinsic_op opcode); RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const; @@ -47,6 +49,7 @@ private: bool m_require_rat_return_address; GPRVector m_rat_return_address; int m_ssbo_image_offset; + std::vector<RatInstruction *> m_store_ops; }; } diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h index 2a3d4089c9a..46d4280fa9e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h @@ -168,6 +168,8 @@ public: int data_swz(int chan) const {return m_data.chan_i(chan);} + void set_ack() {m_need_ack = true; } + private: bool is_equal_to(const Instruction& lhs) const override; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp index 60e4bc2ed57..57996162b25 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp +++ 
b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -678,8 +678,8 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins case nir_intrinsic_control_barrier: case nir_intrinsic_memory_barrier_tcs_patch: case nir_intrinsic_memory_barrier_shared: - case nir_intrinsic_memory_barrier: case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier: case nir_intrinsic_memory_barrier_image: case nir_intrinsic_group_memory_barrier: return emit_barrier(instr);