aco: form sparse load clauses

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7775>
This commit is contained in:
Rhys Perry 2020-11-23 12:51:15 +00:00 committed by Marge Bot
parent 0bd14be962
commit a502aa7b04
5 changed files with 61 additions and 2 deletions

View File

@ -5887,6 +5887,11 @@ Operand emit_tfe_init(Builder& bld, Temp dst)
for (unsigned i = 0; i < dst.size(); i++)
vec->operands[i] = Operand(0u);
vec->definitions[0] = Definition(tmp);
/* Since this is fixed to an instruction's definition register, any CSE will
* just create copies. Copying costs about the same as zero-initialization,
* but these copies can break up clauses.
*/
vec->definitions[0].setNoCSE(true);
bld.insert(std::move(vec));
return Operand(tmp);

View File

@ -851,7 +851,7 @@ class Definition final
{
public:
constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0),
isKill_(0), isPrecise_(0), isNUW_(0) {}
isKill_(0), isPrecise_(0), isNUW_(0), isNoCSE_(0) {}
Definition(uint32_t index, RegClass type) noexcept
: temp(index, type) {}
explicit Definition(Temp tmp) noexcept
@ -959,6 +959,16 @@ public:
return isNUW_;
}
/* Sets or clears the no-CSE flag on this definition.
 * In the value-numbering hunk of this change, process_block() passes an
 * instruction through unchanged when its first definition has this flag set.
 */
constexpr void setNoCSE(bool noCSE) noexcept
{
isNoCSE_ = noCSE;
}
/* Returns whether the no-CSE flag is set on this definition.
 * print_definition() prints "(noCSE)" in front of the temporary when this is true.
 */
constexpr bool isNoCSE() const noexcept
{
return isNoCSE_;
}
private:
Temp temp = Temp(0, s1);
PhysReg reg_;
@ -969,6 +979,7 @@ private:
uint8_t isKill_:1;
uint8_t isPrecise_:1;
uint8_t isNUW_:1;
uint8_t isNoCSE_:1;
};
/* can't initialize bit-fields in c++11, so work around using a union */
uint8_t control_ = 0;

View File

@ -383,7 +383,7 @@ void process_block(vn_ctx& ctx, Block& block)
instr->opcode == aco_opcode::p_demote_to_helper)
ctx.exec_id++;
if (instr->definitions.empty() || instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi) {
if (instr->definitions.empty() || is_phi(instr) || instr->definitions[0].isNoCSE()) {
new_instructions.emplace_back(std::move(instr));
continue;
}

View File

@ -192,6 +192,8 @@ static void print_definition(const Definition *definition, FILE *output)
fprintf(output, "(precise)");
if (definition->isNUW())
fprintf(output, "(nuw)");
if (definition->isNoCSE())
fprintf(output, "(noCSE)");
fprintf(output, "%%%d", definition->tempId());
if (definition->isFixed())

View File

@ -133,3 +133,44 @@ BEGIN_TEST(isel.gs.no_verts)
fprintf(output, "success\n");
}
END_TEST
BEGIN_TEST(isel.sparse.clause)
/* Builds a compute shader with four sparseTextureOffsetARB() fetches. The
 * check lines embedded in the shader source expect one (noCSE)
 * p_create_vector of zeros per fetch in the ACO IR, each feeding an
 * image_sample_lz_o with tfe, and an "s_clause 0x3" followed by four
 * image_sample_lz_o instructions in the assembly.
 */
/* The loop bounds restrict this test to GFX10. */
for (unsigned i = GFX10; i <= GFX10; i++) {
/* Skip this chip class when set_variant() returns false. */
if (!set_variant((chip_class)i))
continue;
QoShaderModuleCreateInfo cs = qoShaderModuleCreateInfoGLSL(COMPUTE,
QO_EXTENSION GL_ARB_sparse_texture2 : require
layout(local_size_x=1) in;
layout(binding=0) uniform sampler2D tex;
layout(binding=0) buffer Buf {
vec4 res[4];
uint code[4];
};
void main() {
//>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero0 dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero1 dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero2 dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero3 dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> s_clause 0x3
//! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
code[0] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(1, 0), res[0]);
code[1] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(2, 0), res[1]);
code[2] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(3, 0), res[2]);
code[3] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(4, 0), res[3]);
}
);
/* Build a pipeline containing only the compute shader above. */
PipelineBuilder pbld(get_vk_device((chip_class)i));
pbld.add_cs(cs);
/* Print both the "ACO IR" and the "Assembly" for the compute stage;
 * presumably the check lines above are matched against this output
 * (TODO confirm against the test framework).
 */
pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "ACO IR", true);
pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "Assembly", true);
}
END_TEST