nir: Add a writemask to store intrinsics.

Tessellation control shaders need to be careful when writing outputs.
Because multiple threads can concurrently write the same output
variables, we need to only write the exact components we were told.

Traditionally, for sub-vector writes, we've read the whole vector,
updated the temporary, and written the whole vector back.  This breaks
down with concurrent access.

This patch prepares the way for a solution by adding a writemask field
to store_var intrinsics, as well as the other store intrinsics.  It then
updates all produces to emit a writemask of "all channels enabled".  It
updates nir_lower_io to copy the writemask to output store intrinsics.

Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks
by doing a read-modify-write cycle (which is safe, because local
variables are specific to a single thread).

This should have no functional change, since no one actually emits
partial writemasks yet.

v2: Make nir_validate momentarily assert that writemasks cover the
    complete value - we shouldn't have partial writemasks yet
    (requested by Jason Ekstrand).

v3: Fix accidental SSBO change that arose from merge conflicts.

v4: Don't try to handle writemasks in ir3_compiler_nir - my code
    for indirects was likely wrong, and TTN doesn't generate partial
    writemasks today anyway.  Change them to asserts as requested by
    Rob Clark.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com> [v3]
This commit is contained in:
Kenneth Graunke 2015-11-17 00:26:37 -08:00
parent 50fc4a9256
commit 7d539080c1
12 changed files with 65 additions and 19 deletions

View File

@ -1903,6 +1903,7 @@ ttn_emit_instruction(struct ttn_compile *c)
&tgsi_dst->Indirect : NULL;
store->num_components = 4;
store->const_index[0] = 0xf;
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);

View File

@ -1321,6 +1321,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
for (int i = 0; i < intr->num_components; i++) {
/* ttn doesn't generate partial writemasks */
assert(intr->const_index[0] ==
(1 << intr->num_components) - 1);
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
arr->arr[n] = src[i];
@ -1333,6 +1337,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
/* ttn doesn't generate partial writemasks */
assert(intr->const_index[0] ==
(1 << intr->num_components) - 1);
struct ir3_instruction *store;
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);

View File

@ -1067,6 +1067,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr *store_instr =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->num_components = ir->return_deref->type->vector_elements;
store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
store_instr->variables[0] =
evaluate_deref(&store_instr->instr, ir->return_deref);
@ -1165,6 +1166,7 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
store->num_components = ir->lhs->type->vector_elements;
store->const_index[0] = (1 << store->num_components) - 1;
nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = nir_src_for_ssa(src);

View File

@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var)
}
static inline void
nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
unsigned writemask)
{
const unsigned num_components = glsl_get_vector_elements(var->type);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
store->num_components = num_components;
store->const_index[0] = writemask;
store->variables[0] = nir_deref_var_create(store, var);
store->src[0] = nir_src_for_ssa(value);
nir_builder_instr_insert(build, &store->instr);

View File

@ -43,7 +43,7 @@
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
/*
@ -302,10 +302,10 @@ LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
#define STORE(name, srcs, indices, flags) \
INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
/* src[] = { value, offset }. const_index[] = { base } */
STORE(output, 2, 1, 0)
/* src[] = { value, vertex, offset }. const_index[] = { base } */
STORE(per_vertex_output, 3, 1, 0)
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
STORE(output, 2, 2, 0)
/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
STORE(per_vertex_output, 3, 2, 0)
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
STORE(ssbo, 3, 1, 0)
/* src[] = { value, offset }. const_index[] = { base, write_mask } */

View File

@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
/* Increment the vertex count by 1 */
nir_store_var(b, state->vertex_count_var,
nir_iadd(b, count, nir_imm_int(b, 1)));
nir_iadd(b, count, nir_imm_int(b, 1)),
0x1); /* .x */
nir_instr_remove(&intrin->instr);

View File

@ -261,6 +261,9 @@ nir_lower_io_block(nir_block *block, void *void_state)
store->const_index[0] =
intrin->variables[0]->var->data.driver_location;
/* Copy the writemask */
store->const_index[1] = intrin->const_index[0];
if (per_vertex)
store->src[1] = nir_src_for_ssa(vertex_index);

View File

@ -243,7 +243,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.write_mask = intrin->const_index[0];
mov->dest.dest.is_ssa = false;
mov->dest.dest.reg.reg = reg_src.reg.reg;
mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;

View File

@ -128,6 +128,7 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
store->num_components = num_components;
store->const_index[0] = (1 << num_components) - 1;
store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
store->src[0].is_ssa = true;

View File

@ -26,6 +26,7 @@
*/
#include "nir.h"
#include "nir_builder.h"
#include "nir_vla.h"
@ -590,6 +591,9 @@ add_phi_sources(nir_block *block, nir_block *pred,
static bool
rename_variables_block(nir_block *block, struct lower_variables_state *state)
{
nir_builder b;
nir_builder_init(&b, state->impl);
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_phi) {
nir_phi_instr *phi = nir_instr_as_phi(instr);
@ -675,20 +679,40 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
assert(intrin->src[0].is_ssa);
nir_alu_instr *mov = nir_alu_instr_create(state->shader,
nir_op_imov);
mov->src[0].src.is_ssa = true;
mov->src[0].src.ssa = intrin->src[0].ssa;
for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[0].swizzle[i] = 0;
nir_ssa_def *new_def;
b.cursor = nir_before_instr(&intrin->instr);
mov->dest.write_mask = (1 << intrin->num_components) - 1;
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
intrin->num_components, NULL);
if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
/* Whole variable store - just copy the source. Note that
* intrin->num_components and intrin->src[0].ssa->num_components
* may differ.
*/
unsigned swiz[4];
for (unsigned i = 0; i < 4; i++)
swiz[i] = i < intrin->num_components ? i : 0;
nir_instr_insert_before(&intrin->instr, &mov->instr);
new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
intrin->num_components, false);
} else {
nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
/* For writemasked store_var intrinsics, we combine the newly
* written values with the existing contents of unwritten
* channels, creating a new SSA value for the whole vector.
*/
nir_ssa_def *srcs[4];
for (unsigned i = 0; i < intrin->num_components; i++) {
if (intrin->const_index[0] & (1 << i)) {
srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
} else {
srcs[i] = nir_channel(&b, old_def, i);
}
}
new_def = nir_vec(&b, srcs, intrin->num_components);
}
def_stack_push(node, &mov->dest.dest.ssa, state);
assert(new_def->num_components == intrin->num_components);
def_stack_push(node, new_def, state);
/* We'll wait to remove the instruction until the next pass
* where we pop the node we just pushed back off the stack.

View File

@ -417,6 +417,8 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
instr->variables[0]->var->data.mode != nir_var_uniform &&
instr->variables[0]->var->data.mode != nir_var_shader_storage);
/* Currently, writemasks must cover the entire value */
assert(instr->const_index[0] == (1 << instr->num_components) - 1);
break;
}
case nir_intrinsic_copy_var:

View File

@ -927,6 +927,7 @@ ptn_add_output_stores(struct ptn_compile *c)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
store->num_components = glsl_get_vector_elements(var->type);
store->const_index[0] = (1 << store->num_components) - 1;
store->variables[0] =
nir_deref_var_create(store, c->output_vars[var->data.location]);
@ -997,6 +998,7 @@ setup_registers_and_variables(struct ptn_compile *c)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store->num_components = 4;
store->const_index[0] = WRITEMASK_XYZW;
store->variables[0] = nir_deref_var_create(store, fullvar);
store->src[0] = nir_src_for_ssa(f001);
nir_builder_instr_insert(b, &store->instr);