nir: Use writemasked store_vars in glsl_to_nir.
Instead of performing the read-modify-write cycle in glsl->nir, we can simply emit a partial writemask. For locals, nir_lower_vars_to_ssa will do the equivalent read-modify-write cycle for us, so we continue to get the same SSA values we had before. Because glsl_to_nir calls nir_lower_outputs_to_temporaries, all outputs are shadowed with temporary values, and written out as whole vectors at the end of the shader. So, most consumers will still not see partial writemasks. However, nir_lower_outputs_to_temporaries bails for tessellation control shader outputs. So those remain actual variables, and stores to those variables now get a writemask. nir_lower_io passes that through. This means that TCS outputs should actually work now. This is a functional change for tessellation control shaders. v2: Relax the nir_validate assert to allow partial writemasks. Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
This commit is contained in:
parent
7d539080c1
commit
0daf51e130
|
@ -1129,44 +1129,23 @@ nir_visitor::visit(ir_assignment *ir)
|
|||
nir_ssa_def *src = evaluate_rvalue(ir->rhs);
|
||||
|
||||
if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
|
||||
/*
|
||||
* We have no good way to update only part of a variable, so just load
|
||||
* the LHS and do a vec operation to combine the old with the new, and
|
||||
* then store it
|
||||
* back into the LHS. Copy propagation should get rid of the mess.
|
||||
/* GLSL IR will give us the input to the write-masked assignment in a
|
||||
* single packed vector. So, for example, if the writemask is xzw, then
|
||||
* we have to swizzle x -> x, y -> z, and z -> w and get the y component
|
||||
* from the load.
|
||||
*/
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
|
||||
load->num_components = ir->lhs->type->vector_elements;
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
|
||||
load->variables[0] = lhs_deref;
|
||||
ralloc_steal(load, load->variables[0]);
|
||||
nir_builder_instr_insert(&b, &load->instr);
|
||||
|
||||
nir_ssa_def *srcs[4];
|
||||
|
||||
unsigned swiz[4];
|
||||
unsigned component = 0;
|
||||
for (unsigned i = 0; i < ir->lhs->type->vector_elements; i++) {
|
||||
if (ir->write_mask & (1 << i)) {
|
||||
/* GLSL IR will give us the input to the write-masked assignment
|
||||
* in a single packed vector. So, for example, if the
|
||||
* writemask is xzw, then we have to swizzle x -> x, y -> z,
|
||||
* and z -> w and get the y component from the load.
|
||||
*/
|
||||
srcs[i] = nir_channel(&b, src, component++);
|
||||
} else {
|
||||
srcs[i] = nir_channel(&b, &load->dest.ssa, i);
|
||||
}
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;
|
||||
}
|
||||
|
||||
src = nir_vec(&b, srcs, ir->lhs->type->vector_elements);
|
||||
src = nir_swizzle(&b, src, swiz, num_components, !supports_ints);
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *store =
|
||||
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
|
||||
store->num_components = ir->lhs->type->vector_elements;
|
||||
store->const_index[0] = (1 << store->num_components) - 1;
|
||||
store->const_index[0] = ir->write_mask;
|
||||
nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
|
||||
store->variables[0] = nir_deref_as_var(store_deref);
|
||||
store->src[0] = nir_src_for_ssa(src);
|
||||
|
|
|
@ -417,8 +417,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
|||
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
|
||||
instr->variables[0]->var->data.mode != nir_var_uniform &&
|
||||
instr->variables[0]->var->data.mode != nir_var_shader_storage);
|
||||
/* Currently, writemasks must cover the entire value */
|
||||
assert(instr->const_index[0] == (1 << instr->num_components) - 1);
|
||||
assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_copy_var:
|
||||
|
|
Loading…
Reference in New Issue