i965, anv: Use NIR FragCoord re-center and y-transform passes.
This handles gl_FragCoord transformations and other window system vs. user FBO coordinate system flipping by multiplying/adding uniform values, rather than by recompiling the shader.

This is much better because we have no decent way to guess whether the application is going to use a shader with the window system FBO or a user FBO, much less the drawable height. Guessing wrong led to a lot of recompiles in many applications.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
6e5d86c07a
commit
dac10e8a13
|
@ -142,6 +142,11 @@ anv_shader_compile_to_nir(struct anv_device *device,
|
||||||
|
|
||||||
free(spec_entries);
|
free(spec_entries);
|
||||||
|
|
||||||
|
if (stage == MESA_SHADER_FRAGMENT) {
|
||||||
|
nir_lower_wpos_center(nir);
|
||||||
|
nir_validate_shader(nir);
|
||||||
|
}
|
||||||
|
|
||||||
nir_lower_returns(nir);
|
nir_lower_returns(nir);
|
||||||
nir_validate_shader(nir);
|
nir_validate_shader(nir);
|
||||||
|
|
||||||
|
|
|
@ -1105,7 +1105,6 @@ enum opcode {
|
||||||
FS_OPCODE_DDX_FINE,
|
FS_OPCODE_DDX_FINE,
|
||||||
/**
|
/**
|
||||||
* Compute dFdy(), dFdyCoarse(), or dFdyFine().
|
* Compute dFdy(), dFdyCoarse(), or dFdyFine().
|
||||||
* src1 is an immediate storing the key->render_to_fbo boolean.
|
|
||||||
*/
|
*/
|
||||||
FS_OPCODE_DDY_COARSE,
|
FS_OPCODE_DDY_COARSE,
|
||||||
FS_OPCODE_DDY_FINE,
|
FS_OPCODE_DDY_FINE,
|
||||||
|
|
|
@ -1060,37 +1060,18 @@ fs_visitor::import_uniforms(fs_visitor *v)
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg *
|
fs_reg *
|
||||||
fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
|
fs_visitor::emit_fragcoord_interpolation()
|
||||||
bool origin_upper_left)
|
|
||||||
{
|
{
|
||||||
assert(stage == MESA_SHADER_FRAGMENT);
|
assert(stage == MESA_SHADER_FRAGMENT);
|
||||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
|
||||||
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec4_type));
|
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec4_type));
|
||||||
fs_reg wpos = *reg;
|
fs_reg wpos = *reg;
|
||||||
bool flip = !origin_upper_left ^ key->render_to_fbo;
|
|
||||||
|
|
||||||
/* gl_FragCoord.x */
|
/* gl_FragCoord.x */
|
||||||
if (pixel_center_integer) {
|
bld.MOV(wpos, this->pixel_x);
|
||||||
bld.MOV(wpos, this->pixel_x);
|
|
||||||
} else {
|
|
||||||
bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f));
|
|
||||||
}
|
|
||||||
wpos = offset(wpos, bld, 1);
|
wpos = offset(wpos, bld, 1);
|
||||||
|
|
||||||
/* gl_FragCoord.y */
|
/* gl_FragCoord.y */
|
||||||
if (!flip && pixel_center_integer) {
|
bld.MOV(wpos, this->pixel_y);
|
||||||
bld.MOV(wpos, this->pixel_y);
|
|
||||||
} else {
|
|
||||||
fs_reg pixel_y = this->pixel_y;
|
|
||||||
float offset = (pixel_center_integer ? 0.0f : 0.5f);
|
|
||||||
|
|
||||||
if (flip) {
|
|
||||||
pixel_y.negate = true;
|
|
||||||
offset += key->drawable_height - 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
bld.ADD(wpos, pixel_y, brw_imm_f(offset));
|
|
||||||
}
|
|
||||||
wpos = offset(wpos, bld, 1);
|
wpos = offset(wpos, bld, 1);
|
||||||
|
|
||||||
/* gl_FragCoord.z */
|
/* gl_FragCoord.z */
|
||||||
|
|
|
@ -183,8 +183,7 @@ public:
|
||||||
|
|
||||||
void emit_dummy_fs();
|
void emit_dummy_fs();
|
||||||
void emit_repclear_shader();
|
void emit_repclear_shader();
|
||||||
fs_reg *emit_fragcoord_interpolation(bool pixel_center_integer,
|
fs_reg *emit_fragcoord_interpolation();
|
||||||
bool origin_upper_left);
|
|
||||||
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
|
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
|
||||||
glsl_interp_qualifier interpolation_mode,
|
glsl_interp_qualifier interpolation_mode,
|
||||||
bool is_centroid, bool is_sample);
|
bool is_centroid, bool is_sample);
|
||||||
|
@ -457,8 +456,7 @@ private:
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src);
|
struct brw_reg src);
|
||||||
void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
|
void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
|
||||||
void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src,
|
void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src);
|
||||||
bool negate_value);
|
|
||||||
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
|
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
|
||||||
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
|
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
|
||||||
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
|
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
|
||||||
|
|
|
@ -1111,9 +1111,10 @@ fs_generator::generate_ddx(enum opcode opcode,
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
fs_generator::generate_ddy(enum opcode opcode,
|
fs_generator::generate_ddy(enum opcode opcode,
|
||||||
struct brw_reg dst, struct brw_reg src,
|
struct brw_reg dst, struct brw_reg src)
|
||||||
bool negate_value)
|
|
||||||
{
|
{
|
||||||
|
bool negate_value = true;
|
||||||
|
|
||||||
if (opcode == FS_OPCODE_DDY_FINE) {
|
if (opcode == FS_OPCODE_DDY_FINE) {
|
||||||
/* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
|
/* From the Ivy Bridge PRM, volume 4 part 3, section 3.3.9 (Register
|
||||||
* Region Restrictions):
|
* Region Restrictions):
|
||||||
|
@ -2154,8 +2155,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||||
break;
|
break;
|
||||||
case FS_OPCODE_DDY_COARSE:
|
case FS_OPCODE_DDY_COARSE:
|
||||||
case FS_OPCODE_DDY_FINE:
|
case FS_OPCODE_DDY_FINE:
|
||||||
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
generate_ddy(inst->opcode, dst, src[0]);
|
||||||
generate_ddy(inst->opcode, dst, src[0], src[1].ud);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||||
|
|
|
@ -62,8 +62,7 @@ fs_visitor::nir_setup_inputs()
|
||||||
|
|
||||||
fs_reg reg;
|
fs_reg reg;
|
||||||
if (var->data.location == VARYING_SLOT_POS) {
|
if (var->data.location == VARYING_SLOT_POS) {
|
||||||
reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
|
reg = *emit_fragcoord_interpolation();
|
||||||
var->data.origin_upper_left);
|
|
||||||
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
|
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
|
||||||
input, reg), 0xF);
|
input, reg), 0xF);
|
||||||
} else if (var->data.location == VARYING_SLOT_LAYER) {
|
} else if (var->data.location == VARYING_SLOT_LAYER) {
|
||||||
|
@ -879,22 +878,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||||
break;
|
break;
|
||||||
case nir_op_fddy:
|
case nir_op_fddy:
|
||||||
if (fs_key->high_quality_derivatives) {
|
if (fs_key->high_quality_derivatives) {
|
||||||
inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
|
inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]);
|
||||||
brw_imm_d(fs_key->render_to_fbo));
|
|
||||||
} else {
|
} else {
|
||||||
inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
|
inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]);
|
||||||
brw_imm_d(fs_key->render_to_fbo));
|
|
||||||
}
|
}
|
||||||
inst->saturate = instr->dest.saturate;
|
inst->saturate = instr->dest.saturate;
|
||||||
break;
|
break;
|
||||||
case nir_op_fddy_fine:
|
case nir_op_fddy_fine:
|
||||||
inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
|
inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0]);
|
||||||
brw_imm_d(fs_key->render_to_fbo));
|
|
||||||
inst->saturate = instr->dest.saturate;
|
inst->saturate = instr->dest.saturate;
|
||||||
break;
|
break;
|
||||||
case nir_op_fddy_coarse:
|
case nir_op_fddy_coarse:
|
||||||
inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
|
inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0]);
|
||||||
brw_imm_d(fs_key->render_to_fbo));
|
|
||||||
inst->saturate = instr->dest.saturate;
|
inst->saturate = instr->dest.saturate;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -3074,12 +3069,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
|
||||||
case nir_intrinsic_interp_var_at_offset: {
|
case nir_intrinsic_interp_var_at_offset: {
|
||||||
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
|
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
|
||||||
|
|
||||||
const bool flip = !wm_key->render_to_fbo;
|
|
||||||
|
|
||||||
if (const_offset) {
|
if (const_offset) {
|
||||||
unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
|
unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
|
||||||
unsigned off_y = MIN2((int)(const_offset->f32[1] * 16 *
|
unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
|
||||||
(flip ? -1 : 1)), 7) & 0xf;
|
|
||||||
|
|
||||||
emit_pixel_interpolater_send(bld,
|
emit_pixel_interpolater_send(bld,
|
||||||
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
|
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
|
||||||
|
@ -3096,7 +3088,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
|
||||||
bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
|
bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
|
||||||
fs_reg itemp = vgrf(glsl_type::int_type);
|
fs_reg itemp = vgrf(glsl_type::int_type);
|
||||||
/* float to int */
|
/* float to int */
|
||||||
bld.MOV(itemp, (i == 1 && flip) ? negate(temp) : temp);
|
bld.MOV(itemp, temp);
|
||||||
|
|
||||||
/* Clamp the upper end of the range to +7/16.
|
/* Clamp the upper end of the range to +7/16.
|
||||||
* ARB_gpu_shader5 requires that we support a maximum offset
|
* ARB_gpu_shader5 requires that we support a maximum offset
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include "compiler/nir/glsl_to_nir.h"
|
#include "compiler/nir/glsl_to_nir.h"
|
||||||
#include "compiler/nir/nir_builder.h"
|
#include "compiler/nir/nir_builder.h"
|
||||||
#include "program/prog_to_nir.h"
|
#include "program/prog_to_nir.h"
|
||||||
|
#include "program/prog_parameter.h"
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_input(nir_intrinsic_instr *intrin)
|
is_input(nir_intrinsic_instr *intrin)
|
||||||
|
@ -573,6 +574,18 @@ brw_create_nir(struct brw_context *brw,
|
||||||
|
|
||||||
nir = brw_preprocess_nir(brw->intelScreen->compiler, nir);
|
nir = brw_preprocess_nir(brw->intelScreen->compiler, nir);
|
||||||
|
|
||||||
|
if (stage == MESA_SHADER_FRAGMENT) {
|
||||||
|
static const struct nir_lower_wpos_ytransform_options wpos_options = {
|
||||||
|
.state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
|
||||||
|
.fs_coord_pixel_center_integer = 1,
|
||||||
|
.fs_coord_origin_upper_left = 1,
|
||||||
|
};
|
||||||
|
_mesa_add_state_reference(prog->Parameters,
|
||||||
|
(gl_state_index *) wpos_options.state_tokens);
|
||||||
|
|
||||||
|
OPT(nir_lower_wpos_ytransform, &wpos_options);
|
||||||
|
}
|
||||||
|
|
||||||
OPT(nir_lower_system_values);
|
OPT(nir_lower_system_values);
|
||||||
OPT_V(brw_nir_lower_uniforms, is_scalar);
|
OPT_V(brw_nir_lower_uniforms, is_scalar);
|
||||||
|
|
||||||
|
|
|
@ -157,17 +157,11 @@ brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
|
||||||
{
|
{
|
||||||
struct gl_program_parameter_list *plist = prog->Parameters;
|
struct gl_program_parameter_list *plist = prog->Parameters;
|
||||||
|
|
||||||
#ifndef NDEBUG
|
/* For ARB programs, prog_to_nir generates a single "parameters" variable
|
||||||
if (!shader->uniforms.is_empty()) {
|
* for all uniform data. nir_lower_wpos_ytransform may also create an
|
||||||
/* For ARB programs, only a single "parameters" variable is generated to
|
* additional variable.
|
||||||
* support uniform data.
|
*/
|
||||||
*/
|
assert(shader->uniforms.length() <= 2);
|
||||||
assert(shader->uniforms.length() == 1);
|
|
||||||
nir_variable *var = (nir_variable *) shader->uniforms.get_head();
|
|
||||||
assert(strcmp(var->name, "parameters") == 0);
|
|
||||||
assert(var->type->array_size() == (int)plist->NumParameters);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (unsigned p = 0; p < plist->NumParameters; p++) {
|
for (unsigned p = 0; p < plist->NumParameters; p++) {
|
||||||
/* Parameters should be either vec4 uniforms or single component
|
/* Parameters should be either vec4 uniforms or single component
|
||||||
|
|
Loading…
Reference in New Issue