r300: Request that nir-to-tgsi avoid generating TGSI_OPCODE_CMP.
Given that our fcsels are on float-bools, we can emit the LRP directly and save the backend from having to emit an SLT to turn the CMP src[0] into a bool.

This required passing a codegen flags struct for nir-to-tgsi. I think this is a good way forward, as the alternative has mostly been adding flags to nir_shader_compiler_options (since adding PIPE_SHADER_CAPs is an unreasonable amount of pain).

r300 shader-db:
total instructions in shared programs: 1484320 -> 1472463 (-0.80%)
instructions in affected programs: 243588 -> 231731 (-4.87%)
total temps in shared programs: 212485 -> 212143 (-0.16%)
temps in affected programs: 3845 -> 3503 (-8.89%)

Acked-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14886>
This commit is contained in:
parent
4a1ba7914a
commit
d52d500f17
|
@ -37,6 +37,7 @@
|
||||||
struct ntt_compile {
|
struct ntt_compile {
|
||||||
nir_shader *s;
|
nir_shader *s;
|
||||||
nir_function_impl *impl;
|
nir_function_impl *impl;
|
||||||
|
const struct nir_to_tgsi_options *options;
|
||||||
struct pipe_screen *screen;
|
struct pipe_screen *screen;
|
||||||
struct ureg_program *ureg;
|
struct ureg_program *ureg;
|
||||||
|
|
||||||
|
@ -1156,15 +1157,26 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_fcsel:
|
case nir_op_fcsel:
|
||||||
/* NIR is src0 != 0 ? src1 : src2.
|
/* NIR fcsel is src0 != 0 ? src1 : src2.
|
||||||
* TGSI is src0 < 0 ? src1 : src2.
|
* TGSI CMP is src0 < 0 ? src1 : src2.
|
||||||
*
|
*
|
||||||
* However, fcsel so far as I can find only appears on bools-as-floats
|
* However, fcsel so far as I can find only appears on bools-as-floats
|
||||||
* (1.0 or 0.0), so we can just negate it for the TGSI op. It's
|
* (1.0 or 0.0), so we can just negate it for the TGSI op. It's
|
||||||
* important to not have an abs here, as i915g has to make extra
|
* important to not have an abs here, as i915g has to make extra
|
||||||
* instructions to do the abs.
|
* instructions to do the abs.
|
||||||
*/
|
*/
|
||||||
ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]);
|
if (c->options->lower_cmp) {
|
||||||
|
/* If the HW doesn't support TGSI CMP (r300 VS), then lower it to a
|
||||||
|
* LRP on the boolean 1.0/0.0 value, instead of requiring the
|
||||||
|
* backend to turn the src0 into 1.0/0.0 first.
|
||||||
|
*
|
||||||
|
* We don't use this in general because on some hardware (i915 FS) the
|
||||||
|
* LRP gets expanded to MUL/MAD.
|
||||||
|
*/
|
||||||
|
ureg_LRP(c->ureg, dst, src[0], src[1], src[2]);
|
||||||
|
} else {
|
||||||
|
ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* It would be nice if we could get this left as scalar in NIR, since
|
/* It would be nice if we could get this left as scalar in NIR, since
|
||||||
|
@ -2906,7 +2918,8 @@ nir_to_tgsi_lower_tex(nir_shader *s)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
|
ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
|
||||||
|
const struct nir_to_tgsi_options *ntt_options)
|
||||||
{
|
{
|
||||||
const struct nir_shader_compiler_options *options = s->options;
|
const struct nir_shader_compiler_options *options = s->options;
|
||||||
bool lower_fsqrt =
|
bool lower_fsqrt =
|
||||||
|
@ -3051,6 +3064,14 @@ nir_lower_primid_sysval_to_input(nir_shader *s)
|
||||||
nir_lower_primid_sysval_to_input_lower, &input);
|
nir_lower_primid_sysval_to_input_lower, &input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const void *
|
||||||
|
nir_to_tgsi(struct nir_shader *s,
|
||||||
|
struct pipe_screen *screen)
|
||||||
|
{
|
||||||
|
static const struct nir_to_tgsi_options default_ntt_options = {0};
|
||||||
|
return nir_to_tgsi_options(s, screen, &default_ntt_options);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates the NIR shader to TGSI.
|
* Translates the NIR shader to TGSI.
|
||||||
*
|
*
|
||||||
|
@ -3058,9 +3079,9 @@ nir_lower_primid_sysval_to_input(nir_shader *s)
|
||||||
* We take ownership of the NIR shader passed, returning a reference to the new
|
* We take ownership of the NIR shader passed, returning a reference to the new
|
||||||
* TGSI tokens instead. If you need to keep the NIR, then pass us a clone.
|
* TGSI tokens instead. If you need to keep the NIR, then pass us a clone.
|
||||||
*/
|
*/
|
||||||
const void *
|
const void *nir_to_tgsi_options(struct nir_shader *s,
|
||||||
nir_to_tgsi(struct nir_shader *s,
|
struct pipe_screen *screen,
|
||||||
struct pipe_screen *screen)
|
const struct nir_to_tgsi_options *options)
|
||||||
{
|
{
|
||||||
struct ntt_compile *c;
|
struct ntt_compile *c;
|
||||||
const void *tgsi_tokens;
|
const void *tgsi_tokens;
|
||||||
|
@ -3070,7 +3091,7 @@ nir_to_tgsi(struct nir_shader *s,
|
||||||
PIPE_SHADER_CAP_INTEGERS);
|
PIPE_SHADER_CAP_INTEGERS);
|
||||||
const struct nir_shader_compiler_options *original_options = s->options;
|
const struct nir_shader_compiler_options *original_options = s->options;
|
||||||
|
|
||||||
ntt_fix_nir_options(screen, s);
|
ntt_fix_nir_options(screen, s, options);
|
||||||
|
|
||||||
NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
||||||
type_size, (nir_lower_io_options)0);
|
type_size, (nir_lower_io_options)0);
|
||||||
|
@ -3157,6 +3178,7 @@ nir_to_tgsi(struct nir_shader *s,
|
||||||
|
|
||||||
c = rzalloc(NULL, struct ntt_compile);
|
c = rzalloc(NULL, struct ntt_compile);
|
||||||
c->screen = screen;
|
c->screen = screen;
|
||||||
|
c->options = options;
|
||||||
|
|
||||||
c->needs_texcoord_semantic =
|
c->needs_texcoord_semantic =
|
||||||
screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
|
screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
|
||||||
|
|
|
@ -24,12 +24,22 @@
|
||||||
#ifndef NIR_TO_TGSI_H
|
#ifndef NIR_TO_TGSI_H
|
||||||
#define NIR_TO_TGSI_H
|
#define NIR_TO_TGSI_H
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
struct nir_shader;
|
struct nir_shader;
|
||||||
struct pipe_screen;
|
struct pipe_screen;
|
||||||
|
|
||||||
|
struct nir_to_tgsi_options {
|
||||||
|
bool lower_cmp;
|
||||||
|
};
|
||||||
|
|
||||||
const void *nir_to_tgsi(struct nir_shader *s,
|
const void *nir_to_tgsi(struct nir_shader *s,
|
||||||
struct pipe_screen *screen);
|
struct pipe_screen *screen);
|
||||||
|
|
||||||
|
const void *nir_to_tgsi_options(struct nir_shader *s,
|
||||||
|
struct pipe_screen *screen,
|
||||||
|
const struct nir_to_tgsi_options *ntt_options);
|
||||||
|
|
||||||
const void *
|
const void *
|
||||||
nir_to_tgsi_get_compiler_options(struct pipe_screen *pscreen,
|
nir_to_tgsi_get_compiler_options(struct pipe_screen *pscreen,
|
||||||
enum pipe_shader_ir ir,
|
enum pipe_shader_ir ir,
|
||||||
|
|
|
@ -48,12 +48,6 @@ dEQP-GLES2.functional.rasterization.primitives.line_loop_wide,Fail
|
||||||
dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_vertex,Fail
|
dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_vertex,Fail
|
||||||
dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_fragment,Fail
|
dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_fragment,Fail
|
||||||
|
|
||||||
# "No free temporary to use for predicate stack counter."
|
|
||||||
dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Fail
|
|
||||||
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_loop_read_vertex,Fail
|
|
||||||
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_loop_read_vertex,Fail
|
|
||||||
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_loop_read_vertex,Fail
|
|
||||||
|
|
||||||
# "Rewrite of inst 0 failed Can't allocate source for Inst 4 src_type=1 new_index=1 new_mask=1"
|
# "Rewrite of inst 0 failed Can't allocate source for Inst 4 src_type=1 new_index=1 new_mask=1"
|
||||||
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_component_read_fragment,Fail
|
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_component_read_fragment,Fail
|
||||||
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_direct_read_fragment,Fail
|
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_direct_read_fragment,Fail
|
||||||
|
|
|
@ -1943,7 +1943,13 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
|
||||||
vs->state = *shader;
|
vs->state = *shader;
|
||||||
|
|
||||||
if (vs->state.type == PIPE_SHADER_IR_NIR) {
|
if (vs->state.type == PIPE_SHADER_IR_NIR) {
|
||||||
vs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen);
|
static const struct nir_to_tgsi_options swtcl_options = {0};
|
||||||
|
static const struct nir_to_tgsi_options hwtcl_options = {
|
||||||
|
.lower_cmp = true,
|
||||||
|
};
|
||||||
|
vs->state.tokens = nir_to_tgsi_options(shader->ir.nir, pipe->screen,
|
||||||
|
r300->screen->caps.has_tcl ?
|
||||||
|
&hwtcl_options : &swtcl_options);
|
||||||
} else {
|
} else {
|
||||||
assert(vs->state.type == PIPE_SHADER_IR_TGSI);
|
assert(vs->state.type == PIPE_SHADER_IR_TGSI);
|
||||||
/* we need to keep a local copy of the tokens */
|
/* we need to keep a local copy of the tokens */
|
||||||
|
|
Loading…
Reference in New Issue