mirror of https://gitlab.freedesktop.org/mesa/mesa
600 lines
19 KiB
C
600 lines
19 KiB
C
/*
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
#include "gallium/include/pipe/p_defines.h"
|
|
#include "agx_linker.h"
|
|
#include "agx_nir_lower_gs.h"
|
|
#include "agx_nir_lower_vbo.h"
|
|
#include "agx_nir_passes.h"
|
|
#include "agx_pack.h"
|
|
#include "agx_tilebuffer.h"
|
|
#include "nir.h"
|
|
#include "nir_builder.h"
|
|
#include "nir_builder_opcodes.h"
|
|
#include "nir_lower_blend.h"
|
|
#include "shader_enums.h"
|
|
|
|
/*
|
|
* Insert code into a fragment shader to lower polygon stipple. The stipple is
|
|
* passed in a sideband, rather than requiring a texture binding. This is
|
|
* simpler for drivers to integrate and might be more efficient.
|
|
*/
|
|
static bool
|
|
agx_nir_lower_poly_stipple(nir_shader *s)
|
|
{
|
|
assert(s->info.stage == MESA_SHADER_FRAGMENT);
|
|
|
|
/* Insert at the beginning for performance. */
|
|
nir_builder b_ =
|
|
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
|
|
nir_builder *b = &b_;
|
|
|
|
/* The stipple coordinate is defined at the window coordinate mod 32. It's
|
|
* reversed along the X-axis to simplify the driver, hence the NOT.
|
|
*/
|
|
nir_def *raw = nir_u2u32(b, nir_load_pixel_coord(b));
|
|
nir_def *coord = nir_umod_imm(
|
|
b,
|
|
nir_vec2(b, nir_inot(b, nir_channel(b, raw, 0)), nir_channel(b, raw, 1)),
|
|
32);
|
|
|
|
/* Extract the column from the packed bitfield */
|
|
nir_def *pattern = nir_load_polygon_stipple_agx(b, nir_channel(b, coord, 1));
|
|
nir_def *bit = nir_ubitfield_extract(b, pattern, nir_channel(b, coord, 0),
|
|
nir_imm_int(b, 1));
|
|
|
|
/* Discard fragments where the pattern is 0 */
|
|
nir_demote_if(b, nir_ieq_imm(b, bit, 0));
|
|
s->info.fs.uses_discard = true;
|
|
|
|
nir_metadata_preserve(b->impl,
|
|
nir_metadata_dominance | nir_metadata_block_index);
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_vbo(nir_shader *s, const struct agx_velem_key *key)
|
|
{
|
|
struct agx_attribute out[AGX_MAX_VBUFS];
|
|
|
|
for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
|
|
out[i] = (struct agx_attribute){
|
|
.divisor = key[i].divisor,
|
|
.stride = key[i].stride,
|
|
.format = key[i].format,
|
|
.instanced = key[i].instanced,
|
|
};
|
|
}
|
|
|
|
return agx_nir_lower_vbo(s, out);
|
|
}
|
|
|
|
static int
|
|
map_vs_part_uniform(nir_intrinsic_instr *intr, unsigned nr_attribs)
|
|
{
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_load_vbo_base_agx:
|
|
return 4 * nir_src_as_uint(intr->src[0]);
|
|
case nir_intrinsic_load_attrib_clamp_agx:
|
|
return (4 * nr_attribs) + (2 * nir_src_as_uint(intr->src[0]));
|
|
case nir_intrinsic_load_first_vertex:
|
|
return (6 * nr_attribs);
|
|
case nir_intrinsic_load_base_instance:
|
|
return (6 * nr_attribs) + 2;
|
|
case nir_intrinsic_load_input_assembly_buffer_agx:
|
|
return (6 * nr_attribs) + 8;
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
static int
|
|
map_fs_part_uniform(nir_intrinsic_instr *intr)
|
|
{
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_load_blend_const_color_r_float:
|
|
return 4;
|
|
case nir_intrinsic_load_blend_const_color_g_float:
|
|
return 6;
|
|
case nir_intrinsic_load_blend_const_color_b_float:
|
|
return 8;
|
|
case nir_intrinsic_load_blend_const_color_a_float:
|
|
return 10;
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
|
|
void *data)
|
|
{
|
|
int unif;
|
|
if (b->shader->info.stage == MESA_SHADER_VERTEX) {
|
|
unsigned *nr_attribs = data;
|
|
unif = map_vs_part_uniform(intr, *nr_attribs);
|
|
} else {
|
|
unif = map_fs_part_uniform(intr);
|
|
}
|
|
|
|
if (unif >= 0) {
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
nir_def *load = nir_load_preamble(b, 1, intr->def.bit_size, .base = unif);
|
|
nir_def_rewrite_uses(&intr->def, load);
|
|
return true;
|
|
} else if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
nir_def *offs =
|
|
nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
|
|
nir_def_rewrite_uses(&intr->def, nir_vec2(b, nir_imm_int(b, 0), offs));
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
void
|
|
agx_nir_vs_prolog(nir_builder *b, const void *key_)
|
|
{
|
|
const struct agx_vs_prolog_key *key = key_;
|
|
b->shader->info.stage = MESA_SHADER_VERTEX;
|
|
b->shader->info.name = "VS prolog";
|
|
|
|
/* First, construct a passthrough shader reading each attribute and exporting
|
|
* the value. We also need to export vertex/instance ID in their usual regs.
|
|
*/
|
|
unsigned i = 0;
|
|
nir_def *vec = NULL;
|
|
unsigned vec_idx = ~0;
|
|
BITSET_FOREACH_SET(i, key->component_mask, AGX_MAX_ATTRIBS * 4) {
|
|
unsigned a = i / 4;
|
|
unsigned c = i % 4;
|
|
|
|
if (vec_idx != a) {
|
|
vec = nir_load_input(b, 4, 32, nir_imm_int(b, 0), .base = a);
|
|
vec_idx = a;
|
|
}
|
|
|
|
/* ABI: attributes passed starting at r8 */
|
|
nir_export_agx(b, nir_channel(b, vec, c), .base = 2 * (8 + i));
|
|
}
|
|
|
|
nir_export_agx(b, nir_load_vertex_id(b), .base = 5 * 2);
|
|
nir_export_agx(b, nir_load_instance_id(b), .base = 6 * 2);
|
|
|
|
/* Now lower the resulting program using the key */
|
|
lower_vbo(b->shader, key->attribs);
|
|
|
|
if (!key->hw) {
|
|
agx_nir_lower_index_buffer(b->shader, key->sw_index_size_B, false);
|
|
agx_nir_lower_sw_vs_id(b->shader);
|
|
}
|
|
|
|
/* Finally, lower uniforms according to our ABI */
|
|
unsigned nr = DIV_ROUND_UP(BITSET_LAST_BIT(key->component_mask), 4);
|
|
nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
|
|
nir_metadata_dominance | nir_metadata_block_index,
|
|
&nr);
|
|
b->shader->info.io_lowered = true;
|
|
}
|
|
|
|
static bool
|
|
lower_input_to_prolog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|
{
|
|
if (intr->intrinsic != nir_intrinsic_load_input)
|
|
return false;
|
|
|
|
unsigned idx = nir_src_as_uint(intr->src[0]) + nir_intrinsic_base(intr);
|
|
unsigned comp = nir_intrinsic_component(intr);
|
|
|
|
assert(intr->def.bit_size == 32 && "todo: push conversions up?");
|
|
unsigned base = 4 * idx + comp;
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
nir_def *val = nir_load_exported_agx(
|
|
b, intr->def.num_components, intr->def.bit_size, .base = 16 + 2 * base);
|
|
|
|
BITSET_WORD *comps_read = data;
|
|
nir_component_mask_t mask = nir_def_components_read(&intr->def);
|
|
|
|
u_foreach_bit(c, mask) {
|
|
BITSET_SET(comps_read, base + c);
|
|
}
|
|
|
|
nir_def_rewrite_uses(&intr->def, val);
|
|
nir_instr_remove(&intr->instr);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
agx_nir_lower_vs_input_to_prolog(nir_shader *s,
|
|
BITSET_WORD *attrib_components_read)
|
|
{
|
|
return nir_shader_intrinsics_pass(
|
|
s, lower_input_to_prolog,
|
|
nir_metadata_dominance | nir_metadata_block_index,
|
|
attrib_components_read);
|
|
}
|
|
|
|
static bool
|
|
lower_active_samples_to_register(nir_builder *b, nir_intrinsic_instr *intr,
|
|
void *data)
|
|
{
|
|
if (intr->intrinsic != nir_intrinsic_load_active_samples_agx)
|
|
return false;
|
|
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
|
|
/* ABI: r0h contains the active sample mask */
|
|
nir_def *id = nir_load_exported_agx(b, 1, 16, .base = 1);
|
|
nir_def_rewrite_uses(&intr->def, id);
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_tests_zs_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|
{
|
|
bool *value = data;
|
|
if (intr->intrinsic != nir_intrinsic_load_shader_part_tests_zs_agx)
|
|
return false;
|
|
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
nir_def_rewrite_uses(&intr->def, nir_imm_intN_t(b, *value ? 0xFF : 0, 16));
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_tests_zs(nir_shader *s, bool value)
|
|
{
|
|
if (!s->info.fs.uses_discard)
|
|
return false;
|
|
|
|
return nir_shader_intrinsics_pass(
|
|
s, lower_tests_zs_intr, nir_metadata_dominance | nir_metadata_block_index,
|
|
&value);
|
|
}
|
|
|
|
static inline bool
|
|
blend_uses_2src(nir_lower_blend_rt rt)
|
|
{
|
|
enum pipe_blendfactor factors[] = {
|
|
rt.rgb.src_factor,
|
|
rt.rgb.dst_factor,
|
|
rt.alpha.src_factor,
|
|
rt.alpha.dst_factor,
|
|
};
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(factors); ++i) {
|
|
switch (factors[i]) {
|
|
case PIPE_BLENDFACTOR_SRC1_COLOR:
|
|
case PIPE_BLENDFACTOR_SRC1_ALPHA:
|
|
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
|
|
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void
|
|
agx_nir_fs_epilog(nir_builder *b, const void *key_)
|
|
{
|
|
const struct agx_fs_epilog_key *key = key_;
|
|
b->shader->info.stage = MESA_SHADER_FRAGMENT;
|
|
b->shader->info.name = "FS epilog";
|
|
|
|
/* First, construct a passthrough shader reading each colour and outputting
|
|
* the value.
|
|
*/
|
|
u_foreach_bit(rt, key->link.rt_written) {
|
|
bool dual_src = (rt == 1) && blend_uses_2src(key->blend.rt[0]);
|
|
unsigned read_rt = (key->link.broadcast_rt0 && !dual_src) ? 0 : rt;
|
|
unsigned size = (key->link.size_32 & BITFIELD_BIT(read_rt)) ? 32 : 16;
|
|
|
|
nir_def *value =
|
|
nir_load_exported_agx(b, 4, size, .base = 2 * (4 + (4 * read_rt)));
|
|
|
|
if (key->link.rt0_w_1 && read_rt == 0) {
|
|
value =
|
|
nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
|
|
}
|
|
|
|
nir_store_output(
|
|
b, value, nir_imm_int(b, 0),
|
|
.io_semantics.location = FRAG_RESULT_DATA0 + (dual_src ? 0 : rt),
|
|
.io_semantics.dual_source_blend_index = dual_src,
|
|
.src_type = nir_type_float | size);
|
|
}
|
|
|
|
/* Grab the sample ID early, this has to happen in the first block. */
|
|
nir_def *sample_id = NULL;
|
|
if (key->link.sample_shading) {
|
|
sample_id = nir_load_exported_agx(b, 1, 16, .base = 1);
|
|
}
|
|
|
|
/* Now lower the resulting program using the key */
|
|
struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(
|
|
key->rt_formats, ARRAY_SIZE(key->rt_formats), key->nr_samples, true);
|
|
|
|
if (key->force_small_tile)
|
|
tib.tile_size = (struct agx_tile_size){16, 16};
|
|
|
|
bool force_translucent = false;
|
|
nir_lower_blend_options opts = {
|
|
.scalar_blend_const = true,
|
|
.logicop_enable = key->blend.logicop_func != PIPE_LOGICOP_COPY,
|
|
.logicop_func = key->blend.logicop_func,
|
|
};
|
|
|
|
static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync");
|
|
memcpy(opts.rt, key->blend.rt, sizeof(opts.rt));
|
|
|
|
for (unsigned i = 0; i < 8; ++i) {
|
|
opts.format[i] = key->rt_formats[i];
|
|
}
|
|
|
|
/* It's more efficient to use masked stores (with
|
|
* agx_nir_lower_tilebuffer) than to emulate colour masking with
|
|
* nir_lower_blend.
|
|
*/
|
|
uint8_t colormasks[8] = {0};
|
|
|
|
for (unsigned i = 0; i < 8; ++i) {
|
|
if (key->rt_formats[i] == PIPE_FORMAT_NONE)
|
|
continue;
|
|
|
|
/* TODO: Flakes some dEQPs, seems to invoke UB. Revisit later.
|
|
* dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.77
|
|
* dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.98
|
|
*/
|
|
if (0 /* agx_tilebuffer_supports_mask(&tib, i) */) {
|
|
colormasks[i] = key->blend.rt[i].colormask;
|
|
opts.rt[i].colormask = (uint8_t)BITFIELD_MASK(4);
|
|
} else {
|
|
colormasks[i] = (uint8_t)BITFIELD_MASK(4);
|
|
}
|
|
|
|
/* If not all bound RTs are fully written to, we need to force
|
|
* translucent pass type. agx_nir_lower_tilebuffer will take
|
|
* care of this for its own colormasks input.
|
|
*/
|
|
unsigned comps = util_format_get_nr_components(key->rt_formats[i]);
|
|
if ((opts.rt[i].colormask & BITFIELD_MASK(comps)) !=
|
|
BITFIELD_MASK(comps)) {
|
|
force_translucent = true;
|
|
}
|
|
}
|
|
|
|
/* Alpha-to-coverage must be lowered before alpha-to-one */
|
|
if (key->blend.alpha_to_coverage)
|
|
NIR_PASS(_, b->shader, agx_nir_lower_alpha_to_coverage, tib.nr_samples);
|
|
|
|
/* Depth/stencil writes must be deferred until after all discards,
|
|
* particularly alpha-to-coverage.
|
|
*/
|
|
if (key->link.write_z || key->link.write_s) {
|
|
nir_store_zs_agx(
|
|
b, nir_imm_intN_t(b, 0xFF, 16),
|
|
nir_load_exported_agx(b, 1, 32, .base = 4),
|
|
nir_load_exported_agx(b, 1, 16, .base = 6),
|
|
.base = (key->link.write_z ? 1 : 0) | (key->link.write_s ? 2 : 0));
|
|
|
|
if (key->link.write_z)
|
|
b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
|
|
|
|
if (key->link.write_s)
|
|
b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
|
|
}
|
|
|
|
/* Alpha-to-one must be lowered before blending */
|
|
if (key->blend.alpha_to_one)
|
|
NIR_PASS(_, b->shader, agx_nir_lower_alpha_to_one);
|
|
|
|
NIR_PASS(_, b->shader, nir_lower_blend, &opts);
|
|
|
|
unsigned rt_spill = key->link.rt_spill_base;
|
|
NIR_PASS(_, b->shader, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
|
|
&force_translucent);
|
|
NIR_PASS(_, b->shader, agx_nir_lower_texture);
|
|
NIR_PASS(_, b->shader, agx_nir_lower_multisampled_image_store);
|
|
|
|
/* If the API shader runs once per sample, then the epilog runs once per
|
|
* sample as well, so we need to lower our code to run for a single sample.
|
|
*
|
|
* If the API shader runs once per pixel, then the epilog runs once per
|
|
* pixel. So we run through the monolithic MSAA lowering, which wraps the
|
|
* epilog in the sample loop if needed. This localizes sample shading
|
|
* to the epilog, when sample shading is not used but blending is.
|
|
*/
|
|
if (key->link.sample_shading) {
|
|
NIR_PASS(_, b->shader, agx_nir_lower_to_per_sample);
|
|
NIR_PASS(_, b->shader, agx_nir_lower_fs_active_samples_to_register);
|
|
|
|
/* Ensure the sample ID is preserved in register. We do this late since it
|
|
* has to go in the last block, and the above passes might add control
|
|
* flow when lowering.
|
|
*/
|
|
b->cursor = nir_after_impl(b->impl);
|
|
nir_export_agx(b, sample_id, .base = 1);
|
|
} else {
|
|
NIR_PASS(_, b->shader, agx_nir_lower_monolithic_msaa, key->nr_samples);
|
|
}
|
|
|
|
/* Finally, lower uniforms according to our ABI */
|
|
nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
|
|
nir_metadata_dominance | nir_metadata_block_index,
|
|
NULL);
|
|
|
|
/* There is no shader part after the epilog, so we're always responsible for
|
|
* running our own tests, unless the fragment shader forced early tests.
|
|
*/
|
|
NIR_PASS(_, b->shader, lower_tests_zs, !key->link.already_ran_zs);
|
|
|
|
b->shader->info.io_lowered = true;
|
|
b->shader->info.fs.uses_fbfetch_output |= force_translucent;
|
|
b->shader->info.fs.uses_sample_shading = key->link.sample_shading;
|
|
}
|
|
|
|
static bool
|
|
lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|
{
|
|
struct agx_fs_epilog_link_info *info = data;
|
|
if (intr->intrinsic == nir_intrinsic_store_zs_agx) {
|
|
assert(nir_src_as_uint(intr->src[0]) == 0xff && "msaa not yet lowered");
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
|
|
unsigned base = nir_intrinsic_base(intr);
|
|
info->write_z = !!(base & 1);
|
|
info->write_s = !!(base & 2);
|
|
|
|
/* ABI: r2 contains the written depth */
|
|
if (info->write_z)
|
|
nir_export_agx(b, intr->src[1].ssa, .base = 4);
|
|
|
|
/* ABI: r3l contains the written stencil */
|
|
if (info->write_s)
|
|
nir_export_agx(b, intr->src[2].ssa, .base = 6);
|
|
|
|
return true;
|
|
}
|
|
|
|
if (intr->intrinsic != nir_intrinsic_store_output)
|
|
return false;
|
|
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
|
|
|
/* Fix up gl_FragColor */
|
|
if (sem.location == FRAG_RESULT_COLOR) {
|
|
sem.location = FRAG_RESULT_DATA0;
|
|
info->broadcast_rt0 = true;
|
|
info->rt_written = ~0;
|
|
}
|
|
|
|
/* We don't use the epilog for sample mask writes */
|
|
if (sem.location < FRAG_RESULT_DATA0)
|
|
return false;
|
|
|
|
/* Determine the render target index. Dual source blending aliases a second
|
|
* render target, so get that out of the way now.
|
|
*/
|
|
unsigned rt = sem.location - FRAG_RESULT_DATA0;
|
|
rt += nir_src_as_uint(intr->src[1]);
|
|
|
|
if (sem.dual_source_blend_index) {
|
|
assert(rt == 0);
|
|
rt = 1;
|
|
}
|
|
|
|
info->rt_written |= BITFIELD_BIT(rt);
|
|
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
nir_def *vec = intr->src[0].ssa;
|
|
|
|
if (vec->bit_size == 32)
|
|
info->size_32 |= BITFIELD_BIT(rt);
|
|
else
|
|
assert(vec->bit_size == 16);
|
|
|
|
uint32_t one_f = (vec->bit_size == 32 ? fui(1.0) : _mesa_float_to_half(1.0));
|
|
unsigned comp = nir_intrinsic_component(intr);
|
|
|
|
u_foreach_bit(c, nir_intrinsic_write_mask(intr)) {
|
|
nir_scalar s = nir_scalar_resolved(vec, c);
|
|
if (rt == 0 && c == 3 && nir_scalar_is_const(s) &&
|
|
nir_scalar_as_uint(s) == one_f) {
|
|
|
|
info->rt0_w_1 = true;
|
|
} else {
|
|
unsigned stride = vec->bit_size / 16;
|
|
|
|
nir_export_agx(b, nir_channel(b, vec, c),
|
|
.base = (2 * (4 + (4 * rt))) + (comp + c) * stride);
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
agx_nir_lower_fs_output_to_epilog(nir_shader *s,
|
|
struct agx_fs_epilog_link_info *out)
|
|
{
|
|
nir_shader_intrinsics_pass(s, lower_output_to_epilog,
|
|
nir_metadata_dominance | nir_metadata_block_index,
|
|
out);
|
|
|
|
out->sample_shading = s->info.fs.uses_sample_shading;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
agx_nir_lower_fs_active_samples_to_register(nir_shader *s)
|
|
{
|
|
return nir_shader_intrinsics_pass(
|
|
s, lower_active_samples_to_register,
|
|
nir_metadata_dominance | nir_metadata_block_index, NULL);
|
|
}
|
|
|
|
static bool
|
|
agx_nir_lower_stats_fs(nir_shader *s)
|
|
{
|
|
assert(s->info.stage == MESA_SHADER_FRAGMENT);
|
|
nir_builder b_ =
|
|
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
|
|
nir_builder *b = &b_;
|
|
|
|
nir_def *samples = nir_bit_count(b, nir_load_sample_mask_in(b));
|
|
unsigned query = PIPE_STAT_QUERY_PS_INVOCATIONS;
|
|
|
|
nir_def *addr = nir_load_stat_query_address_agx(b, .base = query);
|
|
nir_global_atomic(b, 32, addr, samples, .atomic_op = nir_atomic_op_iadd);
|
|
|
|
nir_metadata_preserve(b->impl,
|
|
nir_metadata_block_index | nir_metadata_dominance);
|
|
return true;
|
|
}
|
|
|
|
void
|
|
agx_nir_fs_prolog(nir_builder *b, const void *key_)
|
|
{
|
|
const struct agx_fs_prolog_key *key = key_;
|
|
b->shader->info.stage = MESA_SHADER_FRAGMENT;
|
|
b->shader->info.name = "FS prolog";
|
|
|
|
/* First, insert code for any emulated features */
|
|
if (key->api_sample_mask != 0xff) {
|
|
/* Kill samples that are NOT covered by the mask */
|
|
nir_discard_agx(b, nir_imm_intN_t(b, key->api_sample_mask ^ 0xff, 16));
|
|
b->shader->info.fs.uses_discard = true;
|
|
}
|
|
|
|
if (key->statistics) {
|
|
NIR_PASS(_, b->shader, agx_nir_lower_stats_fs);
|
|
}
|
|
|
|
if (key->cull_distance_size) {
|
|
NIR_PASS(_, b->shader, agx_nir_lower_cull_distance_fs,
|
|
key->cull_distance_size);
|
|
}
|
|
|
|
if (key->polygon_stipple) {
|
|
NIR_PASS_V(b->shader, agx_nir_lower_poly_stipple);
|
|
}
|
|
|
|
/* Then, lower the prolog */
|
|
NIR_PASS(_, b->shader, agx_nir_lower_discard_zs_emit);
|
|
NIR_PASS(_, b->shader, agx_nir_lower_sample_mask);
|
|
NIR_PASS(_, b->shader, nir_shader_intrinsics_pass,
|
|
lower_non_monolithic_uniforms,
|
|
nir_metadata_dominance | nir_metadata_block_index, NULL);
|
|
NIR_PASS(_, b->shader, lower_tests_zs, key->run_zs_tests);
|
|
|
|
b->shader->info.io_lowered = true;
|
|
}
|