mirror of https://gitlab.freedesktop.org/mesa/mesa
228 lines
7.6 KiB
C
228 lines
7.6 KiB
C
/*
|
|
* Copyright 2023 Alyssa Rosenzweig
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "compiler/glsl/list.h"
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "agx_compiler.h"
|
|
#include "nir.h"
|
|
#include "nir_builder_opcodes.h"
|
|
#include "nir_intrinsics.h"
|
|
|
|
/*
|
|
* sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
|
|
* to an indexed sample. Roughly, the instruction does:
|
|
*
|
|
* foreach sample in TARGET {
|
|
* if sample in LIVE {
|
|
* run depth/stencil test and update
|
|
* } else {
|
|
* kill sample
|
|
* }
|
|
* }
|
|
*
|
|
* As a special case, TARGET may be set to all-1s (~0) to refer to all samples
|
|
* regardless of the framebuffer sample count.
|
|
*
|
|
* For example, to discard an entire pixel unconditionally, we could run:
|
|
*
|
|
* sample_mask ~0, 0
|
|
*
|
|
* sample_mask must follow these rules:
|
|
*
|
|
* 1. All sample_mask instructions affecting a sample must execute before a
|
|
* local_store_pixel instruction targeting that sample. This ensures that
|
|
* nothing is written for discarded samples (whether discarded in shader or
|
|
* due to a failed depth/stencil test).
|
|
*
|
|
* 2. If sample_mask is used anywhere in a shader, then on every execution path,
|
|
* every sample must be killed or else run depth/stencil tests exactly ONCE.
|
|
*
|
|
* 3. If a sample is killed, future sample_mask instructions have
|
|
* no effect on that sample. The following code sequence correctly implements
|
|
* a conditional discard (if there are no other sample_mask instructions in
|
|
* the shader):
|
|
*
|
|
* sample_mask discarded, 0
|
|
* sample_mask ~0, ~0
|
|
*
|
|
* but this sequence is incorrect:
|
|
*
|
|
* sample_mask ~0, ~discarded
|
|
* sample_mask ~0, ~0 <-- incorrect: depth/stencil tests run twice
|
|
*
|
|
* 4. If zs_emit is used anywhere in the shader, sample_mask must not be used.
|
|
* Instead, zs_emit with depth = NaN can be emitted.
|
|
*
|
|
* This pass lowers discard_agx to sample_mask instructions satisfying these
|
|
* rules. Other passes should not generate sample_mask instructions, as there
|
|
* are too many footguns.
|
|
*/
|
|
|
|
#define ALL_SAMPLES (0xFF)
|
|
#define BASE_Z 1
|
|
#define BASE_S 2
|
|
|
|
static bool
|
|
lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|
{
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
return false;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
bool depth_written =
|
|
b->shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
|
|
bool stencil_written =
|
|
b->shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
|
|
|
|
b->cursor = nir_before_instr(instr);
|
|
|
|
/* Existing zs_emit instructions need to be fixed up to write their own depth
|
|
* for consistency.
|
|
*/
|
|
if (intr->intrinsic == nir_intrinsic_store_zs_agx && !depth_written) {
|
|
/* Load the current depth at this pixel */
|
|
nir_ssa_def *z = nir_load_frag_coord_zw(b, .component = 2);
|
|
|
|
/* Write it out from this store_zs */
|
|
nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) | BASE_Z);
|
|
nir_instr_rewrite_src_ssa(instr, &intr->src[1], z);
|
|
|
|
/* We'll set outputs_written after the pass in case there are multiple
|
|
* store_zs_agx instructions needing fixup.
|
|
*/
|
|
b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
|
|
return true;
|
|
}
|
|
|
|
if (intr->intrinsic != nir_intrinsic_discard_agx)
|
|
return false;
|
|
|
|
/* Write a NaN depth value for discarded samples */
|
|
nir_store_zs_agx(b, intr->src[0].ssa, nir_imm_float(b, NAN),
|
|
stencil_written ? nir_imm_intN_t(b, 0, 16)
|
|
: nir_ssa_undef(b, 1, 16) /* stencil */,
|
|
.base = BASE_Z | (stencil_written ? BASE_S : 0));
|
|
|
|
nir_instr_remove(instr);
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_discard_to_sample_mask_0(nir_builder *b, nir_instr *instr,
|
|
UNUSED void *data)
|
|
{
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
return false;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
if (intr->intrinsic != nir_intrinsic_discard_agx)
|
|
return false;
|
|
|
|
b->cursor = nir_before_instr(instr);
|
|
nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
|
|
nir_instr_remove(instr);
|
|
return true;
|
|
}
|
|
|
|
static nir_intrinsic_instr *
|
|
last_discard_in_block(nir_block *block)
|
|
{
|
|
nir_foreach_instr_reverse(instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
if (intr->intrinsic == nir_intrinsic_discard_agx)
|
|
return intr;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static bool
|
|
cf_node_contains_discard(nir_cf_node *node)
|
|
{
|
|
nir_foreach_block_in_cf_node(block, node) {
|
|
if (last_discard_in_block(block))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
|
|
{
|
|
if (!shader->info.fs.uses_discard)
|
|
return false;
|
|
|
|
/* sample_mask can't be used with zs_emit, so lower sample_mask to zs_emit */
|
|
if (shader->info.outputs_written & (BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
|
|
BITFIELD64_BIT(FRAG_RESULT_STENCIL))) {
|
|
bool progress = nir_shader_instructions_pass(
|
|
shader, lower_sample_mask_to_zs,
|
|
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
|
|
|
/* The lowering requires an unconditional depth write. We mark this after
|
|
* lowering so the lowering knows whether there was already a depth write
|
|
*/
|
|
assert(progress && "must have lowered something,given the outputs");
|
|
shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
|
|
|
|
return true;
|
|
}
|
|
|
|
/* We want to run depth/stencil tests as early as possible, but we have to
|
|
* wait until after the last discard. We find the last discard and
|
|
* execute depth/stencil tests in the first unconditional block after (if in
|
|
* conditional control flow), or fuse depth/stencil tests into the sample
|
|
* instruction (if in unconditional control flow).
|
|
*
|
|
* To do so, we walk the root control flow list backwards, looking for the
|
|
* earliest unconditionally executed instruction after all discard.
|
|
*/
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
nir_builder b = nir_builder_create(impl);
|
|
foreach_list_typed_reverse(nir_cf_node, node, node, &impl->body) {
|
|
if (node->type == nir_cf_node_block) {
|
|
/* Unconditionally executed block */
|
|
nir_block *block = nir_cf_node_as_block(node);
|
|
nir_intrinsic_instr *intr = last_discard_in_block(block);
|
|
|
|
if (intr) {
|
|
/* Last discard is executed unconditionally, so fuse tests. */
|
|
b.cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
|
|
nir_ssa_def *killed = intr->src[0].ssa;
|
|
nir_ssa_def *live = nir_ixor(&b, killed, all_samples);
|
|
|
|
nir_sample_mask_agx(&b, all_samples, live);
|
|
nir_instr_remove(&intr->instr);
|
|
break;
|
|
} else {
|
|
/* Set cursor for insertion due to a preceding conditionally
|
|
* executed discard.
|
|
*/
|
|
b.cursor = nir_before_block_after_phis(block);
|
|
}
|
|
} else if (cf_node_contains_discard(node)) {
|
|
/* Conditionally executed block contains the last discard. Test
|
|
* depth/stencil for remaining samples in unconditional code after.
|
|
*/
|
|
nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
|
|
nir_imm_intN_t(&b, ALL_SAMPLES, 16));
|
|
break;
|
|
}
|
|
}
|
|
|
|
nir_shader_instructions_pass(
|
|
shader, lower_discard_to_sample_mask_0,
|
|
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
|
|
|
return true;
|
|
}
|