mesa/src/asahi/lib/agx_nir_lower_sample_intrin...

213 lines
7.5 KiB
C

/*
* Copyright 2023 Valve Corporation
* Copyright 2023 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
/*
 * Emit code selecting `x` when multisampling is enabled and an integer zero of
 * the same bit size as `x` otherwise.
 *
 * Multisampling is detected from the log2 of the sample count: a sample count
 * greater than 1 is equivalent to a log2 greater than 0.
 */
static nir_def *
select_if_msaa_else_0(nir_builder *b, nir_def *x)
{
   nir_def *samples_log2 = nir_load_samples_log2_agx(b);
   nir_def *is_multisampled = nir_ugt_imm(b, samples_log2, 0);
   nir_def *zero = nir_imm_intN_t(b, 0, x->bit_size);

   return nir_bcsel(b, is_multisampled, x, zero);
}
/*
 * Per-intrinsic callback for agx_nir_lower_sample_intrinsics.
 *
 * Rewrites the sample-related fragment shader intrinsics handled in the switch
 * below. `data` points to a bool; when it is true, sample mask writes are
 * ignored unless multisampling is enabled.
 *
 * Returns true if the instruction was lowered, false if it was left untouched.
 */
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center: {
      const unsigned bit_size = intr->def.bit_size;

      /* Handle the center special case */
      if (!b->shader->info.fs.uses_sample_shading) {
         assert(intr->intrinsic == nir_intrinsic_load_sample_pos_or_center);

         /* Build the constant at the destination's bit size so the
          * replacement matches the value it replaces, just like the decoded
          * path below does with nir_f2fN.
          */
         nir_def *half = nir_imm_floatN_t(b, 0.5, bit_size);

         nir_def_rewrite_uses(&intr->def, nir_vec2(b, half, half));
         nir_instr_remove(&intr->instr);
         return true;
      }

      /* Lower sample positions to decode the packed fixed-point register:
       *
       *    uint32_t packed = load_sample_positions();
       *    uint32_t shifted = packed >> (sample_id * 8);
       *
       *    for (i = 0; i < 2; ++i) {
       *       uint8_t nibble = (shifted >> (i * 4)) & 0xF;
       *       xy[i] = ((float)nibble) / 16.0;
       *    }
       */
      nir_def *packed = nir_load_sample_positions_agx(b);

      /* The n'th sample is in the n'th byte of the register */
      nir_def *shifted = nir_ushr(
         b, packed, nir_u2u32(b, nir_imul_imm(b, nir_load_sample_id(b), 8)));

      nir_def *xy[2];
      for (unsigned i = 0; i < 2; ++i) {
         /* Get the appropriate nibble */
         nir_def *nibble =
            nir_iand_imm(b, nir_ushr_imm(b, shifted, i * 4), 0xF);

         /* Convert it from fixed point to float */
         xy[i] = nir_fmul_imm(b, nir_u2f16(b, nibble), 1.0 / 16.0);

         /* Convert to the destination bit size if it differs */
         xy[i] = nir_f2fN(b, xy[i], bit_size);
      }

      /* Collect and rewrite */
      nir_def_rewrite_uses(&intr->def, nir_vec2(b, xy[0], xy[1]));
      nir_instr_remove(&intr->instr);
      return true;
   }

   case nir_intrinsic_load_sample_mask_in: {
      /* Apply API sample mask to sample mask inputs, lowering:
       *
       *     sample_mask_in --> sample_mask_in & api_sample_mask
       *
       * Furthermore in OpenGL, gl_SampleMaskIn is only supposed to have the
       * single bit set of the sample currently being shaded when sample shading
       * is used. Mask by the active samples to make that happen.
       */
      b->cursor = nir_after_instr(&intr->instr);

      nir_def *old = &intr->def;
      nir_def *lowered = nir_iand(
         b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));

      if (b->shader->info.fs.uses_sample_shading) {
         nir_def *bit = nir_load_active_samples_agx(b);
         lowered = nir_iand(b, lowered, nir_u2uN(b, bit, old->bit_size));
      }

      /* Only rewrite uses after the final AND, so the masking instructions
       * themselves keep reading the original value.
       */
      nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
      return true;
   }

   case nir_intrinsic_load_helper_invocation: {
      /* When sample shading is enabled, we may execute helper invocations for
       * samples that are not covered. Mask so that load_helper_invocation
       * returns the right thing. By extension, this ensures we don't execute
       * stores for non-covered samples.
       */
      if (!b->shader->info.fs.uses_sample_shading) {
         return false;
      }

      b->cursor = nir_instr_remove(&intr->instr);

      nir_def *active = nir_load_active_samples_agx(b);
      nir_def *mask = nir_u2uN(b, nir_load_sample_mask(b), active->bit_size);

      /* Helper iff none of the active samples are set in the sample mask */
      nir_def *def = nir_ieq_imm(b, nir_iand(b, mask, active), 0);

      nir_def_rewrite_uses(&intr->def, def);
      return true;
   }

   case nir_intrinsic_load_barycentric_sample: {
      /* Lower fragment varyings with "sample" interpolation to
       * interpolateAtSample() with the sample ID. If multisampling is disabled,
       * the sample ID is 0, so we don't need to mask unlike for
       * load_barycentric_at_sample.
       */
      b->cursor = nir_after_instr(&intr->instr);

      nir_def *old = &intr->def;
      nir_def *lowered = nir_load_barycentric_at_sample(
         b, intr->def.bit_size, nir_load_sample_id(b),
         .interp_mode = nir_intrinsic_interp_mode(intr));

      nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
      return true;
   }

   case nir_intrinsic_load_barycentric_at_sample: {
      /*
       * In OpenGL, interpolateAtSample interpolates at the centre when
       * multisampling is disabled. Furthermore, results are undefined when
       * multisampling is enabled but the sample ID is out-of-bounds.
       *
       * To handle the former case, we force the sample ID to 0 when
       * multisampling is disabled. To optimize the latter case, we force the
       * sample ID to 0 when the requested sample is definitively out-of-bounds.
       */
      b->cursor = nir_before_instr(&intr->instr);

      nir_src *src = &intr->src[0];
      nir_def *sample = src->ssa;

      /* Constant sample IDs of 4 or more are treated as out-of-bounds */
      if (nir_src_is_const(*src) && nir_src_as_uint(*src) >= 4) {
         sample = nir_imm_int(b, 0);
      } else {
         sample = select_if_msaa_else_0(b, sample);
      }

      nir_src_rewrite(src, sample);
      return true;
   }

   case nir_intrinsic_store_output: {
      /*
       * In OpenGL, sample mask writes are ignored unless multisampling is used.
       * This is not the case in Vulkan, disambiguated by the
       * ignore_sample_mask_without_msaa flag.
       *
       * If it is used, the Vulkan spec says:
       *
       *    If sample shading is enabled, bits written to SampleMask
       *    corresponding to samples that are not being shaded by the fragment
       *    shader invocation are ignored.
       *
       * That will be satisfied by outputting gl_SampleMask for the whole pixel
       * and then lowering sample shading after (splitting up discard targets).
       */
      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
      if (sem.location != FRAG_RESULT_SAMPLE_MASK) {
         return false;
      }

      /* The written mask lists samples to keep; discard takes its complement */
      nir_def *mask = nir_inot(b, nir_u2u16(b, intr->src[0].ssa));

      bool *ignore_sample_mask_without_msaa = data;
      if (*ignore_sample_mask_without_msaa) {
         mask = select_if_msaa_else_0(b, mask);
      }

      nir_discard_agx(b, mask);
      nir_instr_remove(&intr->instr);

      b->shader->info.fs.uses_discard = true;
      return true;
   }

   default:
      return false;
   }
}
/*
 * Lower sample-related intrinsics (such as load_sample_pos) in a fragment
 * shader using sample shading to variants expressed in terms of load_sample_id.
 *
 * Apart from a possible API bit forcing sample shading in shaders that would
 * not otherwise need it, this pass is independent of the shader key. In
 * particular it does not depend on the sample count, so it can run on fragment
 * shaders at compile-time. The load_sample_id intrinsics themselves are
 * lowered later, with different lowerings for monolithic shaders vs epilogs.
 *
 * Fragment I/O (like store_local_pixel_agx and discard_agx) is deliberately not
 * lowered here: that lowering differs between monolithic shaders and FS
 * epilogs even though it has no dependency on the sample count.
 *
 * Returns whether the underlying intrinsics pass reported progress.
 */
bool
agx_nir_lower_sample_intrinsics(nir_shader *shader,
                                bool ignore_sample_mask_without_msaa)
{
   /* The callback reads the flag through its opaque data pointer */
   void *cb_data = &ignore_sample_mask_without_msaa;

   return nir_shader_intrinsics_pass(
      shader, lower, nir_metadata_block_index | nir_metadata_dominance,
      cb_data);
}