gallium/draw: properly fix short aalines

The fix we used to have for short smooth lines was incorrect;
here's the real fix:

For lines shorter than one pixel, we need to clamp the length-wise
coverage to the line-length. That produces results that are consistent
with our approximation for longer lines.

Because we pass (length / 2) + 0.5 to the fragment shader instead of
the unmodified length, we need to spend a few instructions to
reconstruct the original length.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19899>
This commit is contained in:
Erik Faye-Lund 2022-11-21 13:28:00 +01:00 committed by Marge Bot
parent 20964181c1
commit 089e724259
3 changed files with 44 additions and 26 deletions

View File

@ -107,6 +107,7 @@ struct aa_transform_context {
uint64_t tempsUsed; /**< bitmask */
int colorOutput; /**< which output is the primary color */
int maxInput, maxGeneric; /**< max input index found */
int numImm; /**< number of immediate registers */
int colorTemp, aaTemp; /**< temp registers */
};
@ -147,6 +148,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
ctx->emit_declaration(ctx, decl);
}
/**
 * TGSI immediate transform callback.
 *
 * Passes the immediate on to the context's emit_immediate hook as-is and
 * bumps numImm, the running count of immediates that went through this
 * callback.
 */
static void
aa_immediate(struct tgsi_transform_context *ctx,
             struct tgsi_full_immediate *imm)
{
   /* Emit first, then account for the immediate just emitted. */
   ctx->emit_immediate(ctx, imm);

   /* ctx is cast to the aa_transform_context that holds the counter. */
   ((struct aa_transform_context *)ctx)->numImm += 1;
}
/**
* Find the lowest zero bit, or -1 if bitfield is all ones.
@ -182,6 +195,9 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
/* declare new temp regs */
tgsi_transform_temp_decl(ctx, aactx->aaTemp);
tgsi_transform_temp_decl(ctx, aactx->colorTemp);
/* declare new immediate reg */
tgsi_transform_immediate_decl(ctx, 2.0, -1.0, 0.0, 0.25);
}
@ -215,6 +231,26 @@ aa_transform_epilog(struct tgsi_transform_context *ctx)
inst.Src[1].Register.Negate = true;
ctx->emit_instruction(ctx, &inst);
/* linelength * 2 - 1 */
tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_MAD,
TGSI_FILE_TEMPORARY, aactx->aaTemp,
TGSI_WRITEMASK_Y,
TGSI_FILE_INPUT, aactx->maxInput + 1,
TGSI_SWIZZLE_W, false,
TGSI_FILE_IMMEDIATE, aactx->numImm,
TGSI_SWIZZLE_X,
TGSI_FILE_IMMEDIATE, aactx->numImm,
TGSI_SWIZZLE_Y);
/* MIN height alpha */
tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
TGSI_FILE_TEMPORARY, aactx->aaTemp,
TGSI_WRITEMASK_Z,
TGSI_FILE_TEMPORARY, aactx->aaTemp,
TGSI_SWIZZLE_Z,
TGSI_FILE_TEMPORARY, aactx->aaTemp,
TGSI_SWIZZLE_Y, false);
/* MUL width / height alpha */
tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
TGSI_FILE_TEMPORARY, aactx->aaTemp,
@ -292,6 +328,7 @@ generate_aaline_fs(struct aaline_stage *aaline)
transform.base.epilog = aa_transform_epilog;
transform.base.transform_instruction = aa_transform_inst;
transform.base.transform_declaration = aa_transform_decl;
transform.base.transform_immediate = aa_immediate;
aaline_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base);
if (!aaline_fs.tokens)
@ -390,29 +427,7 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
uint i;
half_length = 0.5f * sqrtf(dx * dx + dy * dy);
if (half_length < 0.5f) {
/*
* The logic we use for "normal" sized segments is incorrect
* for very short segments (basically because we only have
* one value to interpolate, not a distance to each endpoint).
* Therefore, we calculate half_length differently, so that for
* original line length (near) 0, we get alpha 0 - otherwise
* max alpha would still be 0.5. This also prevents us from
* artifacts due to degenerated lines (the endpoints being
* identical, which would still receive anywhere from alpha
* 0-0.5 otherwise) (at least the pstipple stage may generate
* such lines due to float inaccuracies if line length is very
* close to a integer).
* Might not be fully accurate neither (because the "strength" of
* the line is going to be determined by how close to the pixel
* center those 1 or 2 fragments are) but it's probably the best
* we can do.
*/
half_length = 2.0f * half_length;
} else {
half_length = half_length + 0.5f;
}
half_length = half_length + 0.5f;
t_w = half_width;
t_l = 0.5f;

View File

@ -174,10 +174,13 @@ nir_lower_aaline_block(nir_block *block,
nir_ssa_def *out_input = intrin->src[1].ssa;
b->cursor = nir_before_instr(instr);
nir_ssa_def *lw = nir_load_var(b, state->line_width_input);
nir_ssa_def *len = nir_channel(b, lw, 3);
len = nir_fadd_imm(b, nir_fmul_imm(b, len, 2.0), -1.0);
nir_ssa_def *tmp = nir_fsat(b, nir_fadd(b, nir_channels(b, lw, 0xa),
nir_fneg(b, nir_fabs(b, nir_channels(b, lw, 0x5)))));
tmp = nir_fmul(b, nir_channel(b, tmp, 0), nir_channel(b, tmp, 1));
tmp = nir_fmul(b, nir_channel(b, tmp, 0),
nir_fmin(b, nir_channel(b, tmp, 1), len));
tmp = nir_fmul(b, nir_channel(b, out_input, 3), tmp);
nir_ssa_def *out = nir_vec4(b, nir_channel(b, out_input, 0),

View File

@ -72,7 +72,7 @@ traces:
checksum: de5452f4cbc0100d8ecb51459e47cd99
bgfx/29-debugdraw.rdc:
gl-vmware-llvmpipe:
checksum: 164e5226af26b6552506542a45bc6bf5
checksum: 015201fe000d6a323b0f7d3f218d3e47
bgfx/31-rsm.rdc:
gl-vmware-llvmpipe:
checksum: b59d323511488d5c098ebfa9b434c2dc
@ -120,7 +120,7 @@ traces:
checksum: a55dd3d87a86b3b47121ff67861028c3
jvgs/jvgs-d27fb67-v2.trace:
gl-vmware-llvmpipe:
checksum: b8c21bf76e667735d1640b215f456531
checksum: 43b89627364b4cabbab84931aef4ce5e
pathfinder/demo-v2.trace:
gl-vmware-llvmpipe:
checksum: a053c56658bc830249bc94317a3b3ea8