i965: Add support for correct GL_CLAMP behavior by clamping coordinates.

This removes the stupid strict-conformance fallback code I broke when
adding ARB_sampler_objects.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36572
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
This commit is contained in:
Eric Anholt 2010-11-02 09:11:17 -07:00
parent 7592f00560
commit b126a0c0cb
9 changed files with 90 additions and 69 deletions

View File

@ -325,42 +325,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
return GL_TRUE;
}
/* BRW hardware doesn't handle GL_CLAMP texturing correctly;
* brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
* as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
* we want strict conformance, force the fallback.
* Right now, we only do this for 2D textures.
*/
{
int u;
for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
if (texUnit->Enabled) {
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
if (texUnit->Enabled & TEXTURE_1D_BIT) {
if (sampler->WrapS == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_2D_BIT) {
if (sampler->WrapS == GL_CLAMP ||
sampler->WrapT == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_3D_BIT) {
if (sampler->WrapS == GL_CLAMP ||
sampler->WrapT == GL_CLAMP ||
sampler->WrapR == GL_CLAMP) {
return GL_TRUE;
}
}
}
}
}
/* Nothing stopping us from the fast path now */
return GL_FALSE;
}

View File

@ -1172,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir)
}
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler)
{
int mlen;
int base_mrf = 1;
@ -1184,7 +1185,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
if (ir->shadow_comparitor) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
fs_inst *inst = emit(BRW_OPCODE_MOV,
fs_reg(MRF, base_mrf + mlen + i), coordinate);
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
inst->saturate = true;
coordinate.reg_offset++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@ -1212,7 +1217,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
mlen++;
} else if (ir->op == ir_tex) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
coordinate);
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
inst->saturate = true;
coordinate.reg_offset++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@ -1226,7 +1234,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
assert(ir->op == ir_txb || ir->op == ir_txl);
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate);
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
base_mrf + mlen + i * 2),
coordinate);
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
inst->saturate = true;
coordinate.reg_offset++;
}
@ -1298,15 +1310,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
* surprising in the disassembly.
*/
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler)
{
int mlen = 1; /* g0 header always present. */
int base_mrf = 1;
int reg_width = c->dispatch_width / 8;
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
coordinate);
fs_inst *inst = emit(BRW_OPCODE_MOV,
fs_reg(MRF, base_mrf + mlen + i * reg_width),
coordinate);
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
inst->saturate = true;
coordinate.reg_offset++;
}
mlen += ir->coordinate->type->vector_elements * reg_width;
@ -1357,7 +1373,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
}
fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler)
{
int mlen = 1; /* g0 header always present. */
int base_mrf = 1;
@ -1391,8 +1408,10 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
/* Set up the coordinate */
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
coordinate);
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
coordinate);
if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
inst->saturate = true;
coordinate.reg_offset++;
mlen += reg_width;
}
@ -1517,11 +1536,11 @@ fs_visitor::visit(ir_texture *ir)
fs_reg dst = fs_reg(this, glsl_type::vec4_type);
if (intel->gen >= 7) {
inst = emit_texture_gen7(ir, dst, coordinate);
inst = emit_texture_gen7(ir, dst, coordinate, sampler);
} else if (intel->gen >= 5) {
inst = emit_texture_gen5(ir, dst, coordinate);
inst = emit_texture_gen5(ir, dst, coordinate, sampler);
} else {
inst = emit_texture_gen4(ir, dst, coordinate);
inst = emit_texture_gen4(ir, dst, coordinate, sampler);
}
/* If there's an offset, we already set up m1. To avoid the implied move,

View File

@ -512,9 +512,12 @@ public:
fs_reg *emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate);
fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler);
fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler);
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler);
fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);

View File

@ -192,7 +192,7 @@ GLuint translate_tex_format(gl_format mesa_format,
GLenum srgb_decode);
/* brw_wm_sampler_state.c */
GLuint translate_wrap_mode(GLenum wrap);
uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
void upload_default_color(struct brw_context *brw,
struct gl_sampler_object *sampler,
int unit);

View File

@ -410,6 +410,16 @@ static void brw_wm_populate_key( struct brw_context *brw,
swizzles[GET_SWZ(t->_Swizzle, 1)],
swizzles[GET_SWZ(t->_Swizzle, 2)],
swizzles[GET_SWZ(t->_Swizzle, 3)]);
if (sampler->MinFilter != GL_NEAREST &&
sampler->MagFilter != GL_NEAREST) {
if (sampler->WrapS == GL_CLAMP)
key->gl_clamp_mask[0] |= 1 << i;
if (sampler->WrapT == GL_CLAMP)
key->gl_clamp_mask[1] |= 1 << i;
if (sampler->WrapR == GL_CLAMP)
key->gl_clamp_mask[2] |= 1 << i;
}
}
else {
key->tex_swizzles[i] = SWIZZLE_NOOP;

View File

@ -71,9 +71,9 @@ struct brw_wm_prog_key {
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swapped */
uint16_t gl_clamp_mask[3];
GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
GLuint iz_lookup;

View File

@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c,
/* Emit the texcoords. */
for (i = 0; i < nr_texcoords; i++) {
if (c->key.gl_clamp_mask[i] & (1 << sampler))
brw_set_saturate(p, true);
if (emit & (1<<i))
brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
brw_set_saturate(p, false);
}
/* Fill in the shadow comparison reference value. */

View File

@ -44,20 +44,28 @@
/* The brw (and related graphics cores) do not support GL_CLAMP. The
* Intel drivers for "other operating systems" implement GL_CLAMP as
* GL_CLAMP_TO_EDGE, so the same is done here.
*/
GLuint
translate_wrap_mode(GLenum wrap)
uint32_t
translate_wrap_mode(GLenum wrap, bool using_nearest)
{
switch( wrap ) {
case GL_REPEAT:
return BRW_TEXCOORDMODE_WRAP;
case GL_CLAMP:
return BRW_TEXCOORDMODE_CLAMP;
case GL_CLAMP:
/* GL_CLAMP is the weird mode where coordinates are clamped to
* [0.0, 1.0], so linear filtering of coordinates outside of
* [0.0, 1.0] gives you half edge texel value and half border
* color. The fragment shader will clamp the coordinates, and
* we set clamp_border here, which gets the desired result. We
* just use clamp(_to_edge) for nearest, because for nearest
* clamping to 1.0 gives border color instead of the desired
* edge texels.
*/
if (using_nearest)
return BRW_TEXCOORDMODE_CLAMP;
else
return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_CLAMP_TO_EDGE:
return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
return BRW_TEXCOORDMODE_CLAMP;
case GL_CLAMP_TO_BORDER:
return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_MIRRORED_REPEAT:
@ -155,11 +163,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
bool using_nearest = false;
switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@ -200,6 +210,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@ -209,9 +220,12 @@ static void brw_update_sampler_state(struct brw_context *brw,
}
}
sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
using_nearest);
sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
using_nearest);
sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
using_nearest);
if (intel->gen >= 6 &&
sampler->ss0.min_filter != sampler->ss0.mag_filter)

View File

@ -41,11 +41,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
bool using_nearest = false;
switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@ -85,6 +87,7 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@ -94,9 +97,12 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
}
}
sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
using_nearest);
sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
using_nearest);
sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
using_nearest);
/* Cube-maps on 965 and later must use the same wrap mode for all 3
* coordinate dimensions. Further, only CUBE and CLAMP are valid.