From b126a0c0cb30b1e2f2df1953fe14d8596d1cf4f7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Nov 2010 09:11:17 -0700 Subject: [PATCH] i965: Add support for correct GL_CLAMP behavior by clamping coordinates. This removes the stupid strict-conformance fallback code I broke when adding ARB_sampler_objects. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36572 Reviewed-by: Kenneth Graunke (v1) --- src/mesa/drivers/dri/i965/brw_draw.c | 36 --------------- src/mesa/drivers/dri/i965/brw_fs.cpp | 45 +++++++++++++------ src/mesa/drivers/dri/i965/brw_fs.h | 9 ++-- src/mesa/drivers/dri/i965/brw_state.h | 2 +- src/mesa/drivers/dri/i965/brw_wm.c | 10 +++++ src/mesa/drivers/dri/i965/brw_wm.h | 2 +- src/mesa/drivers/dri/i965/brw_wm_emit.c | 5 +++ .../drivers/dri/i965/brw_wm_sampler_state.c | 38 +++++++++++----- .../drivers/dri/i965/gen7_sampler_state.c | 12 +++-- 9 files changed, 90 insertions(+), 69 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 44ede608b76..bcfd678a924 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -325,42 +325,6 @@ static GLboolean check_fallbacks( struct brw_context *brw, return GL_TRUE; } - /* BRW hardware doesn't handle GL_CLAMP texturing correctly; - * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP - * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and - * we want strict conformance, force the fallback. - * Right now, we only do this for 2D textures. - */ - { - int u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; - - if (texUnit->Enabled) { - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u); - - if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (sampler->WrapS == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (sampler->WrapS == GL_CLAMP || - sampler->WrapT == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (sampler->WrapS == GL_CLAMP || - sampler->WrapT == GL_CLAMP || - sampler->WrapR == GL_CLAMP) { - return GL_TRUE; - } - } - } - } - } - /* Nothing stopping us from the fast path now */ return GL_FALSE; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2157c935711..1943ab6021f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1172,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir) } fs_inst * -fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) +fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler) { int mlen; int base_mrf = 1; @@ -1184,7 +1185,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) if (ir->shadow_comparitor) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); + fs_inst *inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, base_mrf + mlen + i), coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ @@ -1212,7 +1217,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) mlen++; } else if (ir->op == ir_tex) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; coordinate.reg_offset++; } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ @@ -1226,7 +1234,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) assert(ir->op == ir_txb || ir->op == ir_txl); for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate); + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, + base_mrf + mlen + i * 2), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; coordinate.reg_offset++; } @@ -1298,15 +1310,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) * surprising in the disassembly. */ fs_inst * -fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) +fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler) { int mlen = 1; /* g0 header always present. */ int base_mrf = 1; int reg_width = c->dispatch_width / 8; for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width), - coordinate); + fs_inst *inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, base_mrf + mlen + i * reg_width), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; coordinate.reg_offset++; } mlen += ir->coordinate->type->vector_elements * reg_width; @@ -1357,7 +1373,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) } fs_inst * -fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate) +fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler) { int mlen = 1; /* g0 header always present. */ int base_mrf = 1; @@ -1391,8 +1408,10 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate) /* Set up the coordinate */ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - coordinate); + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; coordinate.reg_offset++; mlen += reg_width; } @@ -1517,11 +1536,11 @@ fs_visitor::visit(ir_texture *ir) fs_reg dst = fs_reg(this, glsl_type::vec4_type); if (intel->gen >= 7) { - inst = emit_texture_gen7(ir, dst, coordinate); + inst = emit_texture_gen7(ir, dst, coordinate, sampler); } else if (intel->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate); + inst = emit_texture_gen5(ir, dst, coordinate, sampler); } else { - inst = emit_texture_gen4(ir, dst, coordinate); + inst = emit_texture_gen4(ir, dst, coordinate, sampler); } /* If there's an offset, we already set up m1. To avoid the implied move, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 1b37ef5d087..4b355c979eb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -512,9 +512,12 @@ public: fs_reg *emit_general_interpolation(ir_variable *ir); void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); - fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate); - fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate); - fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate); + fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler); + fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler); + fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler); fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 11a87320542..3aaa7c6d794 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -192,7 +192,7 @@ GLuint translate_tex_format(gl_format mesa_format, GLenum srgb_decode); /* brw_wm_sampler_state.c */ -GLuint translate_wrap_mode(GLenum wrap); +uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest); void upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, int unit); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 40589b0d2e4..907976295ab 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -410,6 +410,16 @@ static void brw_wm_populate_key( struct brw_context *brw, swizzles[GET_SWZ(t->_Swizzle, 1)], swizzles[GET_SWZ(t->_Swizzle, 2)], swizzles[GET_SWZ(t->_Swizzle, 3)]); + + if (sampler->MinFilter != GL_NEAREST && + sampler->MagFilter != GL_NEAREST) { + if (sampler->WrapS == GL_CLAMP) + key->gl_clamp_mask[0] |= 1 << i; + if (sampler->WrapT == GL_CLAMP) + key->gl_clamp_mask[1] |= 1 << i; + if (sampler->WrapR == GL_CLAMP) + key->gl_clamp_mask[2] |= 1 << i; + } } else { key->tex_swizzles[i] = SWIZZLE_NOOP; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index a5f99a0a657..8ab531bdf87 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -71,9 +71,9 @@ struct brw_wm_prog_key { GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ + uint16_t gl_clamp_mask[3]; GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; - GLushort drawable_height; GLbitfield64 vp_outputs_written; GLuint iz_lookup; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index fd4cd892f41..f61757a8cac 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c, /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { + if (c->key.gl_clamp_mask[i] & (1 << sampler)) + brw_set_saturate(p, true); + if (emit & (1<Texture.Unit[unit]; struct gl_texture_object *texObj = texUnit->_Current; struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit); + bool using_nearest = false; switch (gl_sampler->MinFilter) { case GL_NEAREST: sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + using_nearest = true; break; case GL_LINEAR: sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; @@ -200,6 +210,7 @@ static void brw_update_sampler_state(struct brw_context *brw, switch (gl_sampler->MagFilter) { case GL_NEAREST: sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + using_nearest = true; break; case GL_LINEAR: sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; @@ -209,9 +220,12 @@ static void brw_update_sampler_state(struct brw_context *brw, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR); - sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS); - sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT); + sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR, + using_nearest); + sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS, + using_nearest); + sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT, + using_nearest); if (intel->gen >= 6 && sampler->ss0.min_filter != sampler->ss0.mag_filter) diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 8487a8fa4b0..95f6fbf7414 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -41,11 +41,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *texObj = texUnit->_Current; struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit); + bool using_nearest = false; switch (gl_sampler->MinFilter) { case GL_NEAREST: sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + using_nearest = true; break; case GL_LINEAR: sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; @@ -85,6 +87,7 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, switch (gl_sampler->MagFilter) { case GL_NEAREST: sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + using_nearest = true; break; case GL_LINEAR: sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; @@ -94,9 +97,12 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, } } - sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR); - sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS); - sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT); + sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR, + using_nearest); + sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS, + using_nearest); + sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT, + using_nearest); /* Cube-maps on 965 and later must use the same wrap mode for all 3 * coordinate dimensions. Futher, only CUBE and CLAMP are valid.