i965: Add support for correct GL_CLAMP behavior by clamping coordinates.

This removes the stupid strict-conformance fallback code I broke when adding ARB_sampler_objects. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36572 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
2010-11-02 09:11:17 -07:00 · 2010-11-02 09:11:17 -07:00 · b126a0c0cb
parent 7592f00560
commit b126a0c0cb
9 changed files with 90 additions and 69 deletions
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@ -325,42 +325,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
 	    return GL_TRUE;
   }

-   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
-    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
-    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
-    * we want strict conformance, force the fallback.
-    * Right now, we only do this for 2D textures.
-    */
-   {
-      int u;
-      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
-         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-
-         if (texUnit->Enabled) {
-	    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
-
-            if (texUnit->Enabled & TEXTURE_1D_BIT) {
-               if (sampler->WrapS == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_2D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_3D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP ||
-                   sampler->WrapR == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-         }
-      }
-   }
-      
   /* Nothing stopping us from the fast path now */
   return GL_FALSE;
 }
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@ -1172,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir)
 }

 fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler)
 {
   int mlen;
   int base_mrf = 1;
@ -1184,7 +1185,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+	 fs_inst *inst = emit(BRW_OPCODE_MOV,
+			      fs_reg(MRF, base_mrf + mlen + i), coordinate);
+	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	    inst->saturate = true;
+
 	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@ -1212,7 +1217,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
+			      coordinate);
+	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	    inst->saturate = true;
 	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@ -1226,7 +1234,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate);
+	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
+						     base_mrf + mlen + i * 2),
+			      coordinate);
+	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	    inst->saturate = true;
 	 coordinate.reg_offset++;
      }

@ -1298,15 +1310,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
 * surprising in the disassembly.
 */
 fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler)
 {
   int mlen = 1; /* g0 header always present. */
   int base_mrf = 1;
   int reg_width = c->dispatch_width / 8;

   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
-	   coordinate);
+      fs_inst *inst = emit(BRW_OPCODE_MOV,
+			   fs_reg(MRF, base_mrf + mlen + i * reg_width),
+			   coordinate);
+      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	 inst->saturate = true;
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements * reg_width;
@ -1357,7 +1373,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
 }

 fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler)
 {
   int mlen = 1; /* g0 header always present. */
   int base_mrf = 1;
@ -1391,8 +1408,10 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)

   /* Set up the coordinate */
   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
-	   coordinate);
+      fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+			   coordinate);
+      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	 inst->saturate = true;
      coordinate.reg_offset++;
      mlen += reg_width;
   }
@ -1517,11 +1536,11 @@ fs_visitor::visit(ir_texture *ir)
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen >= 7) {
-      inst = emit_texture_gen7(ir, dst, coordinate);
+      inst = emit_texture_gen7(ir, dst, coordinate, sampler);
   } else if (intel->gen >= 5) {
-      inst = emit_texture_gen5(ir, dst, coordinate);
+      inst = emit_texture_gen5(ir, dst, coordinate, sampler);
   } else {
-      inst = emit_texture_gen4(ir, dst, coordinate);
+      inst = emit_texture_gen4(ir, dst, coordinate, sampler);
   }

   /* If there's an offset, we already set up m1.  To avoid the implied move,
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@ -512,9 +512,12 @@ public:
   fs_reg *emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
-   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
-   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
-   fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate);
+   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler);
+   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler);
+   fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler);
   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
   bool try_emit_saturate(ir_expression *ir);
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@ -192,7 +192,7 @@ GLuint translate_tex_format(gl_format mesa_format,
 			    GLenum srgb_decode);

 /* brw_wm_sampler_state.c */
-GLuint translate_wrap_mode(GLenum wrap);
+uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
 void upload_default_color(struct brw_context *brw,
 			  struct gl_sampler_object *sampler,
 			  int unit);
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@ -410,6 +410,16 @@ static void brw_wm_populate_key( struct brw_context *brw,
 			  swizzles[GET_SWZ(t->_Swizzle, 1)],
 			  swizzles[GET_SWZ(t->_Swizzle, 2)],
 			  swizzles[GET_SWZ(t->_Swizzle, 3)]);
+
+	 if (sampler->MinFilter != GL_NEAREST &&
+	     sampler->MagFilter != GL_NEAREST) {
+	    if (sampler->WrapS == GL_CLAMP)
+	       key->gl_clamp_mask[0] |= 1 << i;
+	    if (sampler->WrapT == GL_CLAMP)
+	       key->gl_clamp_mask[1] |= 1 << i;
+	    if (sampler->WrapR == GL_CLAMP)
+	       key->gl_clamp_mask[2] |= 1 << i;
+	 }
      }
      else {
         key->tex_swizzles[i] = SWIZZLE_NOOP;
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@ -71,9 +71,9 @@ struct brw_wm_prog_key {
   GLuint shadowtex_mask:16;
   GLuint yuvtex_mask:16;
   GLuint yuvtex_swap_mask:16;	/* UV swaped */
+   uint16_t gl_clamp_mask[3];

   GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
-
   GLushort drawable_height;
   GLbitfield64 vp_outputs_written;
   GLuint iz_lookup;
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c,

   /* Emit the texcoords. */
   for (i = 0; i < nr_texcoords; i++) {
+      if (c->key.gl_clamp_mask[i] & (1 << sampler))
+	 brw_set_saturate(p, true);
+
      if (emit & (1<<i))
 	 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
      else
 	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
      cur_mrf += mrf_per_channel;
+
+      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@ -44,20 +44,28 @@



-/* The brw (and related graphics cores) do not support GL_CLAMP.  The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-GLuint
-translate_wrap_mode(GLenum wrap)
+uint32_t
+translate_wrap_mode(GLenum wrap, bool using_nearest)
 {
   switch( wrap ) {
   case GL_REPEAT: 
      return BRW_TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP;
+   case GL_CLAMP:
+      /* GL_CLAMP is the weird mode where coordinates are clamped to
+       * [0.0, 1.0], so linear filtering of coordinates outside of
+       * [0.0, 1.0] give you half edge texel value and half border
+       * color.  The fragment shader will clamp the coordinates, and
+       * we set clamp_border here, which gets the result desired.  We
+       * just use clamp(_to_edge) for nearest, because for nearest
+       * clamping to 1.0 gives border color instead of the desired
+       * edge texels.
+       */
+      if (using_nearest)
+	 return BRW_TEXCOORDMODE_CLAMP;
+      else
+	 return BRW_TEXCOORDMODE_CLAMP_BORDER;
   case GL_CLAMP_TO_EDGE: 
-      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+      return BRW_TEXCOORDMODE_CLAMP;
   case GL_CLAMP_TO_BORDER: 
      return BRW_TEXCOORDMODE_CLAMP_BORDER;
   case GL_MIRRORED_REPEAT: 
@ -155,11 +163,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *texObj = texUnit->_Current;
   struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;

   switch (gl_sampler->MinFilter) {
   case GL_NEAREST:
      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
      break;
   case GL_LINEAR:
      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@ -200,6 +210,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
      switch (gl_sampler->MagFilter) {
      case GL_NEAREST:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 using_nearest = true;
 	 break;
      case GL_LINEAR:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@ -209,9 +220,12 @@ static void brw_update_sampler_state(struct brw_context *brw,
      }  
   }

-   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+						  using_nearest);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+						  using_nearest);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+						  using_nearest);

   if (intel->gen >= 6 &&
       sampler->ss0.min_filter != sampler->ss0.mag_filter)
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@ -41,11 +41,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *texObj = texUnit->_Current;
   struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;

   switch (gl_sampler->MinFilter) {
   case GL_NEAREST:
      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
      break;
   case GL_LINEAR:
      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@ -85,6 +87,7 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
      switch (gl_sampler->MagFilter) {
      case GL_NEAREST:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 using_nearest = true;
 	 break;
      case GL_LINEAR:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@ -94,9 +97,12 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
      }
   }

-   sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+						  using_nearest);
+   sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+						  using_nearest);
+   sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+						  using_nearest);

   /* Cube-maps on 965 and later must use the same wrap mode for all 3
    * coordinate dimensions.  Futher, only CUBE and CLAMP are valid.