svga: add VS code to set attribute W component to 1

There are a few 3-component vertex attribute formats that have no equivalent SVGA3D_DECLTYPE_x format. Previously, we had to use the swtnl code to handle them. This patch lets us use hwtnl for more vertex attribute types by fetching 3-component attributes as 4-component attributes and explicitly setting the W component to 1. This lets us handle PIPE_FORMAT_R16G16B16_SNORM/UNORM and PIPE_FORMAT_R8G8B8_UNORM vertex attribs without using the swtnl path. Fixes piglit normal3b3s GL_SHORT test. Reviewed-by: Charmaine Lee <charmainel@vmware.com>
2014-04-17 09:00:29 -07:00 · 2014-04-17 09:00:29 -07:00 · 851645a3e7
parent 615a356ee3
commit 851645a3e7
5 changed files with 100 additions and 46 deletions
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@ -202,6 +202,7 @@ struct svga_velems_state {
   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
   SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
   unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
+   unsigned adjust_attrib_w_1;   /* bitmask of attrs needing w = 1 */
 };

 /* Use to calculate differences between state emitted to hardware and
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@ -94,9 +94,14 @@ translate_vertex_format(enum pipe_format format)
   case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
   case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;

-   /* See attrib_needs_adjustment() below */
+   /* See attrib_needs_adjustment() and attrib_needs_w_to_1() below */
   case PIPE_FORMAT_R8G8B8_SNORM:         return SVGA3D_DECLTYPE_UBYTE4N;

+   /* See attrib_needs_w_to_1() below */
+   case PIPE_FORMAT_R16G16B16_SNORM:      return SVGA3D_DECLTYPE_SHORT4N;
+   case PIPE_FORMAT_R16G16B16_UNORM:      return SVGA3D_DECLTYPE_USHORT4N;
+   case PIPE_FORMAT_R8G8B8_UNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
+
   default:
      /* There are many formats without hardware support.  This case
       * will be hit regularly, meaning we'll need swvfetch.
@ -123,6 +128,25 @@ attrib_needs_range_adjustment(enum pipe_format format)
 }


+/**
+ * Return TRUE if 'format' is one of the 3-component vertex attrib formats
+ * fetched as a 4-component type, so the VS must set the W component to one.
+ */
+static boolean
+attrib_needs_w_to_1(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8_SNORM:
+   case PIPE_FORMAT_R8G8B8_UNORM:
+   case PIPE_FORMAT_R16G16B16_SNORM:
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+
+
 static void *
 svga_create_vertex_elements_state(struct pipe_context *pipe,
                                  unsigned count,
@ -138,6 +162,7 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
      memcpy(velems->velem, attribs, sizeof(*attribs) * count);

      velems->adjust_attrib_range = 0x0;
+      velems->adjust_attrib_w_1 = 0x0;

      /* Translate Gallium vertex format to SVGA3dDeclType */
      for (i = 0; i < count; i++) {
@ -147,6 +172,9 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
         if (attrib_needs_range_adjustment(f)) {
            velems->adjust_attrib_range |= (1 << i);
         }
+         if (attrib_needs_w_to_1(f)) {
+            velems->adjust_attrib_w_1 |= (1 << i);
+         }
      }
   }
   return velems;
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@ -162,6 +162,7 @@ make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)

   /* SVGA_NEW_VELEMENT */
   key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
+   key->adjust_attrib_w_1 = svga->curr.velems->adjust_attrib_w_1;
 }


--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@ -50,6 +50,7 @@ struct svga_vs_compile_key
   unsigned need_prescale:1;
   unsigned allow_psiz:1;
   unsigned adjust_attrib_range:16;
+   unsigned adjust_attrib_w_1:16;
 };

 struct svga_fs_compile_key
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@ -3532,57 +3532,78 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit)
 static boolean
 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
 {
-   unsigned adjust_attrib_range = emit->key.vkey.adjust_attrib_range;
-
-   while (adjust_attrib_range) {
-      /* The vertex input/attribute is supposed to be a signed value in
-       * the range [-1,1] but we actually fetched/converted it to the
-       * range [0,1].  This most likely happens when the app specifies a
-       * signed byte attribute but we interpreted it as unsigned bytes.
-       * See also svga_translate_vertex_format().
-       *
-       * Here, we emit some extra instructions to adjust
-       * the attribute values from [0,1] to [-1,1].
-       *
-       * The adjustment we implement is:
-       *   new_attrib = attrib * 2.0;
-       *   if (attrib >= 0.5)
-       *      new_attrib = new_attrib - 2.0;
-       * This isn't exactly right (it's off by a bit or so) but close enough.
-       */
-      const unsigned index = u_bit_scan(&adjust_attrib_range);
+   unsigned adjust_mask = (emit->key.vkey.adjust_attrib_range |
+                           emit->key.vkey.adjust_attrib_w_1);
+ 
+   while (adjust_mask) {
+      /* Adjust vertex attrib range and/or set W component = 1 */
+      const unsigned index = u_bit_scan(&adjust_mask);
      struct src_register tmp;

-      SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
-
      /* allocate a temp reg */
      tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
      emit->nr_hw_temp++;

-      /* tmp = attrib * 2.0 */
-      if (!submit_op2(emit,
-                      inst_token(SVGA3DOP_MUL),
-                      dst(tmp),
-                      emit->input_map[index],
-                      get_two_immediate(emit)))
-         return FALSE;
+      if (emit->key.vkey.adjust_attrib_range & (1 << index)) {
+         /* The vertex input/attribute is supposed to be a signed value in
+          * the range [-1,1] but we actually fetched/converted it to the
+          * range [0,1].  This most likely happens when the app specifies a
+          * signed byte attribute but we interpreted it as unsigned bytes.
+          * See also svga_translate_vertex_format().
+          *
+          * Here, we emit some extra instructions to adjust
+          * the attribute values from [0,1] to [-1,1].
+          *
+          * The adjustment we implement is:
+          *   new_attrib = attrib * 2.0;
+          *   if (attrib >= 0.5)
+          *      new_attrib = new_attrib - 2.0;
+          * This isn't exactly right (it's off by a bit or so) but close enough.
+          */
+         SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);

-      /* pred = (attrib >= 0.5) */
-      if (!submit_op2(emit,
-                      inst_token_setp(SVGA3DOPCOMP_GE),
-                      pred_reg,
-                      emit->input_map[index],  /* vert attrib */
-                      get_half_immediate(emit)))  /* 0.5 */
-         return FALSE;
+         /* tmp = attrib * 2.0 */
+         if (!submit_op2(emit,
+                         inst_token(SVGA3DOP_MUL),
+                         dst(tmp),
+                         emit->input_map[index],
+                         get_two_immediate(emit)))
+            return FALSE;

-      /* sub(pred) tmp, tmp, 2.0 */
-      if (!submit_op3(emit,
-                      inst_token_predicated(SVGA3DOP_SUB),
-                      dst(tmp),
-                      src(pred_reg),
-                      tmp,
-                      get_two_immediate(emit)))
-         return FALSE;
+         /* pred = (attrib >= 0.5) */
+         if (!submit_op2(emit,
+                         inst_token_setp(SVGA3DOPCOMP_GE),
+                         pred_reg,
+                         emit->input_map[index],  /* vert attrib */
+                         get_half_immediate(emit)))  /* 0.5 */
+            return FALSE;
+
+         /* sub(pred) tmp, tmp, 2.0 */
+         if (!submit_op3(emit,
+                         inst_token_predicated(SVGA3DOP_SUB),
+                         dst(tmp),
+                         src(pred_reg),
+                         tmp,
+                         get_two_immediate(emit)))
+            return FALSE;
+      }
+      else {
+         /* just copy the vertex input attrib to the temp register */
+         if (!submit_op1(emit,
+                         inst_token(SVGA3DOP_MOV),
+                         dst(tmp),
+                         emit->input_map[index]))
+            return FALSE;
+      }
+
+      if (emit->key.vkey.adjust_attrib_w_1 & (1 << index)) {
+         /* move 1 into W position of tmp */
+         if (!submit_op1(emit,
+                         inst_token(SVGA3DOP_MOV),
+                         writemask(dst(tmp), TGSI_WRITEMASK_W),
+                         get_one_immediate(emit)))
+            return FALSE;
+      }

      /* Reassign the input_map entry to the new tmp register */
      emit->input_map[index] = tmp;
@ -3640,7 +3661,8 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
         return TRUE;
-      if (emit->key.vkey.adjust_attrib_range)
+      if (emit->key.vkey.adjust_attrib_range ||
+          emit->key.vkey.adjust_attrib_w_1)
         return TRUE;
   }

@ -3803,7 +3825,8 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
   }
   else {
      assert(emit->unit == PIPE_SHADER_VERTEX);
-      if (emit->key.vkey.adjust_attrib_range) {
+      if (emit->key.vkey.adjust_attrib_range ||
+          emit->key.vkey.adjust_attrib_w_1) {
         if (!emit_adjusted_vertex_attribs(emit))
            return FALSE;
      }