intel/schedule_instructions: Move some comments

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
2019-02-21 10:32:01 -06:00 · 2019-02-21 10:32:01 -06:00 · 95ae400abc
parent aeaba24fcb
commit 95ae400abc
1 changed files with 37 additions and 37 deletions
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -368,44 +368,13 @@ schedule_node::set_latency_gen7(bool is_haswell)
      break;
   case SHADER_OPCODE_UNTYPED_ATOMIC:
-      /* Test code:
+      /* See GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
-      *   mov(8)    g112<1>ud       0x00000000ud       { align1 WE_all 1Q };
-      *   mov(1)    g112.7<1>ud     g1.7<0,1,0>ud      { align1 WE_all };
-      *   mov(8)    g113<1>ud       0x00000000ud       { align1 WE_normal 1Q };
-      *   send(8)   g4<1>ud         g112<8,8,1>ud
-      *             data (38, 5, 6) mlen 2 rlen 1      { align1 WE_normal 1Q };
-      *
-      * Running it 100 times as fragment shader on a 128x128 quad
-      * gives an average latency of 13867 cycles per atomic op,
-      * standard deviation 3%.  Note that this is a rather
-      * pessimistic estimate, the actual latency in cases with few
-      * collisions between threads and favorable pipelining has been
-      * seen to be reduced by a factor of 100.
-      */
      latency = 14000;
      break;
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
-      /* Test code:
+      /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
-      *   mov(8)    g112<1>UD       0x00000000UD       { align1 WE_all 1Q };
-      *   mov(1)    g112.7<1>UD     g1.7<0,1,0>UD      { align1 WE_all };
-      *   mov(8)    g113<1>UD       0x00000000UD       { align1 WE_normal 1Q };
-      *   send(8)   g4<1>UD         g112<8,8,1>UD
-      *             data (38, 6, 5) mlen 2 rlen 1      { align1 WE_normal 1Q };
-      *   .
-      *   . [repeats 8 times]
-      *   .
-      *   mov(8)    g112<1>UD       0x00000000UD       { align1 WE_all 1Q };
-      *   mov(1)    g112.7<1>UD     g1.7<0,1,0>UD      { align1 WE_all };
-      *   mov(8)    g113<1>UD       0x00000000UD       { align1 WE_normal 1Q };
-      *   send(8)   g4<1>UD         g112<8,8,1>UD
-      *             data (38, 6, 5) mlen 2 rlen 1      { align1 WE_normal 1Q };
-      *
-      * Running it 100 times as fragment shader on a 128x128 quad
-      * gives an average latency of 583 cycles per surface read,
-      * standard deviation 0.9%.
-      */
      latency = is_haswell ? 300 : 600;
      break;
@@ -460,13 +429,44 @@ schedule_node::set_latency_gen7(bool is_haswell)
         case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
         case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
-            /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+            /* Test code:
+            *   mov(8)    g112<1>UD       0x00000000UD       { align1 WE_all 1Q };
+            *   mov(1)    g112.7<1>UD     g1.7<0,1,0>UD      { align1 WE_all };
+            *   mov(8)    g113<1>UD       0x00000000UD       { align1 WE_normal 1Q };
+            *   send(8)   g4<1>UD         g112<8,8,1>UD
+            *             data (38, 6, 5) mlen 2 rlen 1      { align1 WE_normal 1Q };
+            *   .
+            *   . [repeats 8 times]
+            *   .
+            *   mov(8)    g112<1>UD       0x00000000UD       { align1 WE_all 1Q };
+            *   mov(1)    g112.7<1>UD     g1.7<0,1,0>UD      { align1 WE_all };
+            *   mov(8)    g113<1>UD       0x00000000UD       { align1 WE_normal 1Q };
+            *   send(8)   g4<1>UD         g112<8,8,1>UD
+            *             data (38, 6, 5) mlen 2 rlen 1      { align1 WE_normal 1Q };
+            *
+            * Running it 100 times as fragment shader on a 128x128 quad
+            * gives an average latency of 583 cycles per surface read,
+            * standard deviation 0.9%.
+            */
            assert(!is_haswell);
            latency = 600;
            break;
         case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
-            /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+            /* Test code:
+            *   mov(8)    g112<1>ud       0x00000000ud       { align1 WE_all 1Q };
+            *   mov(1)    g112.7<1>ud     g1.7<0,1,0>ud      { align1 WE_all };
+            *   mov(8)    g113<1>ud       0x00000000ud       { align1 WE_normal 1Q };
+            *   send(8)   g4<1>ud         g112<8,8,1>ud
+            *             data (38, 5, 6) mlen 2 rlen 1      { align1 WE_normal 1Q };
+            *
+            * Running it 100 times as fragment shader on a 128x128 quad
+            * gives an average latency of 13867 cycles per atomic op,
+            * standard deviation 3%.  Note that this is a rather
+            * pessimistic estimate, the actual latency in cases with few
+            * collisions between threads and favorable pipelining has been
+            * seen to be reduced by a factor of 100.
+            */
            assert(!is_haswell);
            latency = 14000;
            break;
@@ -486,7 +486,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
         case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ:
         case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE:
         case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ:
-            /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+            /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
            latency = 300;
            break;
@@ -497,7 +497,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
         case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
         case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
         case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
-            /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+            /* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
            latency = 14000;
            break;