intel/schedule_instructions: Move some comments
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
aeaba24fcb
commit
95ae400abc
|
@ -368,44 +368,13 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
/* Test code:
|
/* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
|
||||||
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
|
|
||||||
* mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
|
|
||||||
* mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q };
|
|
||||||
* send(8) g4<1>ud g112<8,8,1>ud
|
|
||||||
* data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q };
|
|
||||||
*
|
|
||||||
* Running it 100 times as fragment shader on a 128x128 quad
|
|
||||||
* gives an average latency of 13867 cycles per atomic op,
|
|
||||||
* standard deviation 3%. Note that this is a rather
|
|
||||||
* pessimistic estimate, the actual latency in cases with few
|
|
||||||
* collisions between threads and favorable pipelining has been
|
|
||||||
* seen to be reduced by a factor of 100.
|
|
||||||
*/
|
|
||||||
latency = 14000;
|
latency = 14000;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||||
/* Test code:
|
/* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
|
||||||
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
|
|
||||||
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
|
|
||||||
* mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
|
|
||||||
* send(8) g4<1>UD g112<8,8,1>UD
|
|
||||||
* data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
|
|
||||||
* .
|
|
||||||
* . [repeats 8 times]
|
|
||||||
* .
|
|
||||||
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
|
|
||||||
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
|
|
||||||
* mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
|
|
||||||
* send(8) g4<1>UD g112<8,8,1>UD
|
|
||||||
* data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
|
|
||||||
*
|
|
||||||
* Running it 100 times as fragment shader on a 128x128 quad
|
|
||||||
* gives an average latency of 583 cycles per surface read,
|
|
||||||
* standard deviation 0.9%.
|
|
||||||
*/
|
|
||||||
latency = is_haswell ? 300 : 600;
|
latency = is_haswell ? 300 : 600;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -460,13 +429,44 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||||
|
|
||||||
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
|
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
|
||||||
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
|
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
|
||||||
/* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
|
/* Test code:
|
||||||
|
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
|
||||||
|
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
|
||||||
|
* mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
|
||||||
|
* send(8) g4<1>UD g112<8,8,1>UD
|
||||||
|
* data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
|
||||||
|
* .
|
||||||
|
* . [repeats 8 times]
|
||||||
|
* .
|
||||||
|
* mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
|
||||||
|
* mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
|
||||||
|
* mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
|
||||||
|
* send(8) g4<1>UD g112<8,8,1>UD
|
||||||
|
* data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
|
||||||
|
*
|
||||||
|
* Running it 100 times as a fragment shader on a 128x128 quad
|
||||||
|
* gives an average latency of 583 cycles per surface read,
|
||||||
|
* standard deviation 0.9%.
|
||||||
|
*/
|
||||||
assert(!is_haswell);
|
assert(!is_haswell);
|
||||||
latency = 600;
|
latency = 600;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
|
case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
|
||||||
/* See also SHADER_OPCODE_UNTYPED_ATOMIC */
|
/* Test code:
|
||||||
|
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
|
||||||
|
* mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
|
||||||
|
* mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q };
|
||||||
|
* send(8) g4<1>ud g112<8,8,1>ud
|
||||||
|
* data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q };
|
||||||
|
*
|
||||||
|
* Running it 100 times as a fragment shader on a 128x128 quad
|
||||||
|
* gives an average latency of 13867 cycles per atomic op,
|
||||||
|
* standard deviation 3%. Note that this is a rather
|
||||||
|
* pessimistic estimate; the actual latency in cases with few
|
||||||
|
* collisions between threads and favorable pipelining has been
|
||||||
|
* seen to be reduced by a factor of 100.
|
||||||
|
*/
|
||||||
assert(!is_haswell);
|
assert(!is_haswell);
|
||||||
latency = 14000;
|
latency = 14000;
|
||||||
break;
|
break;
|
||||||
|
@ -486,7 +486,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ:
|
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ:
|
||||||
case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE:
|
case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE:
|
||||||
case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ:
|
case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ:
|
||||||
/* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
|
/* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
|
||||||
latency = 300;
|
latency = 300;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -497,7 +497,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||||
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
||||||
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
/* See also SHADER_OPCODE_UNTYPED_ATOMIC */
|
/* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
|
||||||
latency = 14000;
|
latency = 14000;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue