intel/fs,vec4: Drop support for shader time

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14056>
This commit is contained in:
Jason Ekstrand 2021-12-03 21:55:56 -06:00 committed by Marge Bot
parent 8f3c100d61
commit 4fa58d27a5
36 changed files with 37 additions and 439 deletions

View File

@ -1464,7 +1464,7 @@ crocus_compile_tcs(struct crocus_context *ice,
char *error_str = NULL;
const unsigned *program =
brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &key_clean, tcs_prog_data, nir,
-1, NULL, &error_str);
NULL, &error_str);
if (program == NULL) {
dbg_printf("Failed to compile control shader: %s\n", error_str);
ralloc_free(mem_ctx);
@ -1597,7 +1597,7 @@ crocus_compile_tes(struct crocus_context *ice,
char *error_str = NULL;
const unsigned *program =
brw_compile_tes(compiler, &ice->dbg, mem_ctx, &key_clean, &input_vue_map,
tes_prog_data, nir, -1, NULL, &error_str);
tes_prog_data, nir, NULL, &error_str);
if (program == NULL) {
dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
ralloc_free(mem_ctx);
@ -1731,7 +1731,7 @@ crocus_compile_gs(struct crocus_context *ice,
char *error_str = NULL;
const unsigned *program =
brw_compile_gs(compiler, &ice->dbg, mem_ctx, &key_clean, gs_prog_data, nir,
-1, NULL, &error_str);
NULL, &error_str);
if (program == NULL) {
dbg_printf("Failed to compile geometry shader: %s\n", error_str);
ralloc_free(mem_ctx);

View File

@ -1552,7 +1552,7 @@ iris_compile_tcs(struct iris_screen *screen,
char *error_str = NULL;
const unsigned *program =
brw_compile_tcs(compiler, dbg, mem_ctx, &brw_key, tcs_prog_data,
nir, -1, NULL, &error_str);
nir, NULL, &error_str);
if (program == NULL) {
dbg_printf("Failed to compile control shader: %s\n", error_str);
ralloc_free(mem_ctx);
@ -1710,7 +1710,7 @@ iris_compile_tes(struct iris_screen *screen,
char *error_str = NULL;
const unsigned *program =
brw_compile_tes(compiler, dbg, mem_ctx, &brw_key, &input_vue_map,
tes_prog_data, nir, -1, NULL, &error_str);
tes_prog_data, nir, NULL, &error_str);
if (program == NULL) {
dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
ralloc_free(mem_ctx);
@ -1844,7 +1844,7 @@ iris_compile_gs(struct iris_screen *screen,
char *error_str = NULL;
const unsigned *program =
brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data,
nir, -1, NULL, &error_str);
nir, NULL, &error_str);
if (program == NULL) {
dbg_printf("Failed to compile geometry shader: %s\n", error_str);
ralloc_free(mem_ctx);

View File

@ -608,14 +608,6 @@ struct brw_image_param {
*/
#define BRW_GFX6_SOL_BINDING_START 0
/**
* Stride in bytes between shader_time entries.
*
* We separate entries by a cacheline to reduce traffic between EUs writing to
* different entries.
*/
#define BRW_SHADER_TIME_STRIDE 64
struct brw_ubo_range
{
uint16_t block;
@ -755,7 +747,6 @@ struct brw_stage_prog_data {
uint32_t ubo_start;
uint32_t ssbo_start;
uint32_t image_start;
uint32_t shader_time_start;
uint32_t plane_start[3];
/** @} */
} binding_table;
@ -1573,8 +1564,6 @@ struct brw_compile_vs_params {
struct brw_vs_prog_data *prog_data;
bool edgeflag_is_last; /* true for gallium */
bool shader_time;
int shader_time_index;
struct brw_compile_stats *stats;
@ -1608,7 +1597,6 @@ brw_compile_tcs(const struct brw_compiler *compiler,
const struct brw_tcs_prog_key *key,
struct brw_tcs_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
struct brw_compile_stats *stats,
char **error_str);
@ -1624,7 +1612,6 @@ brw_compile_tes(const struct brw_compiler *compiler, void *log_data,
const struct brw_vue_map *input_vue_map,
struct brw_tes_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
struct brw_compile_stats *stats,
char **error_str);
@ -1639,7 +1626,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
struct brw_compile_stats *stats,
char **error_str);
@ -1724,11 +1710,6 @@ struct brw_compile_fs_params {
const struct brw_vue_map *vue_map;
const struct brw_mue_map *mue_map;
bool shader_time;
int shader_time_index8;
int shader_time_index16;
int shader_time_index32;
bool allow_spilling;
bool use_rep_send;
@ -1763,9 +1744,6 @@ struct brw_compile_cs_params {
const struct brw_cs_prog_key *key;
struct brw_cs_prog_data *prog_data;
bool shader_time;
int shader_time_index;
struct brw_compile_stats *stats;
void *log_data;

View File

@ -1675,10 +1675,6 @@ void gfx7_block_read_scratch(struct brw_codegen *p,
int num_regs,
unsigned offset);
void brw_shader_time_add(struct brw_codegen *p,
struct brw_reg payload,
uint32_t surf_index);
/**
* Return the generation-specific jump distance scaling factor.
*

View File

@ -395,8 +395,6 @@ enum opcode {
*/
FS_OPCODE_PACK,
SHADER_OPCODE_SHADER_TIME_ADD,
/**
* Typed and untyped surface access opcodes.
*

View File

@ -3592,55 +3592,6 @@ brw_broadcast(struct brw_codegen *p,
brw_pop_insn_state(p);
}
/**
* This instruction is generated as a single-channel align1 instruction by
* both the VS and FS stages when using INTEL_DEBUG=shader_time.
*
* We can't use the typed atomic op in the FS because that has the execution
* mask ANDed with the pixel mask, but we just want to write the one dword for
* all the pixels.
*
* We don't use the SIMD4x2 atomic ops in the VS because we want to just write
* one u32. So we use the same untyped atomic write message as the pixel
* shader.
*
* The untyped atomic operation requires a BUFFER surface type with RAW
* format, and is only accessible through the legacy DATA_CACHE dataport
* messages.
*
* \param p           codegen context; the SEND is appended to it and its
*                    default instruction state is saved and restored around
*                    the emission.
* \param payload     register whose file and nr are used to build the vec1
*                    source operand of the SEND.
* \param surf_index  value written into the SEND's binding table index.
*/
void brw_shader_time_add(struct brw_codegen *p,
struct brw_reg payload,
uint32_t surf_index)
{
const struct intel_device_info *devinfo = p->devinfo;
/* verx10 >= 75 selects the HSW "data cache 1" SFID, anything older the
 * GFX7 data cache SFID.
 */
const unsigned sfid = (devinfo->verx10 >= 75 ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GFX7_SFID_DATAPORT_DATA_CACHE);
assert(devinfo->ver >= 7);
/* Force align1, mask-disabled, uncompressed state for this one SEND; the
 * caller's defaults are restored by the pop at the end.
 */
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
/* We use brw_vec1_reg and unmasked because we want to increment the given
* offset only once.
*/
brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL, 0));
brw_set_src0(p, send, brw_vec1_reg(payload.file,
payload.nr, 0));
/* Untyped atomic ADD descriptor.
 * NOTE(review): the (2, 0, false) arguments presumably mean message length
 * 2, response length 0, no header -- confirm against brw_message_desc().
 */
brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) |
brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD,
false)));
brw_inst_set_sfid(devinfo, send, sfid);
brw_inst_set_binding_table_index(devinfo, send, surf_index);
brw_pop_insn_state(p);
}
/**
* Emit the SEND message for a barrier

View File

@ -221,7 +221,6 @@ fs_inst::is_send_from_grf() const
{
switch (opcode) {
case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SHADER_TIME_ADD:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
@ -310,7 +309,6 @@ fs_inst::is_payload(unsigned arg) const
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_SHADER_TIME_ADD:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case SHADER_OPCODE_INTERLOCK:
@ -590,83 +588,6 @@ fs_visitor::get_timestamp(const fs_builder &bld)
return dst;
}
/**
* Record the shader's starting timestamp.
*
* Reads the timestamp through get_timestamp() (annotated "shader time
* start") and stores component 0 of the result in shader_start_time, which
* emit_shader_time_end() later subtracts from the end timestamp.
*/
void
fs_visitor::emit_shader_time_begin()
{
/* We want only the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
* which is plenty of time for our purposes. It is identical across the
* EUs, but since it's tracking GPU core speed it will increment at a
* varying rate as render P-states change.
*/
shader_start_time = component(
get_timestamp(bld.annotate("shader time start")), 0);
}
/**
* Emit the end-of-shader timing code immediately before the final EOT
* instruction.
*
* Reads the timestamp again and tests bit 0 of its component 2. When that
* bit is clear, the elapsed time (end - start - 2) is added at subindex 0
* and the constant 1 at subindex 1; otherwise the constant 1 is added at
* subindex 2. All three writes go through SHADER_TIME_ADD().
*
* The tail of the instruction list must already be an EOT instruction when
* this is called (asserted below).
*/
void
fs_visitor::emit_shader_time_end()
{
/* Insert our code just before the final SEND with EOT. */
exec_node *end = this->instructions.get_tail();
assert(end && ((fs_inst *) end)->eot);
const fs_builder ibld = bld.annotate("shader time end")
.exec_all().at(NULL, end);
const fs_reg timestamp = get_timestamp(ibld);
/* We only use the low 32 bits of the timestamp - see
* emit_shader_time_begin().
*
* We could also check if render P-states have changed (or anything
* else that might disrupt timing) by setting smear to 2 and checking if
* that field is != 0.
*/
const fs_reg shader_end_time = component(timestamp, 0);
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
const fs_reg reset = component(timestamp, 2);
set_condmod(BRW_CONDITIONAL_Z,
ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u)));
ibld.IF(BRW_PREDICATE_NORMAL);
/* Negate a local copy of the start time so the ADD below computes
 * end - start; the shader_start_time member itself is left untouched.
 */
fs_reg start = shader_start_time;
start.negate = true;
const fs_reg diff = component(fs_reg(VGRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UD),
0);
const fs_builder cbld = ibld.group(1, 0);
/* NOTE(review): cbld is already ibld.group(1, 0), so the extra
 * .group(1, 0) on the next line looks redundant -- confirm.
 */
cbld.group(1, 0).ADD(diff, start, shader_end_time);
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
cbld.ADD(diff, diff, brw_imm_ud(-2u));
SHADER_TIME_ADD(cbld, 0, diff);
SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u));
ibld.emit(BRW_OPCODE_ELSE);
SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u));
ibld.emit(BRW_OPCODE_ENDIF);
}
/**
* Emit a SHADER_OPCODE_SHADER_TIME_ADD instruction that adds \p value to one
* of this shader's shader-time entries.
*
* \param bld                   builder used to emit the instruction
* \param shader_time_subindex  which of the shader's entries to update; the
*                              entry index is shader_time_index * 3 plus
*                              this value
* \param value                 amount to add, passed as the third source
*/
void
fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
int shader_time_subindex,
fs_reg value)
{
/* Entries are BRW_SHADER_TIME_STRIDE bytes apart; the byte offset is passed
 * to the generator as an immediate.
 */
int index = shader_time_index * 3 + shader_time_subindex;
struct brw_reg offset = brw_imm_d(index * BRW_SHADER_TIME_STRIDE);
/* NOTE(review): the uvec2 allocation at SIMD8 is presumably there so the
 * payload covers two registers, since the generator writes payload.nr and
 * payload.nr + 1 -- confirm against vgrf() sizing.
 */
fs_reg payload;
if (dispatch_width == 8)
payload = vgrf(glsl_type::uvec2_type);
else
payload = vgrf(glsl_type::uint_type);
/* No destination (default-constructed fs_reg); sources are payload,
 * offset, value in that order.
 */
bld.emit(SHADER_OPCODE_SHADER_TIME_ADD, fs_reg(), payload, offset, value);
}
void
fs_visitor::vfail(const char *format, va_list va)
{
@ -8850,9 +8771,6 @@ fs_visitor::run_vs()
setup_vs_payload();
if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
if (failed)
@ -8860,9 +8778,6 @@ fs_visitor::run_vs()
emit_urb_writes();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -8941,9 +8856,6 @@ fs_visitor::run_tcs()
tcs_key->input_vertices;
}
if (shader_time_index >= 0)
emit_shader_time_begin();
/* Initialize gl_InvocationID */
set_tcs_invocation_id();
@ -8978,9 +8890,6 @@ fs_visitor::run_tcs()
inst->mlen = 3;
inst->eot = true;
if (shader_time_index >= 0)
emit_shader_time_end();
if (failed)
return false;
@ -9005,9 +8914,6 @@ fs_visitor::run_tes()
/* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */
payload.num_regs = 5;
if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
if (failed)
@ -9015,9 +8921,6 @@ fs_visitor::run_tes()
emit_urb_writes();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -9054,16 +8957,10 @@ fs_visitor::run_gs()
}
}
if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
emit_gs_thread_end();
if (shader_time_index >= 0)
emit_shader_time_end();
if (failed)
return false;
@ -9126,9 +9023,6 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
assert(dispatch_width == 16);
emit_repclear_shader();
} else {
if (shader_time_index >= 0)
emit_shader_time_begin();
if (nir->info.inputs_read > 0 ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
(nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
@ -9166,9 +9060,6 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
emit_fb_writes();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -9198,9 +9089,6 @@ fs_visitor::run_cs(bool allow_spilling)
setup_cs_payload();
if (shader_time_index >= 0)
emit_shader_time_begin();
if (devinfo->platform == INTEL_PLATFORM_HSW && prog_data->total_shared > 0) {
/* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
const fs_builder abld = bld.exec_all().group(1, 0);
@ -9215,9 +9103,6 @@ fs_visitor::run_cs(bool allow_spilling)
emit_cs_terminate();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -9241,9 +9126,6 @@ fs_visitor::run_bs(bool allow_spilling)
/* R0: thread header, R1: stack IDs, R2: argument addresses */
payload.num_regs = 3;
if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
if (failed)
@ -9252,9 +9134,6 @@ fs_visitor::run_bs(bool allow_spilling)
/* TODO(RT): Perhaps rename this? */
emit_cs_terminate();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -9295,9 +9174,6 @@ fs_visitor::run_task(bool allow_spilling)
*/
payload.num_regs = dispatch_width == 32 ? 4 : 3;
if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
if (failed)
@ -9305,9 +9181,6 @@ fs_visitor::run_task(bool allow_spilling)
emit_cs_terminate();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -9348,9 +9221,6 @@ fs_visitor::run_mesh(bool allow_spilling)
*/
payload.num_regs = dispatch_width == 32 ? 4 : 3;
if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
if (failed)
@ -9358,9 +9228,6 @@ fs_visitor::run_mesh(bool allow_spilling)
emit_cs_terminate();
if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
optimize();
@ -9738,7 +9605,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
v8 = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base, nir, 8,
params->shader_time ? params->shader_time_index8 : -1,
debug_enabled);
if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg);
@ -9779,7 +9645,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
/* Try a SIMD16 compile */
v16 = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base, nir, 16,
params->shader_time ? params->shader_time_index16 : -1,
debug_enabled);
v16->import_uniforms(v8);
if (!v16->run_fs(allow_spilling, params->use_rep_send)) {
@ -9807,7 +9672,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
/* Try a SIMD32 compile */
v32 = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base, nir, 32,
params->shader_time ? params->shader_time_index32 : -1,
debug_enabled);
v32->import_uniforms(v8);
if (!v32->run_fs(allow_spilling, false)) {
@ -10064,7 +9928,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
const nir_shader *nir = params->nir;
const struct brw_cs_prog_key *key = params->key;
struct brw_cs_prog_data *prog_data = params->prog_data;
int shader_time_index = params->shader_time ? params->shader_time_index : -1;
const bool debug_enabled =
INTEL_DEBUG(params->debug_flag ? params->debug_flag : DEBUG_CS);
@ -10106,7 +9969,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
v[simd] = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base, shader, dispatch_width,
shader_time_index, debug_enabled);
debug_enabled);
if (prog_data->prog_mask) {
unsigned first = ffs(prog_data->prog_mask) - 1;
@ -10238,7 +10101,7 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
if (!INTEL_DEBUG(DEBUG_NO8)) {
v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader,
8, -1 /* shader time */, debug_enabled);
8, debug_enabled);
const bool allow_spilling = true;
if (!v8->run_bs(allow_spilling)) {
if (error_str)
@ -10256,7 +10119,7 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data,
if (!has_spilled && !INTEL_DEBUG(DEBUG_NO16)) {
v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader,
16, -1 /* shader time */, debug_enabled);
16, debug_enabled);
const bool allow_spilling = (v == NULL);
if (!v16->run_bs(allow_spilling)) {
brw_shader_perf_log(compiler, log_data,

View File

@ -97,14 +97,12 @@ public:
struct brw_stage_prog_data *prog_data,
const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index,
bool debug_enabled);
fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *gs_compile,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
int shader_time_index,
bool debug_enabled);
void init();
~fs_visitor();
@ -336,12 +334,6 @@ public:
void emit_barrier();
void emit_shader_time_begin();
void emit_shader_time_end();
void SHADER_TIME_ADD(const brw::fs_builder &bld,
int shader_time_subindex,
fs_reg value);
fs_reg get_timestamp(const brw::fs_builder &bld);
fs_reg interp_reg(int location, int channel);
@ -431,8 +423,6 @@ public:
const unsigned dispatch_width; /**< 8, 16 or 32 */
unsigned max_dispatch_width;
int shader_time_index;
struct shader_stats shader_stats;
brw::fs_builder bld;
@ -553,11 +543,6 @@ private:
struct brw_reg x,
struct brw_reg y);
void generate_shader_time_add(fs_inst *inst,
struct brw_reg payload,
struct brw_reg offset,
struct brw_reg value);
void generate_mov_indirect(fs_inst *inst,
struct brw_reg dst,
struct brw_reg reg,

View File

@ -1846,47 +1846,6 @@ fs_generator::generate_pack_half_2x16_split(fs_inst *,
brw_F32TO16(p, dst_w, x);
}
/**
* Generate native code for SHADER_OPCODE_SHADER_TIME_ADD.
*
* Moves \p offset into the first payload register and \p value into the
* following one, then emits the atomic-add SEND via brw_shader_time_add()
* using the binding table slot prog_data->binding_table.shader_time_start.
*
* \param payload  GRF used as the message payload (asserted); registers
*                 payload.nr and payload.nr + 1 are written
* \param offset   immediate (asserted) moved into the first payload register
* \param value    GRF or immediate (asserted) moved into the second payload
*                 register
*/
void
fs_generator::generate_shader_time_add(fs_inst *,
struct brw_reg payload,
struct brw_reg offset,
struct brw_reg value)
{
/* Remember the incoming default SWSB so it can be split across the
 * instructions emitted below.
 */
const tgl_swsb swsb = brw_get_default_swsb(p);
assert(devinfo->ver >= 7);
brw_push_insn_state(p);
brw_set_default_mask_control(p, true);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
/* Single-element views of the two payload registers, retyped to match the
 * operands that get moved into them.
 */
struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
offset.type);
struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
value.type);
assert(offset.file == BRW_IMMEDIATE_VALUE);
/* A GRF value is read as a scalar: width 1 with zero strides. */
if (value.file == BRW_GENERAL_REGISTER_FILE) {
value.width = BRW_WIDTH_1;
value.hstride = BRW_HORIZONTAL_STRIDE_0;
value.vstride = BRW_VERTICAL_STRIDE_0;
} else {
assert(value.file == BRW_IMMEDIATE_VALUE);
}
/* Trying to deal with setup of the params from the IR is crazy in the FS8
* case, and we don't really care about squeezing every bit of performance
* out of this path, so we just emit the MOVs from here.
*/
brw_MOV(p, payload_offset, offset);
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, payload_value, value);
/* The SEND gets the destination-dependency part of the saved SWSB. */
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
brw_shader_time_add(p, payload,
prog_data->binding_table.shader_time_start);
brw_pop_insn_state(p);
}
void
fs_generator::enable_debug(const char *shader_name)
{
@ -2459,10 +2418,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
generate_halt(inst);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
generate_shader_time_add(inst, src[0], src[1], src[2]);
break;
case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_MEMORY_FENCE: {
assert(src[1].file == BRW_IMMEDIATE_VALUE);

View File

@ -1130,7 +1130,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
struct brw_stage_prog_data *prog_data,
const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index,
bool debug_enabled)
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data,
debug_enabled),
@ -1138,7 +1137,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
live_analysis(this), regpressure_analysis(this),
performance_analysis(this),
dispatch_width(dispatch_width),
shader_time_index(shader_time_index),
bld(fs_builder(this, dispatch_width).at_end())
{
init();
@ -1149,7 +1147,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
int shader_time_index,
bool debug_enabled)
: backend_shader(compiler, log_data, mem_ctx, shader,
&prog_data->base.base, debug_enabled),
@ -1158,7 +1155,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
live_analysis(this), regpressure_analysis(this),
performance_analysis(this),
dispatch_width(8),
shader_time_index(shader_time_index),
bld(fs_builder(this, dispatch_width).at_end())
{
init();

View File

@ -192,7 +192,7 @@ brw_compile_task(const struct brw_compiler *compiler,
v[simd] = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base.base, shader, dispatch_width,
-1 /* shader_time_index */, debug_enabled);
debug_enabled);
if (prog_data->base.prog_mask) {
unsigned first = ffs(prog_data->base.prog_mask) - 1;
@ -572,7 +572,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
v[simd] = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base.base, shader, dispatch_width,
-1 /* shader_time_index */, debug_enabled);
debug_enabled);
if (prog_data->base.prog_mask) {
unsigned first = ffs(prog_data->base.prog_mask) - 1;

View File

@ -287,9 +287,6 @@ brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op)
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
return "image_size_logical";
case SHADER_OPCODE_SHADER_TIME_ADD:
return "shader_time_add";
case VEC4_OPCODE_UNTYPED_ATOMIC:
return "untyped_atomic";
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
@ -1337,7 +1334,6 @@ brw_compile_tes(const struct brw_compiler *compiler,
const struct brw_vue_map *input_vue_map,
struct brw_tes_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
struct brw_compile_stats *stats,
char **error_str)
{
@ -1429,7 +1425,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
if (is_scalar) {
fs_visitor v(compiler, log_data, mem_ctx, &key->base,
&prog_data->base.base, nir, 8,
shader_time_index, debug_enabled);
debug_enabled);
if (!v.run_tes()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
@ -1457,7 +1453,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
assembly = g.get_assembly();
} else {
brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
nir, mem_ctx, shader_time_index, debug_enabled);
nir, mem_ctx, debug_enabled);
if (!v.run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);

View File

@ -150,7 +150,6 @@ bool
vec4_instruction::is_send_from_grf() const
{
switch (opcode) {
case SHADER_OPCODE_SHADER_TIME_ADD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
case VEC4_OPCODE_UNTYPED_ATOMIC:
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
@ -207,7 +206,6 @@ unsigned
vec4_instruction::size_read(unsigned arg) const
{
switch (opcode) {
case SHADER_OPCODE_SHADER_TIME_ADD:
case VEC4_OPCODE_UNTYPED_ATOMIC:
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
@ -361,8 +359,6 @@ vec4_instruction::implied_mrf_writes() const
return 1;
case TCS_OPCODE_URB_WRITE:
return 0;
case SHADER_OPCODE_SHADER_TIME_ADD:
return 0;
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXD:
@ -1673,70 +1669,6 @@ vec4_visitor::get_timestamp()
return src_reg(dst);
}
/**
* Record the shader's starting timestamp in shader_start_time.
*
* Also sets current_annotation to "shader time start".
*/
void
vec4_visitor::emit_shader_time_begin()
{
current_annotation = "shader time start";
shader_start_time = get_timestamp();
}
/**
* Emit the end-of-shader timing code.
*
* Reads the timestamp again and tests bit 0 of its ZZZZ-swizzled value. When
* that bit is clear, the elapsed time (end - start - 2) is written at
* subindex 0 and the constant 1 at subindex 1; otherwise the constant 1 is
* written at subindex 2. All writes go through emit_shader_time_write().
*
* Side effects: sets current_annotation to "shader time end" and sets the
* negate flag on the shader_start_time member.
*/
void
vec4_visitor::emit_shader_time_end()
{
current_annotation = "shader time end";
src_reg shader_end_time = get_timestamp();
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
src_reg reset_end = shader_end_time;
reset_end.swizzle = BRW_SWIZZLE_ZZZZ;
vec4_instruction *test = emit(AND(dst_null_ud(), reset_end, brw_imm_ud(1u)));
test->conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
/* Take the current timestamp and get the delta. */
/* Note this negates the member in place rather than a local copy. */
shader_start_time.negate = true;
dst_reg diff = dst_reg(this, glsl_type::uint_type);
emit(ADD(diff, shader_start_time, shader_end_time));
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
emit(ADD(diff, src_reg(diff), brw_imm_ud(-2u)));
emit_shader_time_write(0, src_reg(diff));
emit_shader_time_write(1, brw_imm_ud(1u));
emit(BRW_OPCODE_ELSE);
emit_shader_time_write(2, brw_imm_ud(1u));
emit(BRW_OPCODE_ENDIF);
}
/**
* Emit the instructions that add \p value to one of this shader's
* shader-time entries.
*
* \param shader_time_subindex  which of the shader's entries to update; the
*                              entry index is shader_time_index * 3 plus
*                              this value
* \param value                 amount to add
*/
void
vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
{
/* Allocate a two-element vec4 array as the message payload. "offset"
 * aliases its start and "time" aliases the location REG_SIZE bytes later.
 */
dst_reg dst =
dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
dst_reg offset = dst;
dst_reg time = dst;
time.offset += REG_SIZE;
offset.type = BRW_REGISTER_TYPE_UD;
/* Entries are BRW_SHADER_TIME_STRIDE bytes apart. */
int index = shader_time_index * 3 + shader_time_subindex;
emit(MOV(offset, brw_imm_d(index * BRW_SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
emit(MOV(time, value));
/* No destination; the payload built above is the only source, with a
 * message length of 2.
 */
vec4_instruction *inst =
emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
inst->mlen = 2;
}
static bool
is_align1_df(vec4_instruction *inst)
{
@ -2420,9 +2352,6 @@ vec4_visitor::invalidate_analysis(brw::analysis_dependency_class c)
bool
vec4_visitor::run()
{
if (shader_time_index >= 0)
emit_shader_time_begin();
setup_push_ranges();
if (prog_data->base.zero_push_reg) {
@ -2705,7 +2634,6 @@ brw_compile_vs(const struct brw_compiler *compiler,
fs_visitor v(compiler, params->log_data, mem_ctx, &key->base,
&prog_data->base.base, nir, 8,
params->shader_time ? params->shader_time_index : -1,
debug_enabled);
if (!v.run_vs()) {
params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);
@ -2737,7 +2665,6 @@ brw_compile_vs(const struct brw_compiler *compiler,
vec4_vs_visitor v(compiler, params->log_data, key, prog_data,
nir, mem_ctx,
params->shader_time ? params->shader_time_index : -1,
debug_enabled);
if (!v.run()) {
params->error_str = ralloc_strdup(mem_ctx, v.fail_msg);

View File

@ -72,7 +72,6 @@ public:
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index,
bool debug_enabled);
dst_reg dst_null_f()
@ -278,10 +277,6 @@ public:
vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
virtual void emit_urb_slot(dst_reg reg, int varying);
void emit_shader_time_begin();
void emit_shader_time_end();
void emit_shader_time_write(int shader_time_subindex, src_reg value);
src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
@ -370,8 +365,6 @@ private:
*/
const bool no_spills;
int shader_time_index;
unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

View File

@ -1908,12 +1908,6 @@ generate_code(struct brw_codegen *p,
generate_gs_get_instance_id(p, dst);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0],
prog_data->base.binding_table.shader_time_start);
send_count++;
break;
case VEC4_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,

View File

@ -43,11 +43,10 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index,
bool debug_enabled)
: vec4_visitor(compiler, log_data, &c->key.base.tex,
&prog_data->base, shader, mem_ctx,
no_spills, shader_time_index, debug_enabled),
no_spills, debug_enabled),
c(c),
gs_prog_data(prog_data)
{
@ -221,8 +220,6 @@ vec4_gs_visitor::emit_thread_end()
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count);
if (INTEL_DEBUG(DEBUG_SHADER_TIME))
emit_shader_time_end();
inst = emit(GS_OPCODE_THREAD_END);
inst->base_mrf = base_mrf;
inst->mlen = 1;
@ -588,7 +585,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
struct brw_compile_stats *stats,
char **error_str)
{
@ -821,7 +817,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (is_scalar) {
fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, nir,
shader_time_index, debug_enabled);
debug_enabled);
if (v.run_gs()) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
@ -858,7 +854,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
brw::vec4_gs_visitor v(compiler, log_data, &c, prog_data, nir,
mem_ctx, true /* no_spills */,
shader_time_index, debug_enabled);
debug_enabled);
/* Backup 'nr_params' and 'param' as they can be modified by the
* DUAL_OBJECT visitor. If it fails, we will run the fallback
@ -928,11 +924,11 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (compiler->devinfo->ver >= 7)
gs = new brw::vec4_gs_visitor(compiler, log_data, &c, prog_data,
nir, mem_ctx, false /* no_spills */,
shader_time_index, debug_enabled);
debug_enabled);
else
gs = new brw::gfx6_gs_visitor(compiler, log_data, &c, prog_data,
nir, mem_ctx, false /* no_spills */,
shader_time_index, debug_enabled);
debug_enabled);
if (!gs->run()) {
if (error_str)

View File

@ -47,7 +47,6 @@ public:
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index,
bool debug_enabled);
virtual void nir_setup_inputs();

View File

@ -40,10 +40,9 @@ vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
struct brw_tcs_prog_data *prog_data,
const nir_shader *nir,
void *mem_ctx,
int shader_time_index,
bool debug_enabled)
: vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
nir, mem_ctx, false, shader_time_index, debug_enabled),
nir, mem_ctx, false, debug_enabled),
key(key)
{
}
@ -143,9 +142,6 @@ vec4_tcs_visitor::emit_thread_end()
emit(BRW_OPCODE_ENDIF);
}
if (INTEL_DEBUG(DEBUG_SHADER_TIME))
emit_shader_time_end();
inst = emit(TCS_OPCODE_THREAD_END);
inst->base_mrf = 14;
inst->mlen = 2;
@ -361,7 +357,6 @@ brw_compile_tcs(const struct brw_compiler *compiler,
const struct brw_tcs_prog_key *key,
struct brw_tcs_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
struct brw_compile_stats *stats,
char **error_str)
{
@ -459,8 +454,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
if (is_scalar) {
fs_visitor v(compiler, log_data, mem_ctx, &key->base,
&prog_data->base.base, nir, 8,
shader_time_index, debug_enabled);
&prog_data->base.base, nir, 8, debug_enabled);
if (!v.run_tcs()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
@ -487,8 +481,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
assembly = g.get_assembly();
} else {
brw::vec4_tcs_visitor v(compiler, log_data, key, prog_data,
nir, mem_ctx, shader_time_index,
debug_enabled);
nir, mem_ctx, debug_enabled);
if (!v.run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);

View File

@ -45,7 +45,6 @@ public:
struct brw_tcs_prog_data *prog_data,
const nir_shader *nir,
void *mem_ctx,
int shader_time_index,
bool debug_enabled);
protected:

View File

@ -39,10 +39,9 @@ vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
struct brw_tes_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
int shader_time_index,
bool debug_enabled)
: vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
shader, mem_ctx, false, shader_time_index, debug_enabled)
shader, mem_ctx, false, debug_enabled)
{
}
@ -104,12 +103,6 @@ vec4_tes_visitor::emit_urb_write_header(int mrf)
vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
/* For DS, the URB writes end the thread. */
if (complete) {
if (INTEL_DEBUG(DEBUG_SHADER_TIME))
emit_shader_time_end();
}
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;

View File

@ -44,7 +44,6 @@ public:
struct brw_tes_prog_data *prog_data,
const nir_shader *nir,
void *mem_ctx,
int shader_time_index,
bool debug_enabled);
protected:

View File

@ -1611,7 +1611,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index,
bool debug_enabled)
: backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base,
debug_enabled),
@ -1624,7 +1623,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
live_analysis(this), performance_analysis(this),
need_all_constants_in_pull_buffer(false),
no_spills(no_spills),
shader_time_index(shader_time_index),
last_scratch(0)
{
this->failed = false;

View File

@ -37,7 +37,6 @@ public:
struct brw_vs_prog_data *vs_prog_data,
const nir_shader *shader,
void *mem_ctx,
int shader_time_index,
bool debug_enabled);
protected:

View File

@ -46,12 +46,6 @@ vec4_vs_visitor::emit_urb_write_header(int mrf)
vec4_instruction *
vec4_vs_visitor::emit_urb_write_opcode(bool complete)
{
/* For VS, the URB writes end the thread. */
if (complete) {
if (INTEL_DEBUG(DEBUG_SHADER_TIME))
emit_shader_time_end();
}
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
@ -103,11 +97,9 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
struct brw_vs_prog_data *vs_prog_data,
const nir_shader *shader,
void *mem_ctx,
int shader_time_index,
bool debug_enabled)
: vec4_visitor(compiler, log_data, &key->base.tex, &vs_prog_data->base,
shader, mem_ctx, false /* no_spills */, shader_time_index,
debug_enabled),
shader, mem_ctx, false /* no_spills */, debug_enabled),
key(key),
vs_prog_data(vs_prog_data)
{

View File

@ -42,10 +42,9 @@ public:
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index,
bool debug_enabled) :
vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills,
shader_time_index, debug_enabled)
debug_enabled)
{
}

View File

@ -60,7 +60,7 @@ public:
struct brw_wm_prog_data *prog_data,
nir_shader *shader)
: fs_visitor(compiler, NULL, mem_ctx, NULL,
&prog_data->base, shader, 8, -1, false) {}
&prog_data->base, shader, 8, false) {}
};

View File

@ -49,7 +49,7 @@ public:
struct brw_wm_prog_data *prog_data,
nir_shader *shader)
: fs_visitor(compiler, NULL, mem_ctx, NULL,
&prog_data->base, shader, 8, -1, false) {}
&prog_data->base, shader, 8, false) {}
};

View File

@ -49,7 +49,7 @@ public:
struct brw_wm_prog_data *prog_data,
nir_shader *shader)
: fs_visitor(compiler, NULL, mem_ctx, NULL,
&prog_data->base, shader, 16, -1, false) {}
&prog_data->base, shader, 16, false) {}
};

View File

@ -52,7 +52,7 @@ void scoreboard_test::SetUp()
nir_shader *shader =
nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL);
v = new fs_visitor(compiler, NULL, ctx, NULL, &prog_data->base, shader, 8, -1, false);
v = new fs_visitor(compiler, NULL, ctx, NULL, &prog_data->base, shader, 8, false);
devinfo->ver = 12;
devinfo->verx10 = devinfo->ver * 10;

View File

@ -52,7 +52,7 @@ public:
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, mem_ctx,
false, -1, false)
false, false)
{
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
}

View File

@ -48,7 +48,7 @@ public:
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, mem_ctx,
false /* no_spills */, -1, false)
false /* no_spills */, false)
{
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
}

View File

@ -48,7 +48,7 @@ public:
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, mem_ctx,
false /* no_spills */, -1, false)
false /* no_spills */, false)
{
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
}

View File

@ -51,7 +51,7 @@ public:
nir_shader *shader,
struct brw_vue_prog_data *prog_data)
: vec4_visitor(compiler, NULL, NULL, prog_data, shader, mem_ctx,
false /* no_spills */, -1, false)
false /* no_spills */, false)
{
prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
}

View File

@ -58,7 +58,6 @@ static const struct debug_control debug_control[] = {
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
{ "clip", DEBUG_CLIP },
{ "shader_time", DEBUG_SHADER_TIME },
{ "no16", DEBUG_NO16 },
{ "blorp", DEBUG_BLORP },
{ "nodualobj", DEBUG_NO_DUAL_OBJECT_GS },

View File

@ -59,7 +59,7 @@ extern uint64_t intel_debug;
#define DEBUG_URB (1ull << 11)
#define DEBUG_VS (1ull << 12)
#define DEBUG_CLIP (1ull << 13)
#define DEBUG_SHADER_TIME (1ull << 14)
/* (1ull << 14) */
#define DEBUG_BLORP (1ull << 15)
#define DEBUG_NO16 (1ull << 16)
#define DEBUG_NO_DUAL_OBJECT_GS (1ull << 17)
@ -93,7 +93,7 @@ extern uint64_t intel_debug;
#define DEBUG_ANY (~0ull)
/* These flags are not compatible with the disk shader cache */
#define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
#define DEBUG_DISK_CACHE_DISABLE_MASK 0
/* These flags may affect program generation */
#define DEBUG_DISK_CACHE_MASK \

View File

@ -1017,7 +1017,7 @@ anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
&tcs_stage->key.tcs,
&tcs_stage->prog_data.tcs,
tcs_stage->nir, -1,
tcs_stage->nir,
tcs_stage->stats, NULL);
}
@ -1047,7 +1047,7 @@ anv_pipeline_compile_tes(const struct brw_compiler *compiler,
&tes_stage->key.tes,
&tcs_stage->prog_data.tcs.base.vue_map,
&tes_stage->prog_data.tes,
tes_stage->nir, -1,
tes_stage->nir,
tes_stage->stats, NULL);
}
@ -1076,7 +1076,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
&gs_stage->key.gs,
&gs_stage->prog_data.gs,
gs_stage->nir, -1,
gs_stage->nir,
gs_stage->stats, NULL);
}