i965: Use a single index per shader for shader_time.
Previously, each shader took 3 shader time indices which were potentially at arbitrary points in the shader time buffer. Now, each shader gets a single index which refers to 3 consecutive locations in the buffer. This simplifies some of the logic at the cost of having a magic 3 in a few places. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
This commit is contained in:
parent
6e255a3299
commit
c7893dc3c5
|
@ -821,20 +821,10 @@ struct brw_tracked_state {
|
|||
enum shader_time_shader_type {
|
||||
ST_NONE,
|
||||
ST_VS,
|
||||
ST_VS_WRITTEN,
|
||||
ST_VS_RESET,
|
||||
ST_GS,
|
||||
ST_GS_WRITTEN,
|
||||
ST_GS_RESET,
|
||||
ST_FS8,
|
||||
ST_FS8_WRITTEN,
|
||||
ST_FS8_RESET,
|
||||
ST_FS16,
|
||||
ST_FS16_WRITTEN,
|
||||
ST_FS16_RESET,
|
||||
ST_CS,
|
||||
ST_CS_WRITTEN,
|
||||
ST_CS_RESET,
|
||||
};
|
||||
|
||||
struct brw_vertex_buffer {
|
||||
|
@ -979,6 +969,8 @@ enum brw_predicate_state {
|
|||
BRW_PREDICATE_STATE_USE_BIT
|
||||
};
|
||||
|
||||
struct shader_times;
|
||||
|
||||
/**
|
||||
* brw_context is derived from gl_context.
|
||||
*/
|
||||
|
@ -1503,7 +1495,7 @@ struct brw_context
|
|||
const char **names;
|
||||
int *ids;
|
||||
enum shader_time_shader_type *types;
|
||||
uint64_t *cumulative;
|
||||
struct shader_times *cumulative;
|
||||
int num_entries;
|
||||
int max_entries;
|
||||
double report_time;
|
||||
|
|
|
@ -578,38 +578,30 @@ fs_visitor::emit_shader_time_begin()
|
|||
void
|
||||
fs_visitor::emit_shader_time_end()
|
||||
{
|
||||
enum shader_time_shader_type type, written_type, reset_type;
|
||||
enum shader_time_shader_type type;
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
type = ST_VS;
|
||||
written_type = ST_VS_WRITTEN;
|
||||
reset_type = ST_VS_RESET;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
type = ST_GS;
|
||||
written_type = ST_GS_WRITTEN;
|
||||
reset_type = ST_GS_RESET;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
if (dispatch_width == 8) {
|
||||
type = ST_FS8;
|
||||
written_type = ST_FS8_WRITTEN;
|
||||
reset_type = ST_FS8_RESET;
|
||||
} else {
|
||||
assert(dispatch_width == 16);
|
||||
type = ST_FS16;
|
||||
written_type = ST_FS16_WRITTEN;
|
||||
reset_type = ST_FS16_RESET;
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
type = ST_CS;
|
||||
written_type = ST_CS_WRITTEN;
|
||||
reset_type = ST_CS_RESET;
|
||||
break;
|
||||
default:
|
||||
unreachable("fs_visitor::emit_shader_time_end missing code");
|
||||
}
|
||||
int shader_time_index = brw_get_shader_time_index(brw, shader_prog, prog,
|
||||
type);
|
||||
|
||||
/* Insert our code just before the final SEND with EOT. */
|
||||
exec_node *end = this->instructions.get_tail();
|
||||
|
@ -639,20 +631,20 @@ fs_visitor::emit_shader_time_end()
|
|||
* trying to determine the time taken for single instructions.
|
||||
*/
|
||||
ibld.ADD(diff, diff, fs_reg(-2u));
|
||||
SHADER_TIME_ADD(ibld, type, diff);
|
||||
SHADER_TIME_ADD(ibld, written_type, fs_reg(1u));
|
||||
SHADER_TIME_ADD(ibld, shader_time_index, 0, diff);
|
||||
SHADER_TIME_ADD(ibld, shader_time_index, 1, fs_reg(1u));
|
||||
ibld.emit(BRW_OPCODE_ELSE);
|
||||
SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u));
|
||||
SHADER_TIME_ADD(ibld, shader_time_index, 2, fs_reg(1u));
|
||||
ibld.emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
|
||||
enum shader_time_shader_type type, fs_reg value)
|
||||
int shader_time_index, int shader_time_subindex,
|
||||
fs_reg value)
|
||||
{
|
||||
int shader_time_index =
|
||||
brw_get_shader_time_index(brw, shader_prog, prog, type);
|
||||
fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
|
||||
int index = shader_time_index * 3 + shader_time_subindex;
|
||||
fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
|
||||
|
||||
fs_reg payload;
|
||||
if (dispatch_width == 8)
|
||||
|
|
|
@ -278,7 +278,8 @@ public:
|
|||
void emit_shader_time_begin();
|
||||
void emit_shader_time_end();
|
||||
void SHADER_TIME_ADD(const brw::fs_builder &bld,
|
||||
enum shader_time_shader_type type, fs_reg value);
|
||||
int shader_time_index, int shader_time_subindex,
|
||||
fs_reg value);
|
||||
|
||||
void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
|
||||
fs_reg dst, fs_reg offset, fs_reg src0,
|
||||
|
|
|
@ -287,18 +287,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
|
|||
functions->MemoryBarrier = brw_memory_barrier;
|
||||
}
|
||||
|
||||
struct shader_times {
|
||||
uint64_t time;
|
||||
uint64_t written;
|
||||
uint64_t reset;
|
||||
};
|
||||
|
||||
void
|
||||
brw_init_shader_time(struct brw_context *brw)
|
||||
{
|
||||
const int max_entries = 4096;
|
||||
brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
|
||||
max_entries * SHADER_TIME_STRIDE,
|
||||
4096);
|
||||
const int max_entries = 2048;
|
||||
brw->shader_time.bo =
|
||||
drm_intel_bo_alloc(brw->bufmgr, "shader time",
|
||||
max_entries * SHADER_TIME_STRIDE * 3, 4096);
|
||||
brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
|
||||
brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
|
||||
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
|
||||
max_entries);
|
||||
brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
|
||||
brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
|
||||
max_entries);
|
||||
brw->shader_time.max_entries = max_entries;
|
||||
}
|
||||
|
@ -318,27 +324,6 @@ compare_time(const void *a, const void *b)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
get_written_and_reset(struct brw_context *brw, int i,
|
||||
uint64_t *written, uint64_t *reset)
|
||||
{
|
||||
enum shader_time_shader_type type = brw->shader_time.types[i];
|
||||
assert(type == ST_VS || type == ST_GS || type == ST_FS8 ||
|
||||
type == ST_FS16 || type == ST_CS);
|
||||
|
||||
/* Find where we recorded written and reset. */
|
||||
int wi, ri;
|
||||
|
||||
for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
|
||||
;
|
||||
|
||||
for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
|
||||
;
|
||||
|
||||
*written = brw->shader_time.cumulative[wi];
|
||||
*reset = brw->shader_time.cumulative[ri];
|
||||
}
|
||||
|
||||
static void
|
||||
print_shader_time_line(const char *stage, const char *name,
|
||||
int shader_num, uint64_t time, uint64_t total)
|
||||
|
@ -374,26 +359,13 @@ brw_report_shader_time(struct brw_context *brw)
|
|||
sorted[i] = &scaled[i];
|
||||
|
||||
switch (type) {
|
||||
case ST_VS_WRITTEN:
|
||||
case ST_VS_RESET:
|
||||
case ST_GS_WRITTEN:
|
||||
case ST_GS_RESET:
|
||||
case ST_FS8_WRITTEN:
|
||||
case ST_FS8_RESET:
|
||||
case ST_FS16_WRITTEN:
|
||||
case ST_FS16_RESET:
|
||||
case ST_CS_WRITTEN:
|
||||
case ST_CS_RESET:
|
||||
/* We'll handle these when along with the time. */
|
||||
scaled[i] = 0;
|
||||
continue;
|
||||
|
||||
case ST_VS:
|
||||
case ST_GS:
|
||||
case ST_FS8:
|
||||
case ST_FS16:
|
||||
case ST_CS:
|
||||
get_written_and_reset(brw, i, &written, &reset);
|
||||
written = brw->shader_time.cumulative[i].written;
|
||||
reset = brw->shader_time.cumulative[i].reset;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -405,7 +377,7 @@ brw_report_shader_time(struct brw_context *brw)
|
|||
break;
|
||||
}
|
||||
|
||||
uint64_t time = brw->shader_time.cumulative[i];
|
||||
uint64_t time = brw->shader_time.cumulative[i].time;
|
||||
if (written) {
|
||||
scaled[i] = time / written * (written + reset);
|
||||
} else {
|
||||
|
@ -491,16 +463,19 @@ brw_collect_shader_time(struct brw_context *brw)
|
|||
* overhead compared to the cost of tracking the time in the first place.
|
||||
*/
|
||||
drm_intel_bo_map(brw->shader_time.bo, true);
|
||||
|
||||
uint32_t *times = brw->shader_time.bo->virtual;
|
||||
void *bo_map = brw->shader_time.bo->virtual;
|
||||
|
||||
for (int i = 0; i < brw->shader_time.num_entries; i++) {
|
||||
brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
|
||||
uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
|
||||
|
||||
brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
|
||||
brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
|
||||
brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
|
||||
}
|
||||
|
||||
/* Zero the BO out to clear it out for our next collection.
|
||||
*/
|
||||
memset(times, 0, brw->shader_time.bo->size);
|
||||
memset(bo_map, 0, brw->shader_time.bo->size);
|
||||
drm_intel_bo_unmap(brw->shader_time.bo);
|
||||
}
|
||||
|
||||
|
|
|
@ -1676,20 +1676,21 @@ vec4_visitor::emit_shader_time_end()
|
|||
*/
|
||||
emit(ADD(diff, src_reg(diff), src_reg(-2u)));
|
||||
|
||||
emit_shader_time_write(st_base, src_reg(diff));
|
||||
emit_shader_time_write(st_written, src_reg(1u));
|
||||
int shader_time_index =
|
||||
brw_get_shader_time_index(brw, shader_prog, prog, st_type);
|
||||
|
||||
emit_shader_time_write(shader_time_index, 0, src_reg(diff));
|
||||
emit_shader_time_write(shader_time_index, 1, src_reg(1u));
|
||||
emit(BRW_OPCODE_ELSE);
|
||||
emit_shader_time_write(st_reset, src_reg(1u));
|
||||
emit_shader_time_write(shader_time_index, 2, src_reg(1u));
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
|
||||
vec4_visitor::emit_shader_time_write(int shader_time_index,
|
||||
int shader_time_subindex,
|
||||
src_reg value)
|
||||
{
|
||||
int shader_time_index =
|
||||
brw_get_shader_time_index(brw, shader_prog, prog, type);
|
||||
|
||||
dst_reg dst =
|
||||
dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
|
||||
|
||||
|
@ -1698,7 +1699,8 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
|
|||
time.reg_offset++;
|
||||
|
||||
offset.type = BRW_REGISTER_TYPE_UD;
|
||||
emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
|
||||
int index = shader_time_index * 3 + shader_time_subindex;
|
||||
emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
|
||||
|
||||
time.type = BRW_REGISTER_TYPE_UD;
|
||||
emit(MOV(time, src_reg(value)));
|
||||
|
|
|
@ -85,9 +85,7 @@ public:
|
|||
gl_shader_stage stage,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
shader_time_shader_type st_base,
|
||||
shader_time_shader_type st_written,
|
||||
shader_time_shader_type st_reset);
|
||||
shader_time_shader_type st_type);
|
||||
~vec4_visitor();
|
||||
|
||||
dst_reg dst_null_f()
|
||||
|
@ -345,7 +343,7 @@ public:
|
|||
|
||||
void emit_shader_time_begin();
|
||||
void emit_shader_time_end();
|
||||
void emit_shader_time_write(enum shader_time_shader_type type,
|
||||
void emit_shader_time_write(int shader_time_index, int shader_time_subindex,
|
||||
src_reg value);
|
||||
|
||||
void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
|
||||
|
@ -413,9 +411,7 @@ private:
|
|||
*/
|
||||
const bool no_spills;
|
||||
|
||||
const shader_time_shader_type st_base;
|
||||
const shader_time_shader_type st_written;
|
||||
const shader_time_shader_type st_reset;
|
||||
const shader_time_shader_type st_type;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -41,8 +41,7 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
|
|||
bool no_spills)
|
||||
: vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base,
|
||||
&c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
|
||||
no_spills,
|
||||
ST_GS, ST_GS_WRITTEN, ST_GS_RESET),
|
||||
no_spills, ST_GS),
|
||||
c(c)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -3688,9 +3688,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
|
|||
gl_shader_stage stage,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
shader_time_shader_type st_base,
|
||||
shader_time_shader_type st_written,
|
||||
shader_time_shader_type st_reset)
|
||||
shader_time_shader_type st_type)
|
||||
: backend_shader(brw, mem_ctx, shader_prog, prog, &prog_data->base, stage),
|
||||
c(c),
|
||||
key(key),
|
||||
|
@ -3700,9 +3698,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
|
|||
first_non_payload_grf(0),
|
||||
need_all_constants_in_pull_buffer(false),
|
||||
no_spills(no_spills),
|
||||
st_base(st_base),
|
||||
st_written(st_written),
|
||||
st_reset(st_reset)
|
||||
st_type(st_type)
|
||||
{
|
||||
this->failed = false;
|
||||
|
||||
|
|
|
@ -221,7 +221,7 @@ vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
|
|||
&vs_compile->key.base, &vs_prog_data->base, prog,
|
||||
MESA_SHADER_VERTEX,
|
||||
mem_ctx, false /* no_spills */,
|
||||
ST_VS, ST_VS_WRITTEN, ST_VS_RESET),
|
||||
ST_VS),
|
||||
vs_compile(vs_compile),
|
||||
vs_prog_data(vs_prog_data)
|
||||
{
|
||||
|
|
|
@ -48,8 +48,7 @@ public:
|
|||
struct gl_shader_program *shader_prog)
|
||||
: vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
|
||||
MESA_SHADER_VERTEX, NULL,
|
||||
false /* no_spills */,
|
||||
ST_NONE, ST_NONE, ST_NONE)
|
||||
false /* no_spills */, ST_NONE)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -51,8 +51,7 @@ public:
|
|||
struct gl_shader_program *shader_prog)
|
||||
: vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
|
||||
MESA_SHADER_VERTEX, NULL,
|
||||
false /* no_spills */,
|
||||
ST_NONE, ST_NONE, ST_NONE)
|
||||
false /* no_spills */, ST_NONE)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue