intel/blorp: Emit VF cache invalidates for 48-bit bugs with softpin.

commit 92f01fc5f9 made i965 start emitting
VF cache invalidates when the high bits of vertex buffers change.  But
we were not tracking vertex buffers emitted by BLORP.  This was papered
over by a mistake where I emitted VF cache invalidates all the time,
which Chris fixed in commit 3ac5fbadfd.

This patch adds a new hook which allows the driver to track addresses
and request a VF cache invalidate as appropriate.

v2: Make the driver do the PIPE_CONTROL so it can apply workarounds
    (caught by Jason Ekstrand).  Rebase on anv bug fix.
v3: Don't screw up the boolean (caught by Jason Ekstrand).

Fixes: 92f01fc5f9 ("i965: Emit VF cache invalidates for 48-bit addressing bugs with softpin.")
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
Kenneth Graunke 2018-05-31 16:39:16 -07:00
parent 2a74296f24
commit 74259b98aa
3 changed files with 51 additions and 5 deletions

View File

@ -59,6 +59,10 @@ blorp_alloc_dynamic_state(struct blorp_batch *batch,
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
struct blorp_address *addr);
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
const struct blorp_address *addrs,
unsigned num_vbs);
#if GEN_GEN >= 8
static struct blorp_address
@ -334,19 +338,22 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch,
uint32_t num_vbs = 2;
memset(vb, 0, sizeof(vb));
struct blorp_address addr;
struct blorp_address addrs[2] = {};
uint32_t size;
blorp_emit_vertex_data(batch, params, &addr, &size);
blorp_fill_vertex_buffer_state(batch, vb, 0, addr, size, 3 * sizeof(float));
blorp_emit_vertex_data(batch, params, &addrs[0], &size);
blorp_fill_vertex_buffer_state(batch, vb, 0, addrs[0], size,
3 * sizeof(float));
blorp_emit_input_varying_data(batch, params, &addr, &size);
blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0);
blorp_emit_input_varying_data(batch, params, &addrs[1], &size);
blorp_fill_vertex_buffer_state(batch, vb, 1, addrs[1], size, 0);
const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
if (!dw)
return;
blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs);
for (unsigned i = 0; i < num_vbs; i++) {
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
dw += GENX(VERTEX_BUFFER_STATE_length);

View File

@ -158,6 +158,16 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
return vb_state.map;
}
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
const struct blorp_address *addrs,
unsigned num_vbs)
{
/* anv forces all vertex buffers into the low 4GB so there are never any
* transitions that require a VF invalidation.
*/
}
#if GEN_GEN >= 8
static struct blorp_address
blorp_get_workaround_page(struct blorp_batch *batch)

View File

@ -189,6 +189,35 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
return data;
}
/**
* See vf_invalidate_for_vb_48b_transitions in genX_state_upload.c.
*/
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
const struct blorp_address *addrs,
unsigned num_vbs)
{
#if GEN_GEN >= 8
struct brw_context *brw = batch->driver_batch;
bool need_invalidate = false;
for (unsigned i = 0; i < num_vbs; i++) {
struct brw_bo *bo = addrs[i].buffer;
uint16_t high_bits =
bo && (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32u : 0;
if (high_bits != brw->vb.last_bo_high_bits[i]) {
need_invalidate = true;
brw->vb.last_bo_high_bits[i] = high_bits;
}
}
if (need_invalidate) {
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
}
#endif
}
#if GEN_GEN >= 8
static struct blorp_address
blorp_get_workaround_page(struct blorp_batch *batch)