v3d: Sync on last CS when non-compute stage uses resource written by CS

When a resource is written by a compute shader and then used by a
non-compute stage we sync on last compute job to guarantee that the
resource has been completely written when the next stage reads resources.

In the other cases how flushes are done guarantee the serialization of
the writes and reads.

To reproduce the failure the following tests should be executed in batch
as last test don't fail when run isolated:

KHR-GLES31.core.shader_image_load_store.basic-allFormats-load-fs
KHR-GLES31.core.shader_image_load_store.basic-allFormats-loadStoreComputeStage
KHR-GLES31.core.shader_image_load_store.basic-allTargets-load-cs
KHR-GLES31.core.shader_image_load_store.advanced-sync-vertexArray

v2: Use fence dep instead of bo_wait (Eric Anholt)
v3: Rename struct names (Iago Toral)
    Document why is not needed on graphics->compute case. (Iago Toral)
    Follow same code pattern of the other update of in_sync_bcl.
v4: Fixed comments style. (Iago Toral)

Fixes KHR-GLES31.core.shader_image_load_store.advanced-sync-vertexArray

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
CC: 19.3 20.0 <mesa-stable@lists.freedesktop.org>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2700>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2700>
This commit is contained in:
Jose Maria Casanova Crespo 2019-11-11 01:46:24 +01:00 committed by Marge Bot
parent 5de8bc7c75
commit 01496e3d1e
6 changed files with 76 additions and 25 deletions

View File

@ -128,7 +128,7 @@ v3d_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
struct pipe_surface *src_surf =
v3d_get_blit_surface(pctx, info->src.resource, info->src.level);
v3d_flush_jobs_reading_resource(v3d, info->src.resource);
v3d_flush_jobs_reading_resource(v3d, info->src.resource, false);
struct v3d_job *job = v3d_get_job(v3d, dst_surf, NULL);
pipe_surface_reference(&job->color_read, src_surf);
@ -381,8 +381,8 @@ v3d_tfu(struct pipe_context *pctx,
if (dst_base_slice->tiling == VC5_TILING_RASTER)
return false;
v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT);
v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT);
v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);
struct drm_v3d_submit_tfu tfu = {
.ios = (height << 16) | width,
@ -539,5 +539,5 @@ v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
* texture uploads before using the textures.
*/
v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT, false);
}

View File

@ -519,6 +519,12 @@ struct v3d_context {
bool active_queries;
/**
* If a compute job writes a resource read by a non-compute stage we
* should sync on the last compute job.
*/
bool sync_on_last_compute_job;
uint32_t tf_prims_generated;
uint32_t prims_generated;
@ -653,10 +659,12 @@ void v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job);
void v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo);
void v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
struct pipe_resource *prsc,
enum v3d_flush_cond flush_cond);
enum v3d_flush_cond flush_cond,
bool is_compute_pipeline);
void v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
struct pipe_resource *prsc,
enum v3d_flush_cond flush_cond);
enum v3d_flush_cond flush_cond,
bool is_compute_pipeline);
void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode);
void v3d_update_compiled_cs(struct v3d_context *v3d);

View File

@ -184,10 +184,23 @@ v3d_job_writes_resource_from_tf(struct v3d_job *job,
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
struct pipe_resource *prsc,
enum v3d_flush_cond flush_cond)
enum v3d_flush_cond flush_cond,
bool is_compute_pipeline)
{
struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
prsc);
struct v3d_resource *rsc = v3d_resource(prsc);
/* We need to sync if graphics pipeline reads a resource written
* by the compute pipeline. The same would be needed for the case of
* graphics-compute dependency but nowadays all compute jobs
* are serialized with the previous submitted job.
*/
if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
v3d->sync_on_last_compute_job = true;
rsc->compute_written = false;
}
if (!entry)
return;
@ -220,7 +233,8 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
void
v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
struct pipe_resource *prsc,
enum v3d_flush_cond flush_cond)
enum v3d_flush_cond flush_cond,
bool is_compute_pipeline)
{
struct v3d_resource *rsc = v3d_resource(prsc);
@ -230,7 +244,8 @@ v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
* caller intends to write to the resource, so we don't care if
* there was a previous TF write to it.
*/
v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond);
v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
is_compute_pipeline);
hash_table_foreach(v3d->jobs, entry) {
struct v3d_job *job = entry->data;
@ -329,7 +344,8 @@ v3d_get_job(struct v3d_context *v3d,
for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
if (cbufs[i]) {
v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
false);
pipe_surface_reference(&job->cbufs[i], cbufs[i]);
if (cbufs[i]->texture->nr_samples > 1)
@ -338,7 +354,8 @@ v3d_get_job(struct v3d_context *v3d,
}
if (zsbuf) {
v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
false);
pipe_surface_reference(&job->zsbuf, zsbuf);
if (zsbuf->texture->nr_samples > 1)
job->msaa = true;
@ -356,7 +373,8 @@ v3d_get_job(struct v3d_context *v3d,
if (rsc->separate_stencil) {
v3d_flush_jobs_reading_resource(v3d,
&rsc->separate_stencil->base,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
false);
_mesa_hash_table_insert(v3d->write_jobs,
&rsc->separate_stencil->base,
job);

View File

@ -170,19 +170,23 @@ v3d_map_usage_prep(struct pipe_context *pctx,
* don't violate any syncing requirements.
*/
v3d_flush_jobs_reading_resource(v3d, prsc,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
false);
}
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
/* If we're writing and the buffer is being used by the CL, we
* have to flush the CL first. If we're only reading, we need
* to flush if the CL has written our buffer.
*/
if (usage & PIPE_TRANSFER_WRITE)
if (usage & PIPE_TRANSFER_WRITE) {
v3d_flush_jobs_reading_resource(v3d, prsc,
V3D_FLUSH_ALWAYS);
else
V3D_FLUSH_ALWAYS,
false);
} else {
v3d_flush_jobs_writing_resource(v3d, prsc,
V3D_FLUSH_ALWAYS);
V3D_FLUSH_ALWAYS,
false);
}
}
if (usage & PIPE_TRANSFER_WRITE) {

View File

@ -129,6 +129,11 @@ struct v3d_resource {
int cpp;
bool tiled;
/**
* Indicates if the CS has written the resource
*/
bool compute_written;
/**
* Number of times the resource has been written to.
*

View File

@ -172,7 +172,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
v3d_update_shadow_texture(pctx, &view->base);
v3d_flush_jobs_writing_resource(v3d, view->texture,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
s == PIPE_SHADER_COMPUTE);
}
/* Flush writes to UBOs. */
@ -180,7 +181,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i];
if (cb->buffer) {
v3d_flush_jobs_writing_resource(v3d, cb->buffer,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
s == PIPE_SHADER_COMPUTE);
}
}
@ -189,7 +191,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
struct pipe_shader_buffer *sb = &v3d->ssbo[s].sb[i];
if (sb->buffer) {
v3d_flush_jobs_reading_resource(v3d, sb->buffer,
V3D_FLUSH_NOT_CURRENT_JOB);
V3D_FLUSH_NOT_CURRENT_JOB,
s == PIPE_SHADER_COMPUTE);
}
}
@ -198,7 +201,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
struct v3d_image_view *view = &v3d->shaderimg[s].si[i];
v3d_flush_jobs_reading_resource(v3d, view->base.resource,
V3D_FLUSH_NOT_CURRENT_JOB);
V3D_FLUSH_NOT_CURRENT_JOB,
s == PIPE_SHADER_COMPUTE);
}
/* Flush writes to our vertex buffers (i.e. from transform feedback) */
@ -207,7 +211,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
struct pipe_vertex_buffer *vb = &v3d->vertexbuf.vb[i];
v3d_flush_jobs_writing_resource(v3d, vb->buffer.resource,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
false);
}
}
}
@ -228,7 +233,8 @@ v3d_predraw_check_outputs(struct pipe_context *pctx)
const struct pipe_stream_output_target *target =
so->targets[i];
v3d_flush_jobs_reading_resource(v3d, target->buffer,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT,
false);
}
}
}
@ -1121,7 +1127,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (info->indirect) {
v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer,
V3D_FLUSH_DEFAULT);
V3D_FLUSH_DEFAULT, false);
}
v3d_predraw_check_outputs(pctx);
@ -1153,6 +1159,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
job->submit.in_sync_bcl = v3d->out_sync;
}
/* We also need to ensure that compute is complete when render depends
* on resources written by it.
*/
if (v3d->sync_on_last_compute_job) {
job->submit.in_sync_bcl = v3d->out_sync;
v3d->sync_on_last_compute_job = false;
}
/* Mark SSBOs and images as being written. We don't actually know
* which ones are read vs written, so just assume the worst.
*/
@ -1575,13 +1589,15 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
foreach_bit(i, v3d->ssbo[PIPE_SHADER_COMPUTE].enabled_mask) {
struct v3d_resource *rsc = v3d_resource(
v3d->ssbo[PIPE_SHADER_COMPUTE].sb[i].buffer);
rsc->writes++; /* XXX */
rsc->writes++;
rsc->compute_written = true;
}
foreach_bit(i, v3d->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
struct v3d_resource *rsc = v3d_resource(
v3d->shaderimg[PIPE_SHADER_COMPUTE].si[i].base.resource);
rsc->writes++;
rsc->compute_written = true;
}
v3d_bo_unreference(&uniforms.bo);