v3d: Sync on last CS when non-compute stage uses resource written by CS

When a resource is written by a compute shader and then used by a non-compute stage we sync on last compute job to guarantee that the resource has been completely written when the next stage reads resources. In the other cases how flushes are done guarantee the serialization of the writes and reads. To reproduce the failure the following tests should be executed in batch as last test don't fail when run isolated: KHR-GLES31.core.shader_image_load_store.basic-allFormats-load-fs KHR-GLES31.core.shader_image_load_store.basic-allFormats-loadStoreComputeStage KHR-GLES31.core.shader_image_load_store.basic-allTargets-load-cs KHR-GLES31.core.shader_image_load_store.advanced-sync-vertexArray v2: Use fence dep instead of bo_wait (Eric Anholt) v3: Rename struct names (Iago Toral) Document why is not needed on graphics->compute case. (Iago Toral) Follow same code pattern of the other update of in_sync_bcl. v4: Fixed comments style. (Iago Toral) Fixes KHR-GLES31.core.shader_image_load_store.advanced-sync-vertexArray Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> CC: 19.3 20.0 <mesa-stable@lists.freedesktop.org> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2700> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2700>
2019-11-11 01:46:24 +01:00 · 2019-11-11 01:46:24 +01:00 · 01496e3d1e
parent 5de8bc7c75
commit 01496e3d1e
6 changed files with 76 additions and 25 deletions
--- a/src/gallium/drivers/v3d/v3d_blit.c
+++ b/src/gallium/drivers/v3d/v3d_blit.c
@ -128,7 +128,7 @@ v3d_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
        struct pipe_surface *src_surf =
                v3d_get_blit_surface(pctx, info->src.resource, info->src.level);

-        v3d_flush_jobs_reading_resource(v3d, info->src.resource);
+        v3d_flush_jobs_reading_resource(v3d, info->src.resource, false);

        struct v3d_job *job = v3d_get_job(v3d, dst_surf, NULL);
        pipe_surface_reference(&job->color_read, src_surf);
@ -381,8 +381,8 @@ v3d_tfu(struct pipe_context *pctx,
        if (dst_base_slice->tiling == VC5_TILING_RASTER)
                return false;

-        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT);
-        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT);
+        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
+        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);

        struct drm_v3d_submit_tfu tfu = {
                .ios = (height << 16) | width,
@ -539,5 +539,5 @@ v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
         * texture uploads before using the textures.
         */
        v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
-                                        V3D_FLUSH_DEFAULT);
+                                        V3D_FLUSH_DEFAULT, false);
 }
--- a/src/gallium/drivers/v3d/v3d_context.h
+++ b/src/gallium/drivers/v3d/v3d_context.h
@ -519,6 +519,12 @@ struct v3d_context {

        bool active_queries;

+        /**
+         * If a compute job writes a resource read by a non-compute stage we
+         * should sync on the last compute job.
+         */
+        bool sync_on_last_compute_job;
+
        uint32_t tf_prims_generated;
        uint32_t prims_generated;

@ -653,10 +659,12 @@ void v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job);
 void v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo);
 void v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                     struct pipe_resource *prsc,
-                                     enum v3d_flush_cond flush_cond);
+                                     enum v3d_flush_cond flush_cond,
+                                     bool is_compute_pipeline);
 void v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                     struct pipe_resource *prsc,
-                                     enum v3d_flush_cond flush_cond);
+                                     enum v3d_flush_cond flush_cond,
+                                     bool is_compute_pipeline);
 void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode);
 void v3d_update_compiled_cs(struct v3d_context *v3d);

--- a/src/gallium/drivers/v3d/v3d_job.c
+++ b/src/gallium/drivers/v3d/v3d_job.c
@ -184,10 +184,23 @@ v3d_job_writes_resource_from_tf(struct v3d_job *job,
 void
 v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
-                                enum v3d_flush_cond flush_cond)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
 {
        struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                           prsc);
+        struct v3d_resource *rsc = v3d_resource(prsc);
+
+        /* We need to sync if graphics pipeline reads a resource written
+         * by the compute pipeline. The same would be needed for the case of
+         * graphics-compute dependency but nowadays all compute jobs
+         * are serialized with the previous submitted job.
+         */
+        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
+           v3d->sync_on_last_compute_job = true;
+           rsc->compute_written = false;
+        }
+
        if (!entry)
                return;

@ -220,7 +233,8 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
 void
 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
-                                enum v3d_flush_cond flush_cond)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
 {
        struct v3d_resource *rsc = v3d_resource(prsc);

@ -230,7 +244,8 @@ v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
         * caller intends to write to the resource, so we don't care if
         * there was a previous TF write to it.
         */
-        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond);
+        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
+                                        is_compute_pipeline);

        hash_table_foreach(v3d->jobs, entry) {
                struct v3d_job *job = entry->data;
@ -329,7 +344,8 @@ v3d_get_job(struct v3d_context *v3d,
        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                if (cbufs[i]) {
                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);

                        if (cbufs[i]->texture->nr_samples > 1)
@ -338,7 +354,8 @@ v3d_get_job(struct v3d_context *v3d,
        }
        if (zsbuf) {
                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT,
+                                                false);
                pipe_surface_reference(&job->zsbuf, zsbuf);
                if (zsbuf->texture->nr_samples > 1)
                        job->msaa = true;
@ -356,7 +373,8 @@ v3d_get_job(struct v3d_context *v3d,
                if (rsc->separate_stencil) {
                        v3d_flush_jobs_reading_resource(v3d,
                                                        &rsc->separate_stencil->base,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                &rsc->separate_stencil->base,
                                                job);
--- a/src/gallium/drivers/v3d/v3d_resource.c
+++ b/src/gallium/drivers/v3d/v3d_resource.c
@ -170,19 +170,23 @@ v3d_map_usage_prep(struct pipe_context *pctx,
                         * don't violate any syncing requirements.
                         */
                        v3d_flush_jobs_reading_resource(v3d, prsc,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                }
        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                /* If we're writing and the buffer is being used by the CL, we
                 * have to flush the CL first.  If we're only reading, we need
                 * to flush if the CL has written our buffer.
                 */
-                if (usage & PIPE_TRANSFER_WRITE)
+                if (usage & PIPE_TRANSFER_WRITE) {
                        v3d_flush_jobs_reading_resource(v3d, prsc,
-                                                        V3D_FLUSH_ALWAYS);
-                else
+                                                        V3D_FLUSH_ALWAYS,
+                                                        false);
+                } else {
                        v3d_flush_jobs_writing_resource(v3d, prsc,
-                                                        V3D_FLUSH_ALWAYS);
+                                                        V3D_FLUSH_ALWAYS,
+                                                        false);
+                }
        }

        if (usage & PIPE_TRANSFER_WRITE) {
--- a/src/gallium/drivers/v3d/v3d_resource.h
+++ b/src/gallium/drivers/v3d/v3d_resource.h
@ -129,6 +129,11 @@ struct v3d_resource {
        int cpp;
        bool tiled;

+        /**
+         * Indicates if the CS has written the resource
+         */
+        bool compute_written;
+
        /**
         * Number of times the resource has been written to.
         *
--- a/src/gallium/drivers/v3d/v3dx_draw.c
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@ -172,7 +172,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                        v3d_update_shadow_texture(pctx, &view->base);

                v3d_flush_jobs_writing_resource(v3d, view->texture,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT,
+                                                s == PIPE_SHADER_COMPUTE);
        }

        /* Flush writes to UBOs. */
@ -180,7 +181,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i];
                if (cb->buffer) {
                        v3d_flush_jobs_writing_resource(v3d, cb->buffer,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        s == PIPE_SHADER_COMPUTE);
                }
        }

@ -189,7 +191,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                struct pipe_shader_buffer *sb = &v3d->ssbo[s].sb[i];
                if (sb->buffer) {
                        v3d_flush_jobs_reading_resource(v3d, sb->buffer,
-                                                        V3D_FLUSH_NOT_CURRENT_JOB);
+                                                        V3D_FLUSH_NOT_CURRENT_JOB,
+                                                        s == PIPE_SHADER_COMPUTE);
                }
        }

@ -198,7 +201,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                struct v3d_image_view *view = &v3d->shaderimg[s].si[i];

                v3d_flush_jobs_reading_resource(v3d, view->base.resource,
-                                                V3D_FLUSH_NOT_CURRENT_JOB);
+                                                V3D_FLUSH_NOT_CURRENT_JOB,
+                                                s == PIPE_SHADER_COMPUTE);
        }

        /* Flush writes to our vertex buffers (i.e. from transform feedback) */
@ -207,7 +211,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                        struct pipe_vertex_buffer *vb = &v3d->vertexbuf.vb[i];

                        v3d_flush_jobs_writing_resource(v3d, vb->buffer.resource,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                }
        }
 }
@ -228,7 +233,8 @@ v3d_predraw_check_outputs(struct pipe_context *pctx)
                        const struct pipe_stream_output_target *target =
                                so->targets[i];
                        v3d_flush_jobs_reading_resource(v3d, target->buffer,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                }
        }
 }
@ -1121,7 +1127,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)

        if (info->indirect) {
                v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT, false);
        }

        v3d_predraw_check_outputs(pctx);
@ -1153,6 +1159,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                job->submit.in_sync_bcl = v3d->out_sync;
        }

+        /* We also need to ensure that compute is complete when render depends
+         * on resources written by it.
+         */
+        if (v3d->sync_on_last_compute_job) {
+                job->submit.in_sync_bcl = v3d->out_sync;
+                v3d->sync_on_last_compute_job = false;
+        }
+
        /* Mark SSBOs and images as being written.  We don't actually know
         * which ones are read vs written, so just assume the worst.
         */
@ -1575,13 +1589,15 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
        foreach_bit(i, v3d->ssbo[PIPE_SHADER_COMPUTE].enabled_mask) {
                struct v3d_resource *rsc = v3d_resource(
                        v3d->ssbo[PIPE_SHADER_COMPUTE].sb[i].buffer);
-                rsc->writes++; /* XXX */
+                rsc->writes++;
+                rsc->compute_written = true;
        }

        foreach_bit(i, v3d->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
                struct v3d_resource *rsc = v3d_resource(
                        v3d->shaderimg[PIPE_SHADER_COMPUTE].si[i].base.resource);
                rsc->writes++;
+                rsc->compute_written = true;
        }

        v3d_bo_unreference(&uniforms.bo);