i965: Share the flush for brw_blorp_miptree_download into a pbo
As all users of brw_blorp_miptree_download() must emit a full pipeline and cache flush when targetting a user PBO (as that PBO may then be subsequently bound or *be* bound anywhere and outside of the driver dirty tracking) move that flush into brw_blorp_miptree_download() itself. v2 (Ken): Rebase without userptr stuff so it can land sooner. Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
760a5815d4
commit
c866e0b3ca
|
@ -1094,6 +1094,28 @@ brw_blorp_download_miptree(struct brw_context *brw,
|
|||
|
||||
result = true;
|
||||
|
||||
/* As we implement PBO transfers by binding the user-provided BO as a
|
||||
* fake framebuffer and rendering to it. This breaks the invariant of the
|
||||
* GL that nothing is able to render to a BO, causing nondeterministic
|
||||
* corruption issues because the render cache is not coherent with a
|
||||
* number of other caches that the BO could potentially be bound to
|
||||
* afterwards.
|
||||
*
|
||||
* This could be solved in the same way that we guarantee texture
|
||||
* coherency after a texture is attached to a framebuffer and
|
||||
* rendered to, but that would involve checking *all* BOs bound to
|
||||
* the pipeline for the case we need to emit a cache flush due to
|
||||
* previous rendering to any of them -- Including vertex, index,
|
||||
* uniform, atomic counter, shader image, transform feedback,
|
||||
* indirect draw buffers, etc.
|
||||
*
|
||||
* That would increase the per-draw call overhead even though it's
|
||||
* very unlikely that any of the BOs bound to the pipeline has been
|
||||
* rendered to via a PBO at any point, so it seems better to just
|
||||
* flush here unconditionally.
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
|
||||
err:
|
||||
brw_bo_unreference(dst_bo);
|
||||
|
||||
|
|
|
@ -275,30 +275,8 @@ intelReadPixels(struct gl_context * ctx,
|
|||
|
||||
if (_mesa_is_bufferobj(pack->BufferObj)) {
|
||||
if (intel_readpixels_blorp(ctx, x, y, width, height,
|
||||
format, type, pixels, pack)) {
|
||||
/* intel_readpixels_blorp() implements PBO transfers by
|
||||
* binding the user-provided BO as a fake framebuffer and rendering
|
||||
* to it. This breaks the invariant of the GL that nothing is able
|
||||
* to render to a BO, causing nondeterministic corruption issues
|
||||
* because the render cache is not coherent with a number of other
|
||||
* caches that the BO could potentially be bound to afterwards.
|
||||
*
|
||||
* This could be solved in the same way that we guarantee texture
|
||||
* coherency after a texture is attached to a framebuffer and
|
||||
* rendered to, but that would involve checking *all* BOs bound to
|
||||
* the pipeline for the case we need to emit a cache flush due to
|
||||
* previous rendering to any of them -- Including vertex, index,
|
||||
* uniform, atomic counter, shader image, transform feedback,
|
||||
* indirect draw buffers, etc.
|
||||
*
|
||||
* That would increase the per-draw call overhead even though it's
|
||||
* very unlikely that any of the BOs bound to the pipeline has been
|
||||
* rendered to via a PBO at any point, so it seems better to just
|
||||
* flush here unconditionally.
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
format, type, pixels, pack))
|
||||
return;
|
||||
}
|
||||
|
||||
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
|
||||
}
|
||||
|
|
|
@ -747,15 +747,8 @@ intel_get_tex_sub_image(struct gl_context *ctx,
|
|||
if (intel_gettexsubimage_blorp(brw, texImage,
|
||||
xoffset, yoffset, zoffset,
|
||||
width, height, depth, format, type,
|
||||
pixels, &ctx->Pack)) {
|
||||
/* Flush to guarantee coherency between the render cache and other
|
||||
* caches the PBO could potentially be bound to after this point.
|
||||
* See the related comment in intelReadPixels() for a more detailed
|
||||
* explanation.
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
pixels, &ctx->Pack))
|
||||
return;
|
||||
}
|
||||
|
||||
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue