i965: Fix rendering to small mipmaps of depth/stencil buffers using a temp mt.
Fixes 51 piglit tests (fbo-clear-formats, and most of the remaining failures in depthstencil). Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
5c8dd6cf79
commit
7139ab80ca
|
@ -1134,6 +1134,7 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
|
|||
struct intel_mipmap_tree *stencil_mt,
|
||||
uint32_t *out_tile_mask_x,
|
||||
uint32_t *out_tile_mask_y);
|
||||
void brw_workaround_depthstencil_alignment(struct brw_context *brw);
|
||||
|
||||
/*======================================================================
|
||||
* brw_queryobj.c
|
||||
|
|
|
@ -440,6 +440,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
|
|||
*/
|
||||
brw_predraw_resolve_buffers(brw);
|
||||
|
||||
/* This workaround has to happen outside of brw_state_upload() because it
|
||||
* may flush the batchbuffer for a blit, affecting the state flags.
|
||||
*/
|
||||
brw_workaround_depthstencil_alignment(brw);
|
||||
|
||||
/* Bind all inputs, derive varying and size information:
|
||||
*/
|
||||
brw_merge_inputs( brw, arrays );
|
||||
|
|
|
@ -317,6 +317,91 @@ brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
|
|||
*out_tile_mask_y = tile_mask_y;
|
||||
}
|
||||
|
||||
void
|
||||
brw_workaround_depthstencil_alignment(struct brw_context *brw)
|
||||
{
|
||||
struct intel_context *intel = &brw->intel;
|
||||
struct gl_context *ctx = &intel->ctx;
|
||||
struct gl_framebuffer *fb = ctx->DrawBuffer;
|
||||
bool rebase_depth = false;
|
||||
bool rebase_stencil = false;
|
||||
struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
|
||||
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
|
||||
struct intel_mipmap_tree *depth_mt = NULL;
|
||||
struct intel_mipmap_tree *stencil_mt = NULL;
|
||||
|
||||
if (depth_irb)
|
||||
depth_mt = depth_irb->mt;
|
||||
if (stencil_irb)
|
||||
stencil_mt = stencil_irb->mt;
|
||||
|
||||
uint32_t tile_mask_x, tile_mask_y;
|
||||
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
|
||||
&tile_mask_x, &tile_mask_y);
|
||||
|
||||
if (depth_irb) {
|
||||
uint32_t depth_tile_x = depth_irb->draw_x & tile_mask_x;
|
||||
uint32_t depth_tile_y = depth_irb->draw_y & tile_mask_y;
|
||||
|
||||
/* The low 3 bits of x and y tile offset are ignored by the hardware.
|
||||
* Rebase if they're set, so that we can actually render to the buffer.
|
||||
*/
|
||||
if (depth_tile_x & 7 || depth_tile_y & 7)
|
||||
rebase_depth = true;
|
||||
|
||||
/* We didn't even have intra-tile offsets before g45. */
|
||||
if (intel->gen == 4 && !intel->is_g4x) {
|
||||
if (depth_tile_x || depth_tile_y)
|
||||
rebase_depth = true;
|
||||
}
|
||||
|
||||
if (stencil_irb) {
|
||||
int stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
|
||||
int stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
|
||||
|
||||
/* If the two don't match up, then we need to move them to a
|
||||
* temporary so that the x/y draw offsets will end up being 0.
|
||||
*/
|
||||
if (depth_tile_x != stencil_tile_x ||
|
||||
depth_tile_y != stencil_tile_y) {
|
||||
rebase_depth = true;
|
||||
rebase_stencil = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we have (just) stencil, check it for ignored low bits as well */
|
||||
if (stencil_irb) {
|
||||
uint32_t stencil_tile_x = stencil_irb->draw_x & tile_mask_x;
|
||||
uint32_t stencil_tile_y = stencil_irb->draw_y & tile_mask_y;
|
||||
|
||||
if (stencil_tile_x & 7 || stencil_tile_y & 7)
|
||||
rebase_stencil = true;
|
||||
|
||||
if (intel->gen == 4 && !intel->is_g4x) {
|
||||
if (stencil_tile_x || stencil_tile_y)
|
||||
rebase_stencil = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (rebase_depth) {
|
||||
intel_renderbuffer_move_to_temp(intel, depth_irb);
|
||||
|
||||
if (stencil_irb && stencil_irb->mt == depth_mt) {
|
||||
intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
|
||||
intel_renderbuffer_set_draw_offset(stencil_irb);
|
||||
}
|
||||
}
|
||||
if (rebase_stencil) {
|
||||
intel_renderbuffer_move_to_temp(intel, stencil_irb);
|
||||
|
||||
if (depth_irb && depth_irb->mt == stencil_mt) {
|
||||
intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
|
||||
intel_renderbuffer_set_draw_offset(depth_irb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_depthbuffer(struct brw_context *brw)
|
||||
{
|
||||
struct intel_context *intel = &brw->intel;
|
||||
|
@ -331,12 +416,6 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
unsigned int len;
|
||||
bool separate_stencil = false;
|
||||
|
||||
/* Amount by which drawing should be offset in order to draw to the
|
||||
* appropriate miplevel/zoffset/cubeface. We will extract these values
|
||||
* from depth_irb or stencil_irb once we determine which is present.
|
||||
*/
|
||||
uint32_t draw_x = 0, draw_y = 0;
|
||||
|
||||
if (depth_irb &&
|
||||
depth_irb->mt &&
|
||||
depth_irb->mt->hiz_mt) {
|
||||
|
@ -367,6 +446,39 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
|
||||
&tile_mask_x, &tile_mask_y);
|
||||
|
||||
/* The intra-tile offsets should already have been forced into agreement by
|
||||
* brw_workaround_depthstencil_alignment().
|
||||
*/
|
||||
uint32_t tile_x = 0, tile_y = 0;
|
||||
if (depth_mt) {
|
||||
tile_x = depth_irb->draw_x & tile_mask_x;
|
||||
tile_y = depth_irb->draw_y & tile_mask_y;
|
||||
|
||||
if (stencil_mt) {
|
||||
assert((stencil_irb->draw_x & tile_mask_x) == tile_x);
|
||||
assert((stencil_irb->draw_y & tile_mask_y) == tile_y);
|
||||
}
|
||||
} else if (stencil_mt) {
|
||||
tile_x = stencil_irb->draw_x & tile_mask_x;
|
||||
tile_y = stencil_irb->draw_y & tile_mask_y;
|
||||
}
|
||||
|
||||
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
|
||||
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
|
||||
* Coordinate Offset X/Y":
|
||||
*
|
||||
* "The 3 LSBs of both offsets must be zero to ensure correct
|
||||
* alignment"
|
||||
*
|
||||
* This should already have been corrected by
|
||||
* brw_workaround_depthstencil_alignment().
|
||||
*/
|
||||
WARN_ONCE((tile_x & 7) || (tile_y & 7),
|
||||
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
|
||||
"Truncating offset, bad rendering may occur.\n");
|
||||
tile_x &= ~7;
|
||||
tile_y &= ~7;
|
||||
|
||||
/* If there's a packed depth/stencil bound to stencil only, we need to
|
||||
* emit the packed depth/stencil buffer packet.
|
||||
*/
|
||||
|
@ -398,8 +510,6 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
ADVANCE_BATCH();
|
||||
|
||||
} else if (!depth_irb && separate_stencil) {
|
||||
uint32_t tile_x, tile_y;
|
||||
|
||||
/*
|
||||
* There exists a separate stencil buffer but no depth buffer.
|
||||
*
|
||||
|
@ -422,29 +532,6 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
*/
|
||||
assert(intel->has_separate_stencil);
|
||||
|
||||
draw_x = stencil_irb->draw_x;
|
||||
draw_y = stencil_irb->draw_y;
|
||||
tile_x = draw_x & tile_mask_x;
|
||||
tile_y = draw_y & tile_mask_y;
|
||||
|
||||
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
|
||||
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
|
||||
* Coordinate Offset X/Y":
|
||||
*
|
||||
* "The 3 LSBs of both offsets must be zero to ensure correct
|
||||
* alignment"
|
||||
*
|
||||
* We have no guarantee that tile_x and tile_y are correctly aligned,
|
||||
* since they are determined by the mipmap layout, which is only aligned
|
||||
* to multiples of 4.
|
||||
*
|
||||
* So, to avoid hanging the GPU, just smash the low order 3 bits of
|
||||
* tile_x and tile_y to 0. This is a temporary workaround until we come
|
||||
* up with a better solution.
|
||||
*/
|
||||
tile_x &= ~7;
|
||||
tile_y &= ~7;
|
||||
|
||||
BEGIN_BATCH(len);
|
||||
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
|
||||
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
|
||||
|
@ -470,7 +557,7 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
|
||||
} else {
|
||||
struct intel_region *region = depth_irb->mt->region;
|
||||
uint32_t tile_x, tile_y, offset;
|
||||
uint32_t offset;
|
||||
|
||||
/* If using separate stencil, hiz must be enabled. */
|
||||
assert(!separate_stencil || hiz_region);
|
||||
|
@ -478,32 +565,10 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
|
||||
assert(!hiz_region || region->tiling == I915_TILING_Y);
|
||||
|
||||
draw_x = depth_irb->draw_x;
|
||||
draw_y = depth_irb->draw_y;
|
||||
tile_x = draw_x & tile_mask_x;
|
||||
tile_y = draw_y & tile_mask_y;
|
||||
|
||||
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
|
||||
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
|
||||
* Coordinate Offset X/Y":
|
||||
*
|
||||
* "The 3 LSBs of both offsets must be zero to ensure correct
|
||||
* alignment"
|
||||
*
|
||||
* We have no guarantee that tile_x and tile_y are correctly aligned,
|
||||
* since they are determined by the mipmap layout, which is only aligned
|
||||
* to multiples of 4.
|
||||
*
|
||||
* So, to avoid hanging the GPU, just smash the low order 3 bits of
|
||||
* tile_x and tile_y to 0. This is a temporary workaround until we come
|
||||
* up with a better solution.
|
||||
*/
|
||||
tile_x &= ~7;
|
||||
tile_y &= ~7;
|
||||
|
||||
offset = intel_region_get_aligned_offset(region,
|
||||
draw_x & ~tile_mask_x,
|
||||
draw_y & ~tile_mask_y, false);
|
||||
depth_irb->draw_x & ~tile_mask_x,
|
||||
depth_irb->draw_y & ~tile_mask_y,
|
||||
false);
|
||||
|
||||
BEGIN_BATCH(len);
|
||||
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
|
||||
|
@ -546,8 +611,8 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
if (hiz_region) {
|
||||
uint32_t hiz_offset =
|
||||
intel_region_get_aligned_offset(hiz_region,
|
||||
draw_x & ~tile_mask_x,
|
||||
(draw_y & ~tile_mask_y) / 2,
|
||||
depth_irb->draw_x & ~tile_mask_x,
|
||||
(depth_irb->draw_y & ~tile_mask_y) / 2,
|
||||
false);
|
||||
|
||||
BEGIN_BATCH(3);
|
||||
|
@ -574,8 +639,8 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
* that the region is untiled; in fact it's W tiled.
|
||||
*/
|
||||
uint32_t stencil_offset =
|
||||
(draw_y & ~tile_mask_y) * region->pitch +
|
||||
(draw_x & ~tile_mask_x) * 64;
|
||||
(stencil_irb->draw_y & ~tile_mask_y) * region->pitch +
|
||||
(stencil_irb->draw_x & ~tile_mask_x) * 64;
|
||||
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
|
||||
|
|
|
@ -42,12 +42,6 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
*stencil_mt = NULL,
|
||||
*hiz_mt = NULL;
|
||||
|
||||
/* Amount by which drawing should be offset in order to draw to the
|
||||
* appropriate miplevel/zoffset/cubeface. We will extract these values
|
||||
* from depth_irb or stencil_irb once we determine which is present.
|
||||
*/
|
||||
uint32_t draw_x = 0, draw_y = 0;
|
||||
|
||||
if (drb)
|
||||
depth_mt = drb->mt;
|
||||
|
||||
|
@ -66,6 +60,39 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
|
||||
&tile_mask_x, &tile_mask_y);
|
||||
|
||||
/* The intra-tile offsets should already have been forced into agreement by
|
||||
* brw_workaround_depthstencil_alignment().
|
||||
*/
|
||||
uint32_t tile_x = 0, tile_y = 0;
|
||||
if (depth_mt) {
|
||||
tile_x = drb->draw_x & tile_mask_x;
|
||||
tile_y = drb->draw_y & tile_mask_y;
|
||||
|
||||
if (stencil_mt) {
|
||||
assert((srb->draw_x & tile_mask_x) == tile_x);
|
||||
assert((srb->draw_y & tile_mask_y) == tile_y);
|
||||
}
|
||||
} else if (stencil_mt) {
|
||||
tile_x = srb->draw_x & tile_mask_x;
|
||||
tile_y = srb->draw_y & tile_mask_y;
|
||||
}
|
||||
|
||||
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
|
||||
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
|
||||
* Coordinate Offset X/Y":
|
||||
*
|
||||
* "The 3 LSBs of both offsets must be zero to ensure correct
|
||||
* alignment"
|
||||
*
|
||||
* This should already have been corrected by
|
||||
* brw_workaround_depthstencil_alignment().
|
||||
*/
|
||||
WARN_ONCE((tile_x & 7) || (tile_y & 7),
|
||||
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
|
||||
"Truncating offset, bad rendering may occur.\n");
|
||||
tile_x &= ~7;
|
||||
tile_y &= ~7;
|
||||
|
||||
/* Gen7 doesn't support packed depth/stencil */
|
||||
assert(stencil_mt == NULL || depth_mt != stencil_mt);
|
||||
assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));
|
||||
|
@ -75,7 +102,6 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
if (depth_mt == NULL) {
|
||||
uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
|
||||
uint32_t dw3 = 0;
|
||||
uint32_t tile_x = 0, tile_y = 0;
|
||||
|
||||
if (stencil_mt == NULL) {
|
||||
dw1 |= (BRW_SURFACE_NULL << 29);
|
||||
|
@ -83,29 +109,6 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
/* _NEW_STENCIL: enable stencil buffer writes */
|
||||
dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
|
||||
|
||||
draw_x = srb->draw_x;
|
||||
draw_y = srb->draw_y;
|
||||
tile_x = draw_x & tile_mask_x;
|
||||
tile_y = draw_y & tile_mask_y;
|
||||
|
||||
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
|
||||
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
|
||||
* Coordinate Offset X/Y":
|
||||
*
|
||||
* "The 3 LSBs of both offsets must be zero to ensure correct
|
||||
* alignment"
|
||||
*
|
||||
* We have no guarantee that tile_x and tile_y are correctly aligned,
|
||||
* since they are determined by the mipmap layout, which is only
|
||||
* aligned to multiples of 4.
|
||||
*
|
||||
* So, to avoid hanging the GPU, just smash the low order 3 bits of
|
||||
* tile_x and tile_y to 0. This is a temporary workaround until we
|
||||
* come up with a better solution.
|
||||
*/
|
||||
tile_x &= ~7;
|
||||
tile_y &= ~7;
|
||||
|
||||
/* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
|
||||
dw1 |= (BRW_SURFACE_2D << 29);
|
||||
dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
|
||||
|
@ -123,34 +126,11 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
ADVANCE_BATCH();
|
||||
} else {
|
||||
struct intel_region *region = depth_mt->region;
|
||||
uint32_t tile_x, tile_y, offset;
|
||||
|
||||
draw_x = drb->draw_x;
|
||||
draw_y = drb->draw_y;
|
||||
tile_x = draw_x & tile_mask_x;
|
||||
tile_y = draw_y & tile_mask_y;
|
||||
|
||||
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
|
||||
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
|
||||
* Coordinate Offset X/Y":
|
||||
*
|
||||
* "The 3 LSBs of both offsets must be zero to ensure correct
|
||||
* alignment"
|
||||
*
|
||||
* We have no guarantee that tile_x and tile_y are correctly aligned,
|
||||
* since they are determined by the mipmap layout, which is only aligned
|
||||
* to multiples of 4.
|
||||
*
|
||||
* So, to avoid hanging the GPU, just smash the low order 3 bits of
|
||||
* tile_x and tile_y to 0. This is a temporary workaround until we come
|
||||
* up with a better solution.
|
||||
*/
|
||||
tile_x &= ~7;
|
||||
tile_y &= ~7;
|
||||
uint32_t offset;
|
||||
|
||||
offset = intel_region_get_aligned_offset(region,
|
||||
draw_x & ~tile_mask_x,
|
||||
draw_y & ~tile_mask_y,
|
||||
drb->draw_x & ~tile_mask_x,
|
||||
drb->draw_y & ~tile_mask_y,
|
||||
false);
|
||||
|
||||
assert(region->tiling == I915_TILING_Y);
|
||||
|
@ -184,8 +164,8 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
} else {
|
||||
uint32_t hiz_offset =
|
||||
intel_region_get_aligned_offset(hiz_mt->region,
|
||||
draw_x & ~tile_mask_x,
|
||||
(draw_y & ~tile_mask_y) / 2,
|
||||
drb->draw_x & ~tile_mask_x,
|
||||
(drb->draw_y & ~tile_mask_y) / 2,
|
||||
false);
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
|
||||
|
@ -211,8 +191,8 @@ static void emit_depthbuffer(struct brw_context *brw)
|
|||
* that the region is untiled; in fact it's W tiled.
|
||||
*/
|
||||
uint32_t stencil_offset =
|
||||
(draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
|
||||
(draw_x & ~tile_mask_x) * 64;
|
||||
(srb->draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
|
||||
(srb->draw_x & ~tile_mask_x) * 64;
|
||||
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
|
||||
|
|
Loading…
Reference in New Issue