intel: Fix stencil buffer to be W tiled
Until now, the stencil buffer was allocated as a Y tiled buffer, because in several locations the PRM states that it is. However, it is actually W tiled. From the PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Format: W-Major Tile Format is used for separate stencil. The GTT is incapable of W fencing, so we allocate the stencil buffer with I915_TILING_NONE and decode the tile's layout in software. This fix touches the following portions of code: - In intel_allocate_renderbuffer_storage(), allocate the stencil buffer with I915_TILING_NONE. - In intel_verify_dri2_has_hiz(), verify that the stencil buffer is not tiled. - In the stencil buffer's span functions, the tile's layout must be decoded in software. This commit mutually depends on the xf86-video-intel commit dri: Do not tile stencil buffer Author: Chad Versace <chad@chad-versace.us> Date: Mon Jul 18 00:38:00 2011 -0700 On Gen6 with separate stencil enabled, fixes the following Piglit tests: bugs/fdo23670-drawpix_stencil general/stencil-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-readpixels spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-copypixels spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-readpixels spec/EXT_packed_depth_stencil/readpixels-24_8 Note: This is a candidate for the 7.11 branch. Signed-off-by: Chad Versace <chad@chad-versace.us> Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Acked-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
fb5ff51f42
commit
f7dbcba280
|
@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
|
|||
*/
|
||||
tri_mask |= BUFFER_BIT_STENCIL;
|
||||
}
|
||||
else if (intel->has_separate_stencil &&
|
||||
stencilRegion->tiling == I915_TILING_NONE) {
|
||||
/* The stencil buffer is actually W tiled, which the hardware
|
||||
* cannot blit to. */
|
||||
tri_mask |= BUFFER_BIT_STENCIL;
|
||||
}
|
||||
else {
|
||||
/* clearing all stencil bits, use blitting */
|
||||
blit_mask |= BUFFER_BIT_STENCIL;
|
||||
|
|
|
@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
|
|||
assert(stencil_rb->Base.Format == MESA_FORMAT_S8);
|
||||
assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24);
|
||||
|
||||
if (stencil_rb->region->tiling == I915_TILING_Y) {
|
||||
if (stencil_rb->region->tiling == I915_TILING_NONE) {
|
||||
/*
|
||||
* The stencil buffer is actually W tiled. The region's tiling is
|
||||
* I915_TILING_NONE, however, because the GTT is incapable of W
|
||||
* fencing.
|
||||
*/
|
||||
intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE;
|
||||
return;
|
||||
} else {
|
||||
|
@ -1527,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
|
|||
* Presently, however, no verification or clean up is necessary, and
|
||||
* execution should not reach here. If the framebuffer still has a hiz
|
||||
* region, then we have already set dri2_has_hiz to true after
|
||||
* confirming above that the stencil buffer is Y tiled.
|
||||
* confirming above that the stencil buffer is W tiled.
|
||||
*/
|
||||
assert(0);
|
||||
}
|
||||
|
|
|
@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
|
|||
|
||||
if (irb->Base.Format == MESA_FORMAT_S8) {
|
||||
/*
|
||||
* The stencil buffer is W tiled. However, we request from the kernel a
|
||||
* non-tiled buffer because the GTT is incapable of W fencing.
|
||||
*
|
||||
* The stencil buffer has quirky pitch requirements. From Vol 2a,
|
||||
* 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
|
||||
* The pitch must be set to 2x the value computed based on width, as
|
||||
|
@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
|
|||
* To accomplish this, we resort to the nasty hack of doubling the drm
|
||||
* region's cpp and halving its height.
|
||||
*
|
||||
* If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
|
||||
* maps the memory incorrectly.
|
||||
* If we neglect to double the pitch, then render corruption occurs.
|
||||
*/
|
||||
irb->region = intel_region_alloc(intel->intelScreen,
|
||||
I915_TILING_Y,
|
||||
I915_TILING_NONE,
|
||||
cpp * 2,
|
||||
width,
|
||||
height / 2,
|
||||
ALIGN(width, 64),
|
||||
ALIGN((height + 1) / 2, 64),
|
||||
GL_TRUE);
|
||||
if (!irb->region)
|
||||
return false;
|
||||
|
|
|
@ -63,9 +63,12 @@
|
|||
* x8_z24 and s8).
|
||||
*
|
||||
* Eventually, intel_update_renderbuffers() makes a DRI2 request for
|
||||
* DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled,
|
||||
* then we joyfully set intel_screen.dri2_has_hiz to true and continue as if
|
||||
* nothing happend.
|
||||
* DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is
|
||||
* I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to
|
||||
* true and continue as if nothing happend.
|
||||
*
|
||||
* [1] The stencil buffer is actually W tiled. However, we request from the
|
||||
* kernel a non-tiled buffer because the GTT is incapable of W fencing.
|
||||
*
|
||||
* If the buffers are X tiled, however, the handshake has failed and we must
|
||||
* clean up.
|
||||
|
|
|
@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel,
|
|||
int miny = 0; \
|
||||
int maxx = rb->Width; \
|
||||
int maxy = rb->Height; \
|
||||
int stride = rb->RowStride; \
|
||||
uint8_t *buf = rb->Data; \
|
||||
\
|
||||
/* \
|
||||
* Here we ignore rb->Data and rb->RowStride as set by \
|
||||
* intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \
|
||||
* manually, the region's *real* base address and stride is \
|
||||
* required. \
|
||||
*/ \
|
||||
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
|
||||
uint8_t *buf = irb->region->buffer->virtual; \
|
||||
unsigned stride = irb->region->pitch; \
|
||||
unsigned height = 2 * irb->region->height; \
|
||||
bool flip = rb->Name == 0; \
|
||||
int y_scale = flip ? -1 : 1; \
|
||||
int y_bias = flip ? (height - 1) : 0; \
|
||||
|
||||
/* Don't flip y. */
|
||||
#undef Y_FLIP
|
||||
#define Y_FLIP(y) y
|
||||
#define Y_FLIP(y) (y_scale * (y) + y_bias)
|
||||
|
||||
/**
|
||||
* \brief Get pointer offset into stencil buffer.
|
||||
*
|
||||
* The stencil buffer interleaves two rows into one. Yay for crazy hardware.
|
||||
* The table below demonstrates how the pointer arithmetic behaves for a buffer
|
||||
* with positive stride (s=stride).
|
||||
*
|
||||
* x | y | byte offset
|
||||
* --------------------------
|
||||
* 0 | 0 | 0
|
||||
* 0 | 1 | 1
|
||||
* 1 | 0 | 2
|
||||
* 1 | 1 | 3
|
||||
* ... | ... | ...
|
||||
* 0 | 2 | s
|
||||
* 0 | 3 | s + 1
|
||||
* 1 | 2 | s + 2
|
||||
* 1 | 3 | s + 3
|
||||
* The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
|
||||
* must decode the tile's layout in software.
|
||||
*
|
||||
* See
|
||||
* - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
|
||||
* Format.
|
||||
* - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
|
||||
*
|
||||
* Even though the returned offset is always positive, the return type is
|
||||
* signed due to
|
||||
* commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
|
||||
* mesa: Fix return type of _mesa_get_format_bytes() (#37351)
|
||||
*/
|
||||
static inline intptr_t
|
||||
intel_offset_S8(int stride, GLint x, GLint y)
|
||||
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
|
||||
{
|
||||
return 2 * ((y / 2) * stride + x) + y % 2;
|
||||
uint32_t tile_size = 4096;
|
||||
uint32_t tile_width = 64;
|
||||
uint32_t tile_height = 64;
|
||||
uint32_t row_size = 64 * stride;
|
||||
|
||||
uint32_t tile_x = x / tile_width;
|
||||
uint32_t tile_y = y / tile_height;
|
||||
|
||||
/* The byte's address relative to the tile's base addres. */
|
||||
uint32_t byte_x = x % tile_width;
|
||||
uint32_t byte_y = y % tile_height;
|
||||
|
||||
uintptr_t u = tile_y * row_size
|
||||
+ tile_x * tile_size
|
||||
+ 512 * (byte_x / 8)
|
||||
+ 64 * (byte_y / 8)
|
||||
+ 32 * ((byte_y / 4) % 2)
|
||||
+ 16 * ((byte_x / 4) % 2)
|
||||
+ 8 * ((byte_y / 2) % 2)
|
||||
+ 4 * ((byte_x / 2) % 2)
|
||||
+ 2 * (byte_y % 2)
|
||||
+ 1 * (byte_x % 2);
|
||||
|
||||
/*
|
||||
* Errata for Gen5:
|
||||
*
|
||||
* An additional offset is needed which is not documented in the PRM.
|
||||
*
|
||||
* if ((byte_x / 8) % 2 == 1) {
|
||||
* if ((byte_y / 8) % 2) == 0) {
|
||||
* u += 64;
|
||||
* } else {
|
||||
* u -= 64;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* The offset is expressed more tersely as
|
||||
* u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
|
||||
*/
|
||||
|
||||
return u;
|
||||
}
|
||||
|
||||
#define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src;
|
||||
|
|
Loading…
Reference in New Issue