r600: add span support for 1D tiles
1D tile span support for depth/stencil/color/textures Z and stencil buffers are always tiled, so this fixes sw access to Z and stencil buffers. color and textures are currently linear, but this adds span support when we implement 1D tiling. This fixes the text in progs/demos/engine and progs/tests/z*
This commit is contained in:
parent
f662cedae4
commit
364ca57aff
|
@ -1366,6 +1366,7 @@ enum {
|
|||
DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3,
|
||||
DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15,
|
||||
DB_DEPTH_INFO__ARRAY_MODE_shift = 15,
|
||||
ARRAY_1D_TILED_THIN1 = 0x02,
|
||||
ARRAY_2D_TILED_THIN1 = 0x04,
|
||||
TILE_SURFACE_ENABLE_bit = 1 << 25,
|
||||
TILE_COMPACT_bit = 1 << 26,
|
||||
|
@ -1449,6 +1450,7 @@ enum {
|
|||
CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
|
||||
ARRAY_LINEAR_GENERAL = 0x00,
|
||||
ARRAY_LINEAR_ALIGNED = 0x01,
|
||||
/* ARRAY_1D_TILED_THIN1 = 0x02, */
|
||||
/* ARRAY_2D_TILED_THIN1 = 0x04, */
|
||||
NUMBER_TYPE_mask = 0x07 << 12,
|
||||
NUMBER_TYPE_shift = 12,
|
||||
|
|
|
@ -351,7 +351,7 @@ static void r700SetDepthTarget(context_t *context)
|
|||
SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
|
||||
DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
|
||||
}
|
||||
SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1,
|
||||
SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1,
|
||||
DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
|
||||
/* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */
|
||||
}
|
||||
|
|
|
@ -106,6 +106,142 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
|
|||
}
|
||||
#endif
|
||||
|
||||
/* r600 tiling
|
||||
* two main types:
|
||||
* - 1D (akin to macro-linear/micro-tiled on older asics)
|
||||
* - 2D (akin to macro-tiled/micro-tiled on older asics)
|
||||
* only 1D tiling is implemented below
|
||||
*/
|
||||
#if defined(RADEON_COMMON_FOR_R600)
|
||||
static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y, GLint is_depth, GLint is_stencil)
|
||||
{
|
||||
GLint element_bytes = rrb->cpp;
|
||||
GLint num_samples = 1;
|
||||
GLint tile_width = 8;
|
||||
GLint tile_height = 8;
|
||||
GLint tile_thickness = 1;
|
||||
GLint pitch_elements = rrb->pitch / element_bytes;
|
||||
GLint height = rrb->base.Height;
|
||||
GLint z = 0;
|
||||
GLint sample_number = 0;
|
||||
/* */
|
||||
GLint tile_bytes;
|
||||
GLint tiles_per_row;
|
||||
GLint tiles_per_slice;
|
||||
GLint slice_offset;
|
||||
GLint tile_row_index;
|
||||
GLint tile_column_index;
|
||||
GLint tile_offset;
|
||||
GLint pixel_number = 0;
|
||||
GLint element_offset;
|
||||
GLint offset = 0;
|
||||
|
||||
tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
|
||||
tiles_per_row = pitch_elements /tile_width;
|
||||
tiles_per_slice = tiles_per_row * (height / tile_height);
|
||||
slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
|
||||
tile_row_index = y / tile_height;
|
||||
tile_column_index = x / tile_width;
|
||||
tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
|
||||
|
||||
if (is_depth) {
|
||||
GLint pixel_offset = 0;
|
||||
|
||||
pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
|
||||
pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
|
||||
pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
|
||||
pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
|
||||
pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
|
||||
pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
|
||||
switch (element_bytes) {
|
||||
case 2:
|
||||
pixel_offset = pixel_number * element_bytes * num_samples;
|
||||
element_offset = pixel_offset + (sample_number * element_bytes);
|
||||
break;
|
||||
case 4:
|
||||
/* stencil and depth data are stored separately within a tile.
|
||||
* stencil is stored in a contiguous tile before the depth tile.
|
||||
* stencil element is 1 byte, depth element is 3 bytes.
|
||||
* stencil tile is 64 bytes.
|
||||
*/
|
||||
if (is_stencil)
|
||||
pixel_offset = pixel_number * 1 * num_samples;
|
||||
else
|
||||
pixel_offset = (pixel_number * 3 * num_samples) + 64;
|
||||
break;
|
||||
}
|
||||
element_offset = pixel_offset + (sample_number * element_bytes);
|
||||
} else {
|
||||
GLint sample_offset;
|
||||
|
||||
switch (element_bytes) {
|
||||
case 1:
|
||||
pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
|
||||
pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
|
||||
pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
|
||||
pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
|
||||
pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
|
||||
pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
|
||||
break;
|
||||
case 2:
|
||||
pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
|
||||
pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
|
||||
pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
|
||||
pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
|
||||
pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
|
||||
pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
|
||||
break;
|
||||
case 4:
|
||||
pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
|
||||
pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
|
||||
pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
|
||||
pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
|
||||
pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
|
||||
pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
|
||||
break;
|
||||
}
|
||||
sample_offset = sample_number * (tile_bytes / num_samples);
|
||||
element_offset = sample_offset + (pixel_number * element_bytes);
|
||||
}
|
||||
offset = slice_offset + tile_offset + element_offset;
|
||||
return offset;
|
||||
}
|
||||
|
||||
/* depth buffers */
|
||||
static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y)
|
||||
{
|
||||
GLubyte *ptr = rrb->bo->ptr;
|
||||
GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
|
||||
return &ptr[offset];
|
||||
}
|
||||
|
||||
static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y)
|
||||
{
|
||||
GLubyte *ptr = rrb->bo->ptr;
|
||||
GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
|
||||
return &ptr[offset];
|
||||
}
|
||||
|
||||
static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y)
|
||||
{
|
||||
GLubyte *ptr = rrb->bo->ptr;
|
||||
uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
|
||||
GLint offset;
|
||||
|
||||
if (rrb->has_surface || !(rrb->bo->flags & mask)) {
|
||||
offset = x * rrb->cpp + y * rrb->pitch;
|
||||
} else {
|
||||
offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
|
||||
}
|
||||
return &ptr[offset];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* radeon tiling on r300-r500 has 4 states,
|
||||
macro-linear/micro-linear
|
||||
macro-linear/micro-tiled
|
||||
|
@ -270,7 +406,11 @@ s8z24_to_z24s8(uint32_t val)
|
|||
|
||||
#define TAG(x) radeon##x##_RGB565
|
||||
#define TAG2(x,y) radeon##x##_RGB565##y
|
||||
#if defined(RADEON_COMMON_FOR_R600)
|
||||
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
|
||||
#else
|
||||
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
|
||||
#endif
|
||||
#include "spantmp2.h"
|
||||
|
||||
/* 16 bit, ARGB1555 color spanline and pixel functions
|
||||
|
@ -280,7 +420,11 @@ s8z24_to_z24s8(uint32_t val)
|
|||
|
||||
#define TAG(x) radeon##x##_ARGB1555
|
||||
#define TAG2(x,y) radeon##x##_ARGB1555##y
|
||||
#if defined(RADEON_COMMON_FOR_R600)
|
||||
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
|
||||
#else
|
||||
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
|
||||
#endif
|
||||
#include "spantmp2.h"
|
||||
|
||||
/* 16 bit, RGBA4 color spanline and pixel functions
|
||||
|
@ -290,7 +434,11 @@ s8z24_to_z24s8(uint32_t val)
|
|||
|
||||
#define TAG(x) radeon##x##_ARGB4444
|
||||
#define TAG2(x,y) radeon##x##_ARGB4444##y
|
||||
#if defined(RADEON_COMMON_FOR_R600)
|
||||
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
|
||||
#else
|
||||
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
|
||||
#endif
|
||||
#include "spantmp2.h"
|
||||
|
||||
/* 32 bit, xRGB8888 color spanline and pixel functions
|
||||
|
@ -300,11 +448,19 @@ s8z24_to_z24s8(uint32_t val)
|
|||
|
||||
#define TAG(x) radeon##x##_xRGB8888
|
||||
#define TAG2(x,y) radeon##x##_xRGB8888##y
|
||||
#if defined(RADEON_COMMON_FOR_R600)
|
||||
#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
|
||||
#define PUT_VALUE(_x, _y, d) { \
|
||||
GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
|
||||
*_ptr = d; \
|
||||
} while (0)
|
||||
#else
|
||||
#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
|
||||
#define PUT_VALUE(_x, _y, d) { \
|
||||
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
|
||||
*_ptr = d; \
|
||||
} while (0)
|
||||
#endif
|
||||
#include "spantmp2.h"
|
||||
|
||||
/* 32 bit, ARGB8888 color spanline and pixel functions
|
||||
|
@ -314,11 +470,19 @@ s8z24_to_z24s8(uint32_t val)
|
|||
|
||||
#define TAG(x) radeon##x##_ARGB8888
|
||||
#define TAG2(x,y) radeon##x##_ARGB8888##y
|
||||
#if defined(RADEON_COMMON_FOR_R600)
|
||||
#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
|
||||
#define PUT_VALUE(_x, _y, d) { \
|
||||
GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
|
||||
*_ptr = d; \
|
||||
} while (0)
|
||||
#else
|
||||
#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
|
||||
#define PUT_VALUE(_x, _y, d) { \
|
||||
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
|
||||
*_ptr = d; \
|
||||
} while (0)
|
||||
#endif
|
||||
#include "spantmp2.h"
|
||||
|
||||
/* ================================================================
|
||||
|
@ -342,6 +506,9 @@ s8z24_to_z24s8(uint32_t val)
|
|||
#if defined(RADEON_COMMON_FOR_R200)
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
*(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
*(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
|
||||
#else
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
*(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
|
||||
|
@ -350,6 +517,9 @@ s8z24_to_z24s8(uint32_t val)
|
|||
#if defined(RADEON_COMMON_FOR_R200)
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
|
||||
#else
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
|
||||
|
@ -374,6 +544,15 @@ do { \
|
|||
tmp |= ((d << 8) & 0xffffff00); \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
do { \
|
||||
GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
|
||||
GLuint tmp = *_ptr; \
|
||||
tmp &= 0xff000000; \
|
||||
tmp |= ((d) & 0x00ffffff); \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R200)
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
do { \
|
||||
|
@ -399,6 +578,11 @@ do { \
|
|||
do { \
|
||||
d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
|
||||
}while(0)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
do { \
|
||||
d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
|
||||
}while(0)
|
||||
#elif defined(RADEON_COMMON_FOR_R200)
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
do { \
|
||||
|
@ -426,6 +610,20 @@ do { \
|
|||
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
|
||||
*_ptr = d; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
do { \
|
||||
GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
|
||||
GLuint tmp = *_ptr; \
|
||||
tmp &= 0xff000000; \
|
||||
tmp |= (((d) >> 8) & 0x00ffffff); \
|
||||
*_ptr = tmp; \
|
||||
_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
|
||||
tmp = *_ptr; \
|
||||
tmp &= 0xffffff00; \
|
||||
tmp |= (d) & 0xff; \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R200)
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
do { \
|
||||
|
@ -447,6 +645,12 @@ do { \
|
|||
do { \
|
||||
d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
|
||||
}while(0)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
do { \
|
||||
d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
|
||||
d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
|
||||
}while(0)
|
||||
#elif defined(RADEON_COMMON_FOR_R200)
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
do { \
|
||||
|
@ -476,6 +680,15 @@ do { \
|
|||
tmp |= (d) & 0xff; \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define WRITE_STENCIL( _x, _y, d ) \
|
||||
do { \
|
||||
GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
|
||||
GLuint tmp = *_ptr; \
|
||||
tmp &= 0xffffff00; \
|
||||
tmp |= (d) & 0xff; \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R200)
|
||||
#define WRITE_STENCIL( _x, _y, d ) \
|
||||
do { \
|
||||
|
@ -503,6 +716,13 @@ do { \
|
|||
GLuint tmp = *_ptr; \
|
||||
d = tmp & 0x000000ff; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R600)
|
||||
#define READ_STENCIL( d, _x, _y ) \
|
||||
do { \
|
||||
GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
|
||||
GLuint tmp = *_ptr; \
|
||||
d = tmp & 0x000000ff; \
|
||||
} while (0)
|
||||
#elif defined(RADEON_COMMON_FOR_R200)
|
||||
#define READ_STENCIL( d, _x, _y ) \
|
||||
do { \
|
||||
|
|
Loading…
Reference in New Issue