llvmpipe: Autogenerate lp_tile_soa.c from u_format.csv.

This is just a temporary change until we generate the tile read/write
functions at runtime. The new code avoids an extra memcpy that exists in
the u_tile.c functions, on which lp_tile_soa.c was originally based.

This achieves up to 5% improvement, particularly in frames with
little geometry overlap.
This commit is contained in:
José Fonseca 2009-10-04 13:04:08 +01:00
parent 7a7dfb09aa
commit 589ec337f0
7 changed files with 336 additions and 971 deletions

View File

@ -0,0 +1 @@
lp_tile_soa.c

View File

@ -55,3 +55,6 @@ C_SOURCES = \
lp_tile_soa.c
include ../../Makefile.template
lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv
python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@

View File

@ -9,6 +9,13 @@ if not env.has_key('LLVM_VERSION'):
env.Tool('udis86')
env.CodeGenerate(
target = 'lp_tile_soa.c',
script = 'lp_tile_soa.py',
source = ['#src/gallium/auxiliary/util/u_format.csv'],
command = 'python $SCRIPT $SOURCE > $TARGET'
)
llvmpipe = env.ConvenienceLibrary(
target = 'llvmpipe',
source = [

View File

@ -241,42 +241,34 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
for (x = 0; x < pt->width; x += TILE_SIZE) {
struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE];
switch(tile->status) {
case LP_TILE_STATUS_UNDEFINED:
break;
if(tile->status != LP_TILE_STATUS_UNDEFINED) {
unsigned w = TILE_SIZE;
unsigned h = TILE_SIZE;
case LP_TILE_STATUS_CLEAR: {
/**
* Actually clear the tiles which were flagged as being in a clear state.
*/
if (!pipe_clip_tile(x, y, &w, &h, pt)) {
switch(tile->status) {
case LP_TILE_STATUS_CLEAR:
/* Actually clear the tiles which were flagged as being in a
* clear state. */
util_fill_rect(tc->transfer_map, &pt->block, pt->stride,
x, y, w, h,
tc->clear_val);
break;
struct pipe_screen *screen = pt->texture->screen;
unsigned tw = TILE_SIZE;
unsigned th = TILE_SIZE;
void *dst;
case LP_TILE_STATUS_DEFINED:
lp_tile_write_4ub(pt->format,
tile->color,
tc->transfer_map, pt->stride,
x, y, w, h);
break;
if (pipe_clip_tile(x, y, &tw, &th, pt))
continue;
dst = screen->transfer_map(screen, pt);
assert(dst);
if(!dst)
continue;
util_fill_rect(dst, &pt->block, pt->stride,
x, y, tw, th,
tc->clear_val);
screen->transfer_unmap(screen, pt);
default:
assert(0);
break;
}
}
tile->status = LP_TILE_STATUS_UNDEFINED;
break;
}
case LP_TILE_STATUS_DEFINED:
lp_put_tile_rgba_soa(pt, x, y, tile->color);
tile->status = LP_TILE_STATUS_UNDEFINED;
break;
}
}
}
@ -304,11 +296,22 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
tile->status = LP_TILE_STATUS_DEFINED;
break;
case LP_TILE_STATUS_UNDEFINED:
/* get new tile data from transfer */
lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color);
case LP_TILE_STATUS_UNDEFINED: {
unsigned w = TILE_SIZE;
unsigned h = TILE_SIZE;
x &= ~(TILE_SIZE - 1);
y &= ~(TILE_SIZE - 1);
if (!pipe_clip_tile(x, y, &w, &h, tc->transfer))
lp_tile_read_4ub(pt->format,
tile->color,
tc->transfer_map, tc->transfer->stride,
x, y, w, h);
tile->status = LP_TILE_STATUS_DEFINED;
break;
}
case LP_TILE_STATUS_DEFINED:
/* nothing to do */

View File

@ -1,931 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* RGBA/float tile get/put functions.
* Usable both by drivers and state trackers.
*/
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_rect.h"
#include "util/u_tile.h"
#include "lp_tile_cache.h"
#include "lp_tile_soa.h"
/*
 * Lookup table indexed as [row][column] within a
 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH group of pixels.
 * NOTE(review): presumably consumed by the TILE_PIXEL macro to locate a
 * pixel inside the swizzled SoA tile layout -- confirm in lp_tile_soa.h.
 */
const unsigned char
tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {
{ 0, 1, 4, 5, 8, 9, 12, 13},
{ 2, 3, 6, 7, 10, 11, 14, 15}
};
/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/

/**
 * Unpack w x h contiguous A8R8G8B8 pixels from src into the tile p.
 * Tile channels 0..3 receive red, green, blue and alpha respectively.
 */
static void
a8r8g8b8_get_tile_rgba(const unsigned *src,
                       unsigned w, unsigned h,
                       uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const unsigned argb = *src;

         TILE_PIXEL(p, col, row, 3) = (argb >> 24) & 0xff;
         TILE_PIXEL(p, col, row, 0) = (argb >> 16) & 0xff;
         TILE_PIXEL(p, col, row, 1) = (argb >> 8) & 0xff;
         TILE_PIXEL(p, col, row, 2) = (argb >> 0) & 0xff;
      }
   }
}
/**
 * Pack a w x h region of the tile p into A8R8G8B8 pixels, written
 * contiguously to dst.
 */
static void
a8r8g8b8_put_tile_rgba(unsigned *dst,
                       unsigned w, unsigned h,
                       const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         const unsigned red   = TILE_PIXEL(p, col, row, 0);
         const unsigned green = TILE_PIXEL(p, col, row, 1);
         const unsigned blue  = TILE_PIXEL(p, col, row, 2);
         const unsigned alpha = TILE_PIXEL(p, col, row, 3);

         *dst = (alpha << 24) | (red << 16) | (green << 8) | blue;
         ++dst;
      }
   }
}
/*** PIPE_FORMAT_X8R8G8B8_UNORM ***/

/**
 * Unpack w x h contiguous X8R8G8B8 pixels from src into the tile p.
 * The top byte of each pixel is ignored; alpha is forced to 0xff.
 */
static void
x8r8g8b8_get_tile_rgba(const unsigned *src,
                       unsigned w, unsigned h,
                       uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const unsigned xrgb = *src;

         TILE_PIXEL(p, col, row, 0) = (xrgb >> 16) & 0xff;
         TILE_PIXEL(p, col, row, 1) = (xrgb >> 8) & 0xff;
         TILE_PIXEL(p, col, row, 2) = (xrgb >> 0) & 0xff;
         TILE_PIXEL(p, col, row, 3) = 0xff;
      }
   }
}
/**
 * Pack a w x h region of the tile p into X8R8G8B8 pixels, written
 * contiguously to dst.  The unused top byte is filled with 0xff.
 */
static void
x8r8g8b8_put_tile_rgba(unsigned *dst,
                       unsigned w, unsigned h,
                       const uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         unsigned r, g, b;
         r = TILE_PIXEL(p, j, i, 0);
         g = TILE_PIXEL(p, j, i, 1);
         b = TILE_PIXEL(p, j, i, 2);
         /* Unsigned constant: (0xff << 24) would overflow a signed int,
          * which is undefined behaviour. */
         *dst++ = (0xffu << 24) | (r << 16) | (g << 8) | b;
      }
   }
}
/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/

/**
 * Unpack w x h contiguous B8G8R8A8 pixels from src into the tile p.
 * Blue occupies the top byte of each pixel and alpha the bottom byte.
 */
static void
b8g8r8a8_get_tile_rgba(const unsigned *src,
                       unsigned w, unsigned h,
                       uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const unsigned bgra = *src;

         TILE_PIXEL(p, col, row, 2) = (bgra >> 24) & 0xff;
         TILE_PIXEL(p, col, row, 1) = (bgra >> 16) & 0xff;
         TILE_PIXEL(p, col, row, 0) = (bgra >> 8) & 0xff;
         TILE_PIXEL(p, col, row, 3) = (bgra >> 0) & 0xff;
      }
   }
}
/**
 * Pack a w x h region of the tile p into B8G8R8A8 pixels, written
 * contiguously to dst.
 */
static void
b8g8r8a8_put_tile_rgba(unsigned *dst,
                       unsigned w, unsigned h,
                       const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         const unsigned red   = TILE_PIXEL(p, col, row, 0);
         const unsigned green = TILE_PIXEL(p, col, row, 1);
         const unsigned blue  = TILE_PIXEL(p, col, row, 2);
         const unsigned alpha = TILE_PIXEL(p, col, row, 3);

         *dst = (blue << 24) | (green << 16) | (red << 8) | alpha;
         ++dst;
      }
   }
}
/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/

/**
 * Unpack w x h contiguous A1R5G5B5 pixels from src into the tile p,
 * rescaling the 5-bit colour fields to 0..255 and the 1-bit alpha to
 * either 0 or 255.
 */
static void
a1r5g5b5_get_tile_rgba(const ushort *src,
                       unsigned w, unsigned h,
                       uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const ushort pixel = *src;
         const unsigned red5   = (pixel >> 10) & 0x1f;
         const unsigned green5 = (pixel >> 5) & 0x1f;
         const unsigned blue5  = pixel & 0x1f;
         const unsigned alpha1 = pixel >> 15;

         TILE_PIXEL(p, col, row, 0) = red5 * 255 / 31;
         TILE_PIXEL(p, col, row, 1) = green5 * 255 / 31;
         TILE_PIXEL(p, col, row, 2) = blue5 * 255 / 31;
         TILE_PIXEL(p, col, row, 3) = alpha1 * 255;
      }
   }
}
/**
 * Pack a w x h region of the tile p into A1R5G5B5 pixels, written
 * contiguously to dst.  Each channel keeps only its most significant bits.
 */
static void
a1r5g5b5_put_tile_rgba(ushort *dst,
                       unsigned w, unsigned h,
                       const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         /* keep the top 5 bits of colour and the top bit of alpha */
         const unsigned red5   = TILE_PIXEL(p, col, row, 0) >> 3;
         const unsigned green5 = TILE_PIXEL(p, col, row, 1) >> 3;
         const unsigned blue5  = TILE_PIXEL(p, col, row, 2) >> 3;
         const unsigned alpha1 = TILE_PIXEL(p, col, row, 3) >> 7;

         *dst = (alpha1 << 15) | (red5 << 10) | (green5 << 5) | blue5;
         ++dst;
      }
   }
}
/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/

/**
 * Unpack w x h contiguous A4R4G4B4 pixels from src into the tile p,
 * rescaling every 4-bit field to 0..255.
 */
static void
a4r4g4b4_get_tile_rgba(const ushort *src,
                       unsigned w, unsigned h,
                       uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const ushort pixel = *src;
         const unsigned red4   = (pixel >> 8) & 0xf;
         const unsigned green4 = (pixel >> 4) & 0xf;
         const unsigned blue4  = pixel & 0xf;
         const unsigned alpha4 = pixel >> 12;

         TILE_PIXEL(p, col, row, 0) = red4 * 255 / 15;
         TILE_PIXEL(p, col, row, 1) = green4 * 255 / 15;
         TILE_PIXEL(p, col, row, 2) = blue4 * 255 / 15;
         TILE_PIXEL(p, col, row, 3) = alpha4 * 255 / 15;
      }
   }
}
/**
 * Pack a w x h region of the tile p into A4R4G4B4 pixels, written
 * contiguously to dst.  Each channel keeps its top 4 bits.
 *
 * Bit layout (matching a4r4g4b4_get_tile_rgba): alpha 15..12, red 11..8,
 * green 7..4, blue 3..0.
 */
static void
a4r4g4b4_put_tile_rgba(ushort *dst,
                       unsigned w, unsigned h,
                       const uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         unsigned r, g, b, a;
         r = TILE_PIXEL(p, j, i, 0);
         g = TILE_PIXEL(p, j, i, 1);
         b = TILE_PIXEL(p, j, i, 2);
         a = TILE_PIXEL(p, j, i, 3);
         r >>= 4;
         g >>= 4;
         b >>= 4;
         a >>= 4;
         /* Red belongs at bit 8; a shift of 16 would fall outside the
          * 16-bit pixel and drop the channel entirely. */
         *dst++ = (a << 12) | (r << 8) | (g << 4) | b;
      }
   }
}
/*** PIPE_FORMAT_R5G6B5_UNORM ***/

/**
 * Unpack w x h contiguous R5G6B5 pixels from src into the tile p,
 * rescaling colour fields to 0..255.  Alpha is set to 255.
 */
static void
r5g6b5_get_tile_rgba(const ushort *src,
                     unsigned w, unsigned h,
                     uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const ushort pixel = *src;
         const unsigned red5   = (pixel >> 11) & 0x1f;
         const unsigned green6 = (pixel >> 5) & 0x3f;
         const unsigned blue5  = pixel & 0x1f;

         TILE_PIXEL(p, col, row, 0) = red5 * 255 / 31;
         TILE_PIXEL(p, col, row, 1) = green6 * 255 / 63;
         TILE_PIXEL(p, col, row, 2) = blue5 * 255 / 31;
         TILE_PIXEL(p, col, row, 3) = 255;
      }
   }
}
/**
 * Pack a w x h region of the tile p into R5G6B5 pixels, written
 * contiguously to dst.  Tile alpha is not stored.
 */
static void
r5g6b5_put_tile_rgba(ushort *dst,
                     unsigned w, unsigned h,
                     const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         const uint red5   = (uint) TILE_PIXEL(p, col, row, 0) * 31 / 255;
         const uint green6 = (uint) TILE_PIXEL(p, col, row, 1) * 63 / 255;
         const uint blue5  = (uint) TILE_PIXEL(p, col, row, 2) * 31 / 255;

         *dst = (red5 << 11) | (green6 << 5) | (blue5);
         ++dst;
      }
   }
}
/*** PIPE_FORMAT_Z16_UNORM ***/

/**
 * Unpack w x h contiguous 16-bit depth values from src into the tile p,
 * replicating each depth into all four 8-bit channels.
 *
 * The tile stores uint8_t, so the depth is reduced to its top 8 bits.
 * Storing a float in [0,1] here would truncate to 0 or 1.
 */
static void
z16_get_tile_rgba(const ushort *src,
                  unsigned w, unsigned h,
                  uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         const uint8_t z = (uint8_t) (*src++ >> 8);

         TILE_PIXEL(p, j, i, 0) = z;
         TILE_PIXEL(p, j, i, 1) = z;
         TILE_PIXEL(p, j, i, 2) = z;
         TILE_PIXEL(p, j, i, 3) = z;
      }
   }
}
/*** PIPE_FORMAT_L8_UNORM ***/

/**
 * Unpack w x h contiguous 8-bit luminance values from src into the tile p.
 * Luminance is replicated to red, green and blue; alpha is set to 255.
 */
static void
l8_get_tile_rgba(const ubyte *src,
                 unsigned w, unsigned h,
                 uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         const ubyte lum = *src;

         TILE_PIXEL(p, col, row, 2) = lum;
         TILE_PIXEL(p, col, row, 1) = lum;
         TILE_PIXEL(p, col, row, 0) = lum;
         TILE_PIXEL(p, col, row, 3) = 255;
         ++src;
      }
   }
}
/**
 * Pack a w x h region of the tile p into 8-bit luminance values written
 * contiguously to dst.  The tile's red channel supplies the luminance.
 */
static void
l8_put_tile_rgba(ubyte *dst,
                 unsigned w, unsigned h,
                 const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++dst) {
         const unsigned red = TILE_PIXEL(p, col, row, 0);

         *dst = (ubyte) red;
      }
   }
}
/*** PIPE_FORMAT_A8_UNORM ***/

/**
 * Unpack w x h contiguous 8-bit alpha values from src into the tile p.
 * Red, green and blue are zeroed; alpha is copied through.
 */
static void
a8_get_tile_rgba(const ubyte *src,
                 unsigned w, unsigned h,
                 uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         TILE_PIXEL(p, col, row, 2) = 0;
         TILE_PIXEL(p, col, row, 1) = 0;
         TILE_PIXEL(p, col, row, 0) = 0;
         TILE_PIXEL(p, col, row, 3) = *src;
         ++src;
      }
   }
}
/**
 * Pack a w x h region of the tile p into 8-bit alpha values written
 * contiguously to dst.  Only the tile's alpha channel is stored.
 */
static void
a8_put_tile_rgba(ubyte *dst,
                 unsigned w, unsigned h,
                 const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++dst) {
         const unsigned alpha = TILE_PIXEL(p, col, row, 3);

         *dst = (ubyte) alpha;
      }
   }
}
/*** PIPE_FORMAT_R16_SNORM ***/

/**
 * Unpack w x h contiguous signed 16-bit red values from src into the tile p.
 * Red is shifted right by 7 and clamped below at zero; green and blue are
 * zeroed and alpha is set to 255.
 */
static void
r16_get_tile_rgba(const short *src,
                  unsigned w, unsigned h,
                  uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         TILE_PIXEL(p, col, row, 0) = MAX2(*src >> 7, 0);
         TILE_PIXEL(p, col, row, 2) = 0;
         TILE_PIXEL(p, col, row, 1) = 0;
         TILE_PIXEL(p, col, row, 3) = 255;
         ++src;
      }
   }
}
/**
 * Pack a w x h region of the tile p into signed 16-bit red values written
 * contiguously to dst.  The 8-bit red channel is shifted left by 7.
 */
static void
r16_put_tile_rgba(short *dst,
                  unsigned w, unsigned h,
                  const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         *dst = TILE_PIXEL(p, col, row, 0) << 7;
         ++dst;
      }
   }
}
/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/

/**
 * Unpack w x h contiguous pixels of four signed 16-bit channels from src
 * into the tile p.  Each channel is shifted right by 8 before being stored.
 *
 * NOTE(review): unlike r16_get_tile_rgba, negative inputs are not clamped
 * and the shift is 8 rather than 7 -- confirm which is intended.
 */
static void
r16g16b16a16_get_tile_rgba(const short *src,
                           unsigned w, unsigned h,
                           uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         TILE_PIXEL(p, col, row, 0) = src[0] >> 8;
         TILE_PIXEL(p, col, row, 1) = src[1] >> 8;
         TILE_PIXEL(p, col, row, 2) = src[2] >> 8;
         TILE_PIXEL(p, col, row, 3) = src[3] >> 8;
         src += 4;
      }
   }
}
/**
 * Pack a w x h region of the tile p into pixels of four signed 16-bit
 * channels written contiguously to dst.  Each 8-bit channel is shifted
 * left by 8.
 */
static void
r16g16b16a16_put_tile_rgba(short *dst,
                           unsigned w, unsigned h,
                           const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         dst[0] = TILE_PIXEL(p, col, row, 0) << 8;
         dst[1] = TILE_PIXEL(p, col, row, 1) << 8;
         dst[2] = TILE_PIXEL(p, col, row, 2) << 8;
         dst[3] = TILE_PIXEL(p, col, row, 3) << 8;
         dst += 4;
      }
   }
}
/*** PIPE_FORMAT_I8_UNORM ***/

/**
 * Unpack w x h contiguous 8-bit intensity values from src into the tile p,
 * replicating the intensity to all four channels.
 */
static void
i8_get_tile_rgba(const ubyte *src,
                 unsigned w, unsigned h,
                 uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         const ubyte intensity = *src;

         TILE_PIXEL(p, col, row, 3) = intensity;
         TILE_PIXEL(p, col, row, 2) = intensity;
         TILE_PIXEL(p, col, row, 1) = intensity;
         TILE_PIXEL(p, col, row, 0) = intensity;
         ++src;
      }
   }
}
/**
 * Pack a w x h region of the tile p into 8-bit intensity values written
 * contiguously to dst.  The tile's red channel supplies the intensity.
 */
static void
i8_put_tile_rgba(ubyte *dst,
                 unsigned w, unsigned h,
                 const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++dst) {
         const unsigned red = TILE_PIXEL(p, col, row, 0);

         *dst = (ubyte) red;
      }
   }
}
/*** PIPE_FORMAT_A8L8_UNORM ***/

/**
 * Unpack w x h contiguous A8L8 pixels from src into the tile p.
 * The low byte (luminance) is replicated to red, green and blue; the high
 * byte becomes alpha.
 */
static void
a8l8_get_tile_rgba(const ushort *src,
                   unsigned w, unsigned h,
                   uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++src) {
         const ushort al = *src;
         const unsigned lum = al & 0xff;

         TILE_PIXEL(p, col, row, 2) = lum;
         TILE_PIXEL(p, col, row, 1) = lum;
         TILE_PIXEL(p, col, row, 0) = lum;
         TILE_PIXEL(p, col, row, 3) = al >> 8;
      }
   }
}
/**
 * Pack a w x h region of the tile p into A8L8 pixels written contiguously
 * to dst.  Red supplies the luminance (low byte); alpha goes in the high
 * byte.
 */
static void
a8l8_put_tile_rgba(ushort *dst,
                   unsigned w, unsigned h,
                   const uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col, ++dst) {
         const unsigned lum   = TILE_PIXEL(p, col, row, 0);
         const unsigned alpha = TILE_PIXEL(p, col, row, 3);

         *dst = (alpha << 8) | lum;
      }
   }
}
/*** PIPE_FORMAT_Z32_UNORM ***/

/**
 * Unpack w x h contiguous 32-bit depth values from src into the tile p,
 * replicating each depth into all four 8-bit channels.
 *
 * The tile stores uint8_t, so the depth is reduced to its top 8 bits.
 * Storing a float in [0,1] here would truncate to 0 or 1.
 */
static void
z32_get_tile_rgba(const unsigned *src,
                  unsigned w, unsigned h,
                  uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         const uint8_t z = (uint8_t) ((*src++ >> 24) & 0xff);

         TILE_PIXEL(p, j, i, 0) = z;
         TILE_PIXEL(p, j, i, 1) = z;
         TILE_PIXEL(p, j, i, 2) = z;
         TILE_PIXEL(p, j, i, 3) = z;
      }
   }
}
/*** PIPE_FORMAT_S8Z24_UNORM ***/

/**
 * Unpack w x h contiguous S8Z24 values from src into the tile p.
 * Depth lives in the low 24 bits; the stencil byte is ignored.  The depth
 * is replicated into all four 8-bit channels.
 *
 * The tile stores uint8_t, so the 24-bit depth is reduced to its top 8
 * bits.  Storing a float in [0,1] here would truncate to 0 or 1.
 */
static void
s8z24_get_tile_rgba(const unsigned *src,
                    unsigned w, unsigned h,
                    uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         const unsigned z24 = *src++ & 0xffffff;
         const uint8_t z = (uint8_t) (z24 >> 16);

         TILE_PIXEL(p, j, i, 0) = z;
         TILE_PIXEL(p, j, i, 1) = z;
         TILE_PIXEL(p, j, i, 2) = z;
         TILE_PIXEL(p, j, i, 3) = z;
      }
   }
}
/*** PIPE_FORMAT_Z24S8_UNORM ***/

/**
 * Unpack w x h contiguous Z24S8 values from src into the tile p.
 * Depth lives in the high 24 bits; the stencil byte is ignored.  The depth
 * is replicated into all four 8-bit channels.
 *
 * The tile stores uint8_t, so the 24-bit depth is reduced to its top 8
 * bits.  Storing a float in [0,1] here would truncate to 0 or 1.
 */
static void
z24s8_get_tile_rgba(const unsigned *src,
                    unsigned w, unsigned h,
                    uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         const unsigned z24 = (*src++ >> 8) & 0xffffff;
         const uint8_t z = (uint8_t) (z24 >> 16);

         TILE_PIXEL(p, j, i, 0) = z;
         TILE_PIXEL(p, j, i, 1) = z;
         TILE_PIXEL(p, j, i, 2) = z;
         TILE_PIXEL(p, j, i, 3) = z;
      }
   }
}
/*** PIPE_FORMAT_Z32_FLOAT ***/

/**
 * Unpack w x h contiguous float depth values from src into the tile p,
 * replicating each depth into all four 8-bit channels.
 *
 * The tile stores uint8_t, so the depth is clamped to [0,1] and scaled to
 * 0..255.  Converting an out-of-range float directly to uint8_t is
 * undefined, and an in-range one would truncate to 0 or 1.
 */
static void
z32f_get_tile_rgba(const float *src,
                   unsigned w, unsigned h,
                   uint8_t *p)
{
   unsigned i, j;

   for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
         float depth = *src++;
         uint8_t z;

         if (!(depth > 0.0f))       /* also catches NaN */
            depth = 0.0f;
         else if (depth > 1.0f)
            depth = 1.0f;
         z = (uint8_t) (depth * 255.0f);

         TILE_PIXEL(p, j, i, 0) = z;
         TILE_PIXEL(p, j, i, 1) = z;
         TILE_PIXEL(p, j, i, 2) = z;
         TILE_PIXEL(p, j, i, 3) = z;
      }
   }
}
/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
/**
* Convert YCbCr (or YCrCb) to RGBA.
*/
static void
ycbcr_get_tile_rgba(const ushort *src,
unsigned w, unsigned h,
uint8_t *p,
boolean rev)
{
unsigned i, j;
for (i = 0; i < h; i++) {
/* do two texels at a time */
for (j = 0; j < (w & ~1); j += 2, src += 2) {
const ushort t0 = src[0];
const ushort t1 = src[1];
const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */
ubyte cb, cr;
float r, g, b;
if (rev) {
cb = t1 & 0xff; /* chroma U */
cr = t0 & 0xff; /* chroma V */
}
else {
cb = t0 & 0xff; /* chroma U */
cr = t1 & 0xff; /* chroma V */
}
/* even pixel: y0,cr,cb */
r = 1.164f * (y0-16) + 1.596f * (cr-128);
g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
b = 1.164f * (y0-16) + 2.018f * (cb-128);
TILE_PIXEL(p, j, i, 0) = r;
TILE_PIXEL(p, j, i, 1) = g;
TILE_PIXEL(p, j, i, 2) = b;
TILE_PIXEL(p, j, i, 3) = 255;
/* odd pixel: use y1,cr,cb */
r = 1.164f * (y1-16) + 1.596f * (cr-128);
g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
b = 1.164f * (y1-16) + 2.018f * (cb-128);
TILE_PIXEL(p, j + 1, i, 0) = r;
TILE_PIXEL(p, j + 1, i, 1) = g;
TILE_PIXEL(p, j + 1, i, 2) = b;
TILE_PIXEL(p, j + 1, i, 3) = 255;
}
/* do the last texel */
if (w & 1) {
const ushort t0 = src[0];
const ushort t1 = src[1];
const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
ubyte cb, cr;
float r, g, b;
if (rev) {
cb = t1 & 0xff; /* chroma U */
cr = t0 & 0xff; /* chroma V */
}
else {
cb = t0 & 0xff; /* chroma U */
cr = t1 & 0xff; /* chroma V */
}
/* even pixel: y0,cr,cb */
r = 1.164f * (y0-16) + 1.596f * (cr-128);
g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
b = 1.164f * (y0-16) + 2.018f * (cb-128);
TILE_PIXEL(p, j, i, 0) = r;
TILE_PIXEL(p, j, i, 1) = g;
TILE_PIXEL(p, j, i, 2) = b;
TILE_PIXEL(p, j, i, 3) = 255;
}
}
}
/**
 * Fill a w x h region of the tile p with a one-pixel checkerboard of 0 and
 * 255 in every channel.  src is not read.
 */
static void
fake_get_tile_rgba(const ushort *src,
                   unsigned w, unsigned h,
                   uint8_t *p)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      for (col = 0; col < w; ++col) {
         const uint8_t shade = ((row ^ col) & 1) ? 255 : 0;

         TILE_PIXEL(p, col, row, 3) = shade;
         TILE_PIXEL(p, col, row, 2) = shade;
         TILE_PIXEL(p, col, row, 1) = shade;
         TILE_PIXEL(p, col, row, 0) = shade;
      }
   }
}
/**
 * Convert a packed w x h block of pixels in the given format into the
 * tile p by dispatching to the matching *_get_tile_rgba() helper.
 * Unsupported formats are reported through debug_printf() and filled with
 * the fake_get_tile_rgba() pattern.
 */
static void
lp_tile_raw_to_rgba_soa(enum pipe_format format,
                        void *src,
                        uint w, uint h,
                        uint8_t *p)
{
   switch (format) {
   /* 32-bit colour */
   case PIPE_FORMAT_A8R8G8B8_UNORM:
      a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
      break;
   case PIPE_FORMAT_X8R8G8B8_UNORM:
      x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, p);
      break;

   /* 16-bit colour */
   case PIPE_FORMAT_A1R5G5B5_UNORM:
      a1r5g5b5_get_tile_rgba((ushort *) src, w, h, p);
      break;
   case PIPE_FORMAT_A4R4G4B4_UNORM:
      a4r4g4b4_get_tile_rgba((ushort *) src, w, h, p);
      break;
   case PIPE_FORMAT_R5G6B5_UNORM:
      r5g6b5_get_tile_rgba((ushort *) src, w, h, p);
      break;
   case PIPE_FORMAT_A8L8_UNORM:
      a8l8_get_tile_rgba((ushort *) src, w, h, p);
      break;

   /* 8-bit single channel */
   case PIPE_FORMAT_L8_UNORM:
      l8_get_tile_rgba((ubyte *) src, w, h, p);
      break;
   case PIPE_FORMAT_A8_UNORM:
      a8_get_tile_rgba((ubyte *) src, w, h, p);
      break;
   case PIPE_FORMAT_I8_UNORM:
      i8_get_tile_rgba((ubyte *) src, w, h, p);
      break;

   /* signed 16-bit channels */
   case PIPE_FORMAT_R16_SNORM:
      r16_get_tile_rgba((short *) src, w, h, p);
      break;
   case PIPE_FORMAT_R16G16B16A16_SNORM:
      r16g16b16a16_get_tile_rgba((short *) src, w, h, p);
      break;

   /* depth / stencil */
   case PIPE_FORMAT_Z16_UNORM:
      z16_get_tile_rgba((ushort *) src, w, h, p);
      break;
   case PIPE_FORMAT_Z32_UNORM:
      z32_get_tile_rgba((unsigned *) src, w, h, p);
      break;
   case PIPE_FORMAT_S8Z24_UNORM:
   case PIPE_FORMAT_X8Z24_UNORM:
      s8z24_get_tile_rgba((unsigned *) src, w, h, p);
      break;
   case PIPE_FORMAT_Z24S8_UNORM:
   case PIPE_FORMAT_Z24X8_UNORM:
      z24s8_get_tile_rgba((unsigned *) src, w, h, p);
      break;
   case PIPE_FORMAT_Z32_FLOAT:
      z32f_get_tile_rgba((float *) src, w, h, p);
      break;

   /* video */
   case PIPE_FORMAT_YCBCR:
      ycbcr_get_tile_rgba((ushort *) src, w, h, p, FALSE);
      break;
   case PIPE_FORMAT_YCBCR_REV:
      ycbcr_get_tile_rgba((ushort *) src, w, h, p, TRUE);
      break;

   default:
      debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format));
      fake_get_tile_rgba(src, w, h, p);
      break;
   }
}
/**
 * Read the TILE_SIZE x TILE_SIZE tile at (x, y) from the transfer pt and
 * convert it into the tile p.
 *
 * Returns without touching p when pipe_clip_tile() returns non-zero or the
 * temporary packed buffer cannot be allocated.
 */
void
lp_get_tile_rgba_soa(struct pipe_transfer *pt,
                     uint x, uint y,
                     uint8_t *p)
{
   uint tile_w = TILE_SIZE;
   uint tile_h = TILE_SIZE;
   void *raw;

   if (pipe_clip_tile(x, y, &tile_w, &tile_h, pt))
      return;

   raw = MALLOC(pf_get_nblocks(&pt->block, tile_w, tile_h) * pt->block.size);
   if (!raw)
      return;

   /* YCbCr texels are decoded in pairs, so the tile must start on an
    * even column. */
   if (pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) {
      assert((x & 1) == 0);
   }

   pipe_get_tile_raw(pt, x, y, tile_w, tile_h, raw, 0);
   lp_tile_raw_to_rgba_soa(pt->format, raw, tile_w, tile_h, p);

   FREE(raw);
}
/**
 * Convert the tile p to the transfer's pixel format and write it to the
 * TILE_SIZE x TILE_SIZE tile at (x, y) of the transfer pt.
 *
 * Returns without writing when pipe_clip_tile() returns non-zero or the
 * temporary packed buffer cannot be allocated.  Formats with no put
 * implementation (depth formats, R8G8B8A8, anything unknown) are also
 * skipped: the freshly allocated buffer would hold uninitialized data, and
 * writing it would put garbage into the transfer.
 */
void
lp_put_tile_rgba_soa(struct pipe_transfer *pt,
                     uint x, uint y,
                     const uint8_t *p)
{
   uint w = TILE_SIZE, h = TILE_SIZE;
   boolean converted = TRUE;
   void *packed;

   if (pipe_clip_tile(x, y, &w, &h, pt))
      return;

   packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
   if (!packed)
      return;

   switch (pt->format) {
   case PIPE_FORMAT_A8R8G8B8_UNORM:
      a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
      break;
   case PIPE_FORMAT_X8R8G8B8_UNORM:
      x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p);
      break;
   case PIPE_FORMAT_A1R5G5B5_UNORM:
      a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p);
      break;
   case PIPE_FORMAT_R5G6B5_UNORM:
      r5g6b5_put_tile_rgba((ushort *) packed, w, h, p);
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      assert(0);
      converted = FALSE;
      break;
   case PIPE_FORMAT_A4R4G4B4_UNORM:
      a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p);
      break;
   case PIPE_FORMAT_L8_UNORM:
      l8_put_tile_rgba((ubyte *) packed, w, h, p);
      break;
   case PIPE_FORMAT_A8_UNORM:
      a8_put_tile_rgba((ubyte *) packed, w, h, p);
      break;
   case PIPE_FORMAT_I8_UNORM:
      i8_put_tile_rgba((ubyte *) packed, w, h, p);
      break;
   case PIPE_FORMAT_A8L8_UNORM:
      a8l8_put_tile_rgba((ushort *) packed, w, h, p);
      break;
   case PIPE_FORMAT_R16_SNORM:
      r16_put_tile_rgba((short *) packed, w, h, p);
      break;
   case PIPE_FORMAT_R16G16B16A16_SNORM:
      r16g16b16a16_put_tile_rgba((short *) packed, w, h, p);
      break;
   case PIPE_FORMAT_Z16_UNORM:
   case PIPE_FORMAT_Z32_UNORM:
   case PIPE_FORMAT_S8Z24_UNORM:
   case PIPE_FORMAT_X8Z24_UNORM:
   case PIPE_FORMAT_Z24S8_UNORM:
   case PIPE_FORMAT_Z24X8_UNORM:
      /* no depth put functions exist; nothing was packed */
      converted = FALSE;
      break;
   default:
      debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format));
      converted = FALSE;
      break;
   }

   if (converted)
      pipe_put_tile_raw(pt, x, y, w, h, packed, 0);

   FREE(packed);
}

View File

@ -64,14 +64,18 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH];
void
lp_get_tile_rgba_soa(struct pipe_transfer *pt,
uint x, uint y,
uint8_t *p);
lp_tile_read_4ub(enum pipe_format format,
uint8_t *dst,
const void *src, unsigned src_stride,
unsigned x, unsigned y, unsigned w, unsigned h);
void
lp_put_tile_rgba_soa(struct pipe_transfer *pt,
uint x, uint y,
const uint8_t *p);
lp_tile_write_4ub(enum pipe_format format,
const uint8_t *src,
void *dst, unsigned dst_stride,
unsigned x, unsigned y, unsigned w, unsigned h);
#ifdef __cplusplus

View File

@ -0,0 +1,278 @@
#!/usr/bin/env python
'''
/**************************************************************************
*
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
* Pixel format accessor functions.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
'''
import sys
import os.path
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util'))
from u_format_access import *
def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
'''Generate the function to read pixels from a particular format'''
name = short_name(format)
src_native_type = native_type(format)
print 'static void'
print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
print '{'
print ' unsigned x, y;'
print ' const uint8_t *src_row = src + y0*src_stride;'
print ' for (y = 0; y < h; ++y) {'
print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
print ' for (x = 0; x < w; ++x) {'
names = ['']*4
if format.colorspace == 'rgb':
for i in range(4):
swizzle = format.out_swizzle[i]
if swizzle < 4:
names[swizzle] += 'rgba'[i]
elif format.colorspace == 'zs':
swizzle = format.out_swizzle[0]
if swizzle < 4:
names[swizzle] = 'z'
else:
assert False
else:
assert False
if format.layout == ARITH:
print ' %s pixel = *src_pixel++;' % src_native_type
shift = 0;
for i in range(4):
src_type = format.in_types[i]
width = src_type.size
if names[i]:
value = 'pixel'
mask = (1 << width) - 1
if shift:
value = '(%s >> %u)' % (value, shift)
if shift + width < format.block_size():
value = '(%s & 0x%x)' % (value, mask)
value = conversion_expr(src_type, dst_type, dst_native_type, value)
print ' %s %s = %s;' % (dst_native_type, names[i], value)
shift += width
elif format.layout == ARRAY:
for i in range(4):
src_type = format.in_types[i]
if names[i]:
value = '(*src_pixel++)'
value = conversion_expr(src_type, dst_type, dst_native_type, value)
print ' %s %s = %s;' % (dst_native_type, names[i], value)
else:
assert False
for i in range(4):
if format.colorspace == 'rgb':
swizzle = format.out_swizzle[i]
if swizzle < 4:
value = names[swizzle]
elif swizzle == SWIZZLE_0:
value = '0'
elif swizzle == SWIZZLE_1:
value = '1'
else:
assert False
elif format.colorspace == 'zs':
if i < 3:
value = 'z'
else:
value = '1'
else:
assert False
print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i])
print ' }'
print ' src_row += src_stride;'
print ' }'
print '}'
print
def generate_format_write(format, src_type, src_native_type, src_suffix):
'''Generate the function to write pixels to a particular format'''
name = short_name(format)
dst_native_type = native_type(format)
print 'static void'
print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
print '{'
print ' unsigned x, y;'
print ' uint8_t *dst_row = dst + y0*dst_stride;'
print ' for (y = 0; y < h; ++y) {'
print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
print ' for (x = 0; x < w; ++x) {'
inv_swizzle = [None]*4
if format.colorspace == 'rgb':
for i in range(4):
swizzle = format.out_swizzle[i]
if swizzle < 4:
inv_swizzle[swizzle] = i
elif format.colorspace == 'zs':
swizzle = format.out_swizzle[0]
if swizzle < 4:
inv_swizzle[swizzle] = 0
else:
assert False
if format.layout == ARITH:
print ' %s pixel = 0;' % dst_native_type
shift = 0;
for i in range(4):
dst_type = format.in_types[i]
width = dst_type.size
if inv_swizzle[i] is not None:
value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
value = conversion_expr(src_type, dst_type, dst_native_type, value)
if shift:
value = '(%s << %u)' % (value, shift)
print ' pixel |= %s;' % value
shift += width
print ' *dst_pixel++ = pixel;'
elif format.layout == ARRAY:
for i in range(4):
dst_type = format.in_types[i]
if inv_swizzle[i] is not None:
value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
value = conversion_expr(src_type, dst_type, dst_native_type, value)
print ' *dst_pixel++ = %s;' % value
else:
assert False
print ' }'
print ' dst_row += dst_stride;'
print ' }'
print '}'
print
def generate_read(formats, dst_type, dst_native_type, dst_suffix):
'''Generate the dispatch function to read pixels from any format'''
for format in formats:
if is_format_supported(format):
generate_format_read(format, dst_type, dst_native_type, dst_suffix)
print 'void'
print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
print '{'
print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
print ' switch(format) {'
for format in formats:
if is_format_supported(format):
print ' case %s:' % format.name
print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix)
print ' break;'
print ' default:'
print ' debug_printf("unsupported format\\n");'
print ' return;'
print ' }'
print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
print '}'
print
def generate_write(formats, src_type, src_native_type, src_suffix):
'''Generate the dispatch function to write pixels to any format'''
for format in formats:
if is_format_supported(format):
generate_format_write(format, src_type, src_native_type, src_suffix)
print 'void'
print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
print '{'
print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
print ' switch(format) {'
for format in formats:
if is_format_supported(format):
print ' case %s:' % format.name
print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix)
print ' break;'
print ' default:'
print ' debug_printf("unsupported format\\n");'
print ' return;'
print ' }'
print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
print '}'
print
def main():
formats = []
for arg in sys.argv[1:]:
formats.extend(parse(arg))
print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */'
print
# This will print the copyright message on the top of this file
print __doc__.strip()
print
print '#include "pipe/p_compiler.h"'
print '#include "util/u_format.h"'
print '#include "util/u_math.h"'
print '#include "lp_tile_soa.h"'
print
print 'const unsigned char'
print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {'
print ' { 0, 1, 4, 5, 8, 9, 12, 13},'
print ' { 2, 3, 6, 7, 10, 11, 14, 15}'
print '};'
print
generate_clamp()
type = Type(UNSIGNED, True, 8)
native_type = 'uint8_t'
suffix = '4ub'
generate_read(formats, type, native_type, suffix)
generate_write(formats, type, native_type, suffix)
if __name__ == '__main__':
main()