Improved the performance of software fallbacks by not waiting for idle

in every single span function. Instead flush and wait in the
SpanRenderStart hook and in wrappers around _swrast_Copy/Draw/ReadPixels.
Misc. cleanups in savagespan.c while I'm there.
This commit is contained in:
Felix Kuehling 2005-01-02 01:22:10 +00:00
parent 1067ce0cea
commit 7a231da442
2 changed files with 85 additions and 41 deletions

View File

@ -33,6 +33,7 @@
#define DBG 0
#define LOCAL_VARS \
savageContextPtr imesa = SAVAGE_CONTEXT(ctx); \
__DRIdrawablePrivate *dPriv = imesa->mesa_drawable; \
savageScreenPrivate *savageScreen = imesa->savageScreen; \
GLuint cpp = savageScreen->cpp; \
@ -48,6 +49,7 @@
(void) read_buf; (void) buf; (void) p
#define LOCAL_DEPTH_VARS \
savageContextPtr imesa = SAVAGE_CONTEXT(ctx); \
__DRIdrawablePrivate *dPriv = imesa->mesa_drawable; \
savageScreenPrivate *savageScreen = imesa->savageScreen; \
GLuint zpp = savageScreen->zpp; \
@ -77,8 +79,7 @@
#define Y_FLIP(_y) (height - _y - 1)
#define HW_LOCK() savageContextPtr imesa = SAVAGE_CONTEXT(ctx); \
WAIT_IDLE_EMPTY;\
#define HW_LOCK()
#define HW_CLIPLOOP() \
do { \
@ -95,11 +96,7 @@
} \
} while (0)
#if 0
#define HW_UNLOCK() \
UNLOCK_HARDWARE(imesa);
#endif
#define HW_UNLOCK() { }
#define HW_UNLOCK()
/* 16 bit, 565 rgb color spanline and pixel functions
@ -164,55 +161,47 @@ do { \
/* 16 bit depthbuffer functions.
*/
#define WRITE_DEPTH( _x, _y, d ) \
do{ \
*(GLushort *)(buf + (_x<<1) + _y*pitch) = d; \
}while(0)
*(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = d
#define READ_DEPTH( d, _x, _y ) \
do{ \
d = *(GLushort *)(buf + (_x<<1) + _y*pitch); \
}while(0)
/* d = 0xffff; */
d = *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch)
#define TAG(x) savage##x##_16
#include "depthtmp.h"
/* 8-bit stencil /24-bit depth depthbuffer functions.
*/
#define WRITE_DEPTH( _x, _y, d ) { \
GLuint tmp = *(GLuint *)(buf + (_x<<2) + _y*pitch); \
tmp &= 0xFF000000; \
tmp |= d; \
#define WRITE_DEPTH( _x, _y, d ) do { \
GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch); \
tmp &= 0xFF000000; \
tmp |= d; \
*(GLuint *)(buf + (_x<<2) + _y*pitch) = tmp; \
}
} while(0)
#define READ_DEPTH( d, _x, _y ) \
d = *(GLuint *)(buf + (_x<<2) + _y*pitch) & 0x00FFFFFF;
/* d = 0x00ffffff; */
d = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch)
#define TAG(x) savage##x##_8_24
#include "depthtmp.h"
#define WRITE_STENCIL( _x, _y, d ) { \
GLuint tmp = *(GLuint *)(buf + (_x<<2) + _y*pitch); \
tmp &= 0x00FFFFFF; \
tmp |= (((GLuint)d)<<24) & 0xFF000000; \
*(GLuint *)(buf + (_x<<2) + _y*pitch) = tmp; \
}
#define READ_STENCIL( d, _x, _y ) \
d = (GLstencil)((*(GLuint *)(buf + (_x<<2) + _y*pitch) & 0xFF000000) >> 24);
#define WRITE_STENCIL( _x, _y, d ) do { \
GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch); \
tmp &= 0x00FFFFFF; \
tmp |= (((GLuint)d)<<24) & 0xFF000000; \
*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) = tmp; \
} while(0)
#define READ_STENCIL( d, _x, _y ) \
d = (GLstencil)((*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0xFF000000) >> 24)
#define TAG(x) savage##x##_8_24
#include "stenciltmp.h"
/*
* This function is called to specify which buffer to read and write
@ -241,6 +230,58 @@ static void savageDDSetBuffer(GLcontext *ctx, GLframebuffer *buffer,
? imesa->driDrawable : imesa->driReadable;
}
/*
* Wrappers around _swrast_Copy/Draw/ReadPixels that make sure all
* primitives are flushed and the hardware is idle before accessing
* the frame buffer.
*/
static void
savageCopyPixels( GLcontext *ctx,
GLint srcx, GLint srcy, GLsizei width, GLsizei height,
GLint destx, GLint desty,
GLenum type )
{
savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
FLUSH_BATCH(imesa);
WAIT_IDLE_EMPTY;
_swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
static void
savageDrawPixels( GLcontext *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *packing,
const GLvoid *pixels )
{
savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
FLUSH_BATCH(imesa);
WAIT_IDLE_EMPTY;
_swrast_DrawPixels(ctx, x, y, width, height, format, type, packing, pixels);
}
static void
savageReadPixels( GLcontext *ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *packing,
GLvoid *pixels )
{
savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
FLUSH_BATCH(imesa);
WAIT_IDLE_EMPTY;
_swrast_ReadPixels(ctx, x, y, width, height, format, type, packing, pixels);
}
/*
* Make sure the hardware is idle when span-rendering.
*/
static void savageSpanRenderStart( GLcontext *ctx )
{
savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
FLUSH_BATCH(imesa);
WAIT_IDLE_EMPTY;
}
void savageDDInitSpanFuncs( GLcontext *ctx )
{
@ -277,6 +318,7 @@ void savageDDInitSpanFuncs( GLcontext *ctx )
case 2:
swdd->ReadDepthSpan = savageReadDepthSpan_16;
swdd->WriteDepthSpan = savageWriteDepthSpan_16;
swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16;
swdd->ReadDepthPixels = savageReadDepthPixels_16;
swdd->WriteDepthPixels = savageWriteDepthPixels_16;
@ -284,6 +326,7 @@ void savageDDInitSpanFuncs( GLcontext *ctx )
case 4:
swdd->ReadDepthSpan = savageReadDepthSpan_8_24;
swdd->WriteDepthSpan = savageWriteDepthSpan_8_24;
swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24;
swdd->ReadDepthPixels = savageReadDepthPixels_8_24;
swdd->WriteDepthPixels = savageWriteDepthPixels_8_24;
swdd->ReadStencilSpan = savageReadStencilSpan_8_24;
@ -301,11 +344,13 @@ void savageDDInitSpanFuncs( GLcontext *ctx )
swdd->ReadCI32Span =NULL;
swdd->ReadCI32Pixels =NULL;
swdd->SpanRenderStart = savageSpanRenderStart;
/* Pixel path fallbacks.
*/
ctx->Driver.Accum = _swrast_Accum;
ctx->Driver.Bitmap = _swrast_Bitmap;
ctx->Driver.CopyPixels = _swrast_CopyPixels;
ctx->Driver.DrawPixels = _swrast_DrawPixels;
ctx->Driver.ReadPixels = _swrast_ReadPixels;
ctx->Driver.CopyPixels = savageCopyPixels;
ctx->Driver.DrawPixels = savageDrawPixels;
ctx->Driver.ReadPixels = savageReadPixels;
}

View File

@ -1120,7 +1120,6 @@ void savageFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
imesa->Fallback |= bit;
if (oldfallback == 0) {
/* the first fallback */
FLUSH_BATCH( imesa );
_swsetup_Wakeup( ctx );
imesa->RenderIndex = ~0;
}