i965: Move repeat-instruction-suppression to batchbuffer core

Move the tracking of the last emitted instructions into the core
batchbuffer routines and take advantage of the shadow batch copy to
avoid extra memory allocations and copies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Author: Chris Wilson
Date:   2011-02-20 13:23:47 +00:00
commit aac120977d
parent 8d68a90e22

9 changed files with 119 additions and 151 deletions
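As background for the diff below, here is a minimal standalone sketch (not part of the commit) of the suppression scheme it introduces: each emitted packet is keyed by the opcode in its first dword, and a cache entry stores only an offset and size into the shadow batch map, so a repeated packet is detected with a single memcmp against its previous copy and suppressed by rewinding the write pointer. The cached_batch_item fields and the list walk mirror the diff; the reduced batch struct, the begin_batch()/out_batch() helpers, the opcode value, and the main() driver are invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;   /* dword offset of the cached packet in map[] */
   uint16_t size;     /* packet size in bytes */
};

/* Stand-in for struct intel_batchbuffer: just the fields the cache needs. */
struct batch {
   struct cached_batch_item *cached_items;
   uint16_t emit, used;   /* dword offsets: current packet start, write ptr */
   uint32_t map[256];     /* shadow copy of the batch contents */
};

static void begin_batch(struct batch *b) { b->emit = b->used; }
static void out_batch(struct batch *b, uint32_t dw) { b->map[b->used++] = dw; }

/* Same walk as intel_batchbuffer_cached_advance() in the diff below. */
static void cached_advance(struct batch *b)
{
   struct cached_batch_item **prev = &b->cached_items, *item;
   uint32_t sz = (b->used - b->emit) * sizeof(uint32_t);
   uint32_t *start = b->map + b->emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = b->map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &b->cached_items) {
               /* move-to-front so frequently repeated packets hit early */
               *prev = item->next;
               item->next = b->cached_items;
               b->cached_items = item;
            }
            b->used = b->emit;   /* identical packet: rewind, emit nothing */
            return;
         }
         goto emit;              /* same opcode, new payload: refresh entry */
      }
      prev = &item->next;
   }

   item = malloc(sizeof(*item));
   if (item == NULL)
      return;
   item->next = b->cached_items;
   b->cached_items = item;

emit:
   item->size = sz;
   item->header = b->emit;      /* the shadow map itself holds the bytes */
}

int main(void)
{
   struct batch b = { 0 };
   int i;

   for (i = 0; i < 3; i++) {
      begin_batch(&b);
      out_batch(&b, 0x7900u << 16 | (2 - 2));  /* arbitrary opcode dword */
      out_batch(&b, 0xffffu);                  /* payload */
      cached_advance(&b);
   }
   /* Only the first copy of the packet survives: prints 2, not 6. */
   printf("dwords in batch: %u\n", b.used);
   return 0;
}

The saving over the old brw_cached_batch_struct() is that no separately malloc'd copy of each packet is kept: the shadow batch map already holds the bytes, so a cache entry shrinks to an (offset, size) pair and the per-emit memcpy disappears, which is exactly what the commit message means by taking advantage of the shadow batch copy.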

src/mesa/drivers/dri/i965/brw_cc.c

@@ -233,18 +233,16 @@ const struct brw_tracked_state brw_cc_unit = {
 static void upload_blend_constant_color(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_blend_constant_color bcc;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
 
-   memset(&bcc, 0, sizeof(bcc));
-   bcc.header.opcode = _3DSTATE_BLEND_CONSTANT_COLOR;
-   bcc.header.length = sizeof(bcc)/4-2;
-   bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
-   bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
-   bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
-   bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
+   OUT_BATCH(ctx->Color.BlendColor[0]);
+   OUT_BATCH(ctx->Color.BlendColor[1]);
+   OUT_BATCH(ctx->Color.BlendColor[2]);
+   OUT_BATCH(ctx->Color.BlendColor[3]);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_blend_constant_color = {

src/mesa/drivers/dri/i965/brw_curbe.c

@@ -146,22 +146,24 @@ const struct brw_tracked_state brw_curbe_offsets = {
  */
 void brw_upload_cs_urb_state(struct brw_context *brw)
 {
-   struct brw_cs_urb_state cs_urb;
-   memset(&cs_urb, 0, sizeof(cs_urb));
+   struct intel_context *intel = &brw->intel;
 
+   BEGIN_BATCH(2);
    /* It appears that this is the state packet for the CS unit, ie. the
     * urb entries detailed here are housed in the CS range from the
     * URB_FENCE command.
     */
-   cs_urb.header.opcode = CMD_CS_URB_STATE;
-   cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+   OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
 
-   /* BRW_NEW_URB_FENCE */
-   cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
-   cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
-
-   assert(brw->urb.nr_cs_entries);
-   BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+   if (brw->urb.csize == 0) {
+      OUT_BATCH(0);
+   } else {
+      /* BRW_NEW_URB_FENCE */
+      assert(brw->urb.nr_cs_entries);
+      OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
+   }
+   CACHED_BATCH();
 }
 
 static GLfloat fixed_plane[6][4] = {

src/mesa/drivers/dri/i965/brw_misc_state.c

@@ -301,16 +301,15 @@ const struct brw_tracked_state brw_depthbuffer = {
 static void upload_polygon_stipple(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_polygon_stipple bps;
    GLuint i;
 
    if (!ctx->Polygon.StippleFlag)
       return;
 
-   memset(&bps, 0, sizeof(bps));
-   bps.header.opcode = _3DSTATE_POLY_STIPPLE_PATTERN;
-   bps.header.length = sizeof(bps)/4-2;
+   BEGIN_BATCH(33);
+   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
 
    /* Polygon stipple is provided in OpenGL order, i.e. bottom
     * row first. If we're rendering to a window (i.e. the
@@ -321,14 +320,13 @@ static void upload_polygon_stipple(struct brw_context *brw)
     */
    if (ctx->DrawBuffer->Name == 0) {
       for (i = 0; i < 32; i++)
-         bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
    }
    else {
       for (i = 0; i < 32; i++)
-         bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+         OUT_BATCH(ctx->PolygonStipple[i]);
    }
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bps);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_polygon_stipple = {
@@ -347,15 +345,14 @@ const struct brw_tracked_state brw_polygon_stipple = {
 static void upload_polygon_stipple_offset(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_polygon_stipple_offset bpso;
 
    if (!ctx->Polygon.StippleFlag)
       return;
 
-   memset(&bpso, 0, sizeof(bpso));
-   bpso.header.opcode = _3DSTATE_POLY_STIPPLE_OFFSET;
-   bpso.header.length = sizeof(bpso)/4-2;
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
 
    /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
     * we have to invert the Y axis in order to match the OpenGL
@@ -365,16 +362,11 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
     * system works just fine, and there's no window system to
     * worry about.
     */
-   if (brw->intel.ctx.DrawBuffer->Name == 0) {
-      bpso.bits0.x_offset = 0;
-      bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31;
-   }
-   else {
-      bpso.bits0.y_offset = 0;
-      bpso.bits0.x_offset = 0;
-   }
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+   if (brw->intel.ctx.DrawBuffer->Name == 0)
+      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+   else
+      OUT_BATCH(0);
+   CACHED_BATCH();
 }
 
 #define _NEW_WINDOW_POS 0x40000000
@@ -393,18 +385,17 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
  */
 static void upload_aa_line_parameters(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_aa_line_parameters balp;
 
    if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
       return;
 
+   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
    /* use legacy aa line coverage computation */
-   memset(&balp, 0, sizeof(balp));
-   balp.header.opcode = _3DSTATE_AA_LINE_PARAMETERS;
-   balp.header.length = sizeof(balp) / 4 - 2;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &balp);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_aa_line_parameters = {
@@ -422,28 +413,21 @@ const struct brw_tracked_state brw_aa_line_parameters = {
 static void upload_line_stipple(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_line_stipple bls;
    GLfloat tmp;
    GLint tmpi;
 
    if (!ctx->Line.StippleFlag)
      return;
 
-   memset(&bls, 0, sizeof(bls));
-   bls.header.opcode = _3DSTATE_LINE_STIPPLE_PATTERN;
-   bls.header.length = sizeof(bls)/4 - 2;
-
-   bls.bits0.pattern = ctx->Line.StipplePattern;
-   bls.bits1.repeat_count = ctx->Line.StippleFactor;
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
+   OUT_BATCH(ctx->Line.StipplePattern);
 
    tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
    tmpi = tmp * (1<<13);
-
-   bls.bits1.inverse_repeat_count = tmpi;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_line_stipple = {

src/mesa/drivers/dri/i965/brw_state.h

@@ -166,13 +166,7 @@ void brw_destroy_caches( struct brw_context *brw );
  */
 #define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(&brw->intel, (s), \
                                                         sizeof(*(s)), false)
-#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
-
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
-                                   const void *data,
-                                   GLuint sz );
-void brw_destroy_batch_cache( struct brw_context *brw );
-void brw_clear_batch_cache( struct brw_context *brw );
 
 void *brw_state_batch(struct brw_context *brw,
                       int size,
                       int alignment,

src/mesa/drivers/dri/i965/brw_state_batch.c

@@ -29,75 +29,10 @@
  * Keith Whitwell <keith@tungstengraphics.com>
  */
 
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "main/imports.h"
 
-/* A facility similar to the data caching code above, which aims to
- * prevent identical commands being issued repeatedly.
- */
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
-                                   const void *data,
-                                   GLuint sz )
-{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
-   struct header *newheader = (struct header *)data;
-
-   if (brw->emit_state_always) {
-      intel_batchbuffer_data(&brw->intel, data, sz, false);
-      return GL_TRUE;
-   }
-
-   while (item) {
-      if (item->header->opcode == newheader->opcode) {
-         if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
-            return GL_FALSE;
-         if (item->sz != sz) {
-            free(item->header);
-            item->header = malloc(sz);
-            item->sz = sz;
-         }
-         goto emit;
-      }
-      item = item->next;
-   }
-
-   assert(!item);
-   item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = malloc(sz);
-   item->sz = sz;
-   item->next = brw->cached_batch_items;
-   brw->cached_batch_items = item;
-
- emit:
-   memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(&brw->intel, data, sz, false);
-   return GL_TRUE;
-}
-
-void brw_clear_batch_cache( struct brw_context *brw )
-{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
-
-   while (item) {
-      struct brw_cached_batch_item *next = item->next;
-      free((void *)item->header);
-      free(item);
-      item = next;
-   }
-
-   brw->cached_batch_items = NULL;
-}
-
-void brw_destroy_batch_cache( struct brw_context *brw )
-{
-   brw_clear_batch_cache(brw);
-}
-
 /**
  * Allocates a block of space in the batchbuffer for indirect state.
  *

src/mesa/drivers/dri/i965/brw_state_upload.c

@@ -176,7 +176,6 @@ void brw_init_state( struct brw_context *brw )
 void brw_destroy_state( struct brw_context *brw )
 {
    brw_destroy_caches(brw);
-   brw_destroy_batch_cache(brw);
 }
 
 /***********************************************************************
@@ -383,9 +382,6 @@ void brw_validate_state( struct brw_context *brw )
        state->brw == 0)
       return;
 
-   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
-      brw_clear_batch_cache(brw);
-
    brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
 
    /* do prepare stage for all atoms */

src/mesa/drivers/dri/intel/intel_batchbuffer.c

@@ -33,6 +33,25 @@
 #include "intel_bufmgr.h"
 #include "intel_buffers.h"
 
+struct cached_batch_item {
+   struct cached_batch_item *next;
+   uint16_t header;
+   uint16_t size;
+};
+
+static void clear_cache( struct intel_context *intel )
+{
+   struct cached_batch_item *item = intel->batch.cached_items;
+
+   while (item) {
+      struct cached_batch_item *next = item->next;
+      free(item);
+      item = next;
+   }
+
+   intel->batch.cached_items = NULL;
+}
+
 void
 intel_batchbuffer_reset(struct intel_context *intel)
 {
@@ -40,6 +59,7 @@ intel_batchbuffer_reset(struct intel_context *intel)
       drm_intel_bo_unreference(intel->batch.bo);
       intel->batch.bo = NULL;
    }
+   clear_cache(intel);
 
    intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                         intel->maxBatchSize, 4096);
@@ -53,6 +73,7 @@ void
 intel_batchbuffer_free(struct intel_context *intel)
 {
    drm_intel_bo_unreference(intel->batch.bo);
+   clear_cache(intel);
 }
 
@@ -165,7 +186,8 @@ intel_batchbuffer_emit_reloc(struct intel_context *intel,
    ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                  buffer, delta,
                                  read_domains, write_domain);
-   assert (ret == 0);
+   assert(ret == 0);
+   (void)ret;
 
    /*
    * Using the old buffer offset, write in what the right data would be, in case
@@ -191,7 +213,8 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
    ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                        buffer, delta,
                                        read_domains, write_domain);
-   assert (ret == 0);
+   assert(ret == 0);
+   (void)ret;
 
    /*
    * Using the old buffer offset, write in what the right data would
@@ -213,6 +236,47 @@ intel_batchbuffer_data(struct intel_context *intel,
    intel->batch.used += bytes >> 2;
 }
 
+void
+intel_batchbuffer_cached_advance(struct intel_context *intel)
+{
+   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
+   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
+   uint32_t *start = intel->batch.map + intel->batch.emit;
+   uint16_t op = *start >> 16;
+
+   while (*prev) {
+      uint32_t *old;
+
+      item = *prev;
+      old = intel->batch.map + item->header;
+      if (op == *old >> 16) {
+         if (item->size == sz && memcmp(old, start, sz) == 0) {
+            if (prev != &intel->batch.cached_items) {
+               *prev = item->next;
+               item->next = intel->batch.cached_items;
+               intel->batch.cached_items = item;
+            }
+            intel->batch.used = intel->batch.emit;
+            return;
+         }
+
+         goto emit;
+      }
+      prev = &item->next;
+   }
+
+   item = malloc(sizeof(struct cached_batch_item));
+   if (item == NULL)
+      return;
+
+   item->next = intel->batch.cached_items;
+   intel->batch.cached_items = item;
+
+ emit:
+   item->size = sz;
+   item->header = intel->batch.emit;
+}
+
 /* Emit a pipelined flush to either flush render and texture cache for
  * reading from a FBO-drawn texture, or flush so that frontbuffer
  * render appears on the screen in DRI1.

src/mesa/drivers/dri/intel/intel_batchbuffer.h

@@ -101,9 +101,9 @@ intel_batchbuffer_begin(struct intel_context *intel, int n, bool is_blit)
 {
    intel_batchbuffer_require_space(intel, n * 4, is_blit);
 
+   intel->batch.emit = intel->batch.used;
 #ifdef DEBUG
-   intel->batch.emit.total = n;
-   intel->batch.emit.start_ptr = intel->batch.used;
+   intel->batch.total = n;
 #endif
 }
 
@@ -123,6 +123,8 @@ intel_batchbuffer_advance(struct intel_context *intel)
 #endif
 }
 
+void intel_batchbuffer_cached_advance(struct intel_context *intel);
+
 /* Here are the crusty old macros, to be removed:
  */
 #define BATCH_LOCALS
@@ -141,5 +143,6 @@ intel_batchbuffer_advance(struct intel_context *intel)
    } while (0)
 
 #define ADVANCE_BATCH() intel_batchbuffer_advance(intel);
+#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel);
 
 #endif

src/mesa/drivers/dri/intel/intel_context.h

@@ -171,22 +171,14 @@ struct intel_context
    struct intel_batchbuffer {
       drm_intel_bo *bo;
+      struct cached_batch_item *cached_items;
 
-      uint16_t used;
-      uint16_t reserved_space;
+      uint16_t emit, total;
+      uint16_t used, reserved_space;
       uint32_t map[8192];
 #define BATCH_SZ (8192*sizeof(uint32_t))
 
       uint32_t state_batch_offset;
-
-#ifdef DEBUG
-      /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
-      struct {
-         uint16_t total;
-         uint16_t start_ptr;
-      } emit;
-#endif
       bool is_blit;
    } batch;