r600g: precompute some of the hw state

The idea is to build the hw state at pipe state creation and
reuse it, while keeping a non-PM4-packet interface between
winsys & pipe driver. This commit also forces a rebuild of the
PM4 packet on each call to radeon_state_pm4, which in turn
slows everything down; this will be addressed.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Jerome Glisse 2010-08-29 21:01:51 -04:00
parent 0bba7796a3
commit de0b76cab2
11 changed files with 231 additions and 218 deletions
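The pattern behind the change is easiest to see in miniature: translate the Gallium pipe state into its hardware form once, when the state object is created, and have bind/draw simply point at the cached result instead of re-deriving it every draw. Below is a small, self-contained C sketch of that split; all names are hypothetical stand-ins (for the r600_context_state wrapper, its new rstate member, and builders like r600_blend), not the actual driver code.

#include <stdio.h>
#include <stdlib.h>

/* Toy "pipe" state as supplied by the state tracker. */
struct pipe_blend_state_toy {
    int blend_enable;
};

/* Toy precomputed "hardware" state: in the real driver this would be a
 * struct radeon_state holding register values and a PM4 packet. */
struct hw_state_toy {
    unsigned reg_value;
};

/* Wrapper kept on the context, analogous to r600_context_state with its
 * new rstate member. */
struct context_state_toy {
    struct pipe_blend_state_toy state;
    struct hw_state_toy *rstate;   /* built once at creation time */
};

/* Creation hook: translate the pipe state into hw state immediately. */
static struct context_state_toy *
create_blend_state(const struct pipe_blend_state_toy *state)
{
    struct context_state_toy *cs = calloc(1, sizeof(*cs));
    if (!cs)
        return NULL;
    cs->state = *state;
    cs->rstate = calloc(1, sizeof(*cs->rstate));
    if (!cs->rstate) {
        free(cs);
        return NULL;
    }
    /* the expensive translation happens once, not on every draw */
    cs->rstate->reg_value = state->blend_enable ? 0x1 : 0x0;
    return cs;
}

/* Draw/bind hook: just reuse the cached hw state. */
static void bind_blend_state(struct hw_state_toy **slot,
                             struct context_state_toy *cs)
{
    *slot = cs->rstate;
}

int main(void)
{
    struct pipe_blend_state_toy blend = { .blend_enable = 1 };
    struct hw_state_toy *bound = NULL;

    struct context_state_toy *cs = create_blend_state(&blend);
    if (!cs)
        return 1;
    bind_blend_state(&bound, cs);
    printf("bound register value: 0x%x\n", bound->reg_value);

    free(cs->rstate);
    free(cs);
    return 0;
}

In the real driver the expensive step is filling in the radeon_state registers and PM4 packet; this commit moves that work out of r600_context_hw_states() and into the create/bind paths via r600_context_state(), so the draw path mostly just rebinds cached objects.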


@ -670,7 +670,7 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
if (r) {
return r;
}
r = r600_texture_cb0(ctx, rtexture, level);
r = r600_texture_cb(ctx, rtexture, 0, level);
if (r) {
return r;
}
@ -772,7 +772,7 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te
if (r) {
goto out;
}
r = radeon_draw_set(draw, rtexture->cb0[level]);
r = radeon_draw_set(draw, rtexture->cb[0][level]);
if (r) {
goto out;
}


@ -121,7 +121,7 @@ struct r600_context_hw_states {
struct radeon_state *config;
struct radeon_state *cb_cntl;
struct radeon_state *db;
struct radeon_state *ucp[6];
struct radeon_state *ucp;
unsigned ps_nresource;
unsigned ps_nsampler;
struct radeon_state *ps_resource[160];


@ -57,7 +57,7 @@ struct r600_resource_texture {
unsigned dirty;
struct radeon_bo *uncompressed;
struct radeon_state *scissor[PIPE_MAX_TEXTURE_LEVELS];
struct radeon_state *cb0[PIPE_MAX_TEXTURE_LEVELS];
struct radeon_state *cb[8][PIPE_MAX_TEXTURE_LEVELS];
struct radeon_state *db[PIPE_MAX_TEXTURE_LEVELS];
struct radeon_state *viewport[PIPE_MAX_TEXTURE_LEVELS];
};


@ -84,7 +84,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
int r600_texture_cb0(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level);
int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);


@ -34,6 +34,16 @@
#include "r600d.h"
#include "r600_state_inlines.h"
static struct radeon_state *r600_blend(struct r600_context *rctx, const struct pipe_blend_state *state);
static struct radeon_state *r600_viewport(struct r600_context *rctx, const struct pipe_viewport_state *state);
static struct radeon_state *r600_ucp(struct r600_context *rctx, const struct pipe_clip_state *state);
static struct radeon_state *r600_sampler(struct r600_context *rctx,
const struct pipe_sampler_state *state,
unsigned id);
static struct radeon_state *r600_resource(struct pipe_context *ctx,
const struct pipe_sampler_view *view,
unsigned id);
static void *r600_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
@ -86,6 +96,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
rstate->state.sampler_view.texture = texture;
rstate->state.sampler_view.reference.count = 1;
rstate->state.sampler_view.context = ctx;
rstate->rstate = r600_resource(ctx, &rstate->state.sampler_view, 0);
return &rstate->state.sampler_view;
}
@ -229,6 +240,9 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx,
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)states[i];
rctx->ps_sampler[i] = r600_context_state_incref(rstate);
if (rstate) {
radeon_state_convert(rstate->rstate, R600_STATE_SAMPLER, i, R600_SHADER_PS);
}
}
rctx->ps_nsampler = count;
}
@ -246,6 +260,9 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx,
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)states[i];
rctx->vs_sampler[i] = r600_context_state_incref(rstate);
if (rstate) {
radeon_state_convert(rstate->rstate, R600_STATE_SAMPLER, i, R600_SHADER_VS);
}
}
rctx->vs_nsampler = count;
}
@ -337,6 +354,9 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx,
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)views[i];
rctx->ps_sampler_view[i] = r600_context_state_incref(rstate);
if (rstate) {
radeon_state_convert(rstate->rstate, R600_STATE_RESOURCE, i, R600_SHADER_PS);
}
}
rctx->ps_nsampler_view = count;
}
@ -355,6 +375,9 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx,
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)views[i];
rctx->vs_sampler_view[i] = r600_context_state_incref(rstate);
if (rstate) {
radeon_state_convert(rstate->rstate, R600_STATE_RESOURCE, i, R600_SHADER_VS);
}
}
rctx->vs_nsampler_view = count;
}
@ -363,10 +386,19 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
const struct pipe_framebuffer_state *state)
{
struct r600_context *rctx = r600_context(ctx);
struct r600_resource_texture *rtexture;
struct r600_context_state *rstate;
rstate = r600_context_state(rctx, pipe_framebuffer_type, state);
r600_bind_state(ctx, rstate);
for (int i = 0; i < state->nr_cbufs; i++) {
rtexture = (struct r600_resource_texture*)state->cbufs[i]->texture;
r600_texture_cb(ctx, rtexture, i, state->cbufs[i]->level);
}
if (state->zsbuf) {
rtexture = (struct r600_resource_texture*)state->zsbuf->texture;
r600_texture_db(ctx, rtexture, state->zsbuf->level);
}
}
static void r600_set_polygon_stipple(struct pipe_context *ctx,
@ -565,6 +597,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_viewport_type:
rstate->state.viewport = (*states).viewport;
rstate->rstate = r600_viewport(rctx, &rstate->state.viewport);
break;
case pipe_depth_type:
rstate->state.depth = (*states).depth;
@ -580,6 +613,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_clip_type:
rstate->state.clip = (*states).clip;
rstate->rstate = r600_ucp(rctx, &rstate->state.clip);
break;
case pipe_stencil_type:
rstate->state.stencil = (*states).stencil;
@ -592,6 +626,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_blend_type:
rstate->state.blend = (*states).blend;
rstate->rstate = r600_blend(rctx, &rstate->state.blend);
break;
case pipe_stencil_ref_type:
rstate->state.stencil_ref = (*states).stencil_ref;
@ -606,6 +641,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_sampler_type:
rstate->state.sampler = (*states).sampler;
rstate->rstate = r600_sampler(rctx, &rstate->state.sampler, 0);
break;
default:
R600_ERR("invalid type %d\n", rstate->type);
@ -615,11 +651,10 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
return rstate;
}
static struct radeon_state *r600_blend(struct r600_context *rctx)
static struct radeon_state *r600_blend(struct r600_context *rctx, const struct pipe_blend_state *state)
{
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
const struct pipe_blend_state *state = &rctx->blend->state.blend;
int i;
rstate = radeon_state(rscreen->rw, R600_STATE_BLEND, 0);
@ -675,20 +710,21 @@ static struct radeon_state *r600_blend(struct r600_context *rctx)
return rstate;
}
static struct radeon_state *r600_ucp(struct r600_context *rctx, int clip)
static struct radeon_state *r600_ucp(struct r600_context *rctx, const struct pipe_clip_state *state)
{
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
const struct pipe_clip_state *state = &rctx->clip->state.clip;
rstate = radeon_state(rscreen->rw, R600_STATE_CLIP, clip);
rstate = radeon_state(rscreen->rw, R600_STATE_UCP, 0);
if (rstate == NULL)
return NULL;
rstate->states[R600_CLIP__PA_CL_UCP_X_0] = fui(state->ucp[clip][0]);
rstate->states[R600_CLIP__PA_CL_UCP_Y_0] = fui(state->ucp[clip][1]);
rstate->states[R600_CLIP__PA_CL_UCP_Z_0] = fui(state->ucp[clip][2]);
rstate->states[R600_CLIP__PA_CL_UCP_W_0] = fui(state->ucp[clip][3]);
for (int i = 0; i < state->nr; i++) {
rstate->states[i * 4 + 0] = fui(state->ucp[i][0]);
rstate->states[i * 4 + 1] = fui(state->ucp[i][1]);
rstate->states[i * 4 + 2] = fui(state->ucp[i][2]);
rstate->states[i * 4 + 3] = fui(state->ucp[i][3]);
}
if (radeon_state_pm4(rstate)) {
radeon_state_decref(rstate);
@ -698,108 +734,6 @@ static struct radeon_state *r600_ucp(struct r600_context *rctx, int clip)
}
static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
struct radeon_state *rstate;
const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
unsigned level = state->cbufs[cb]->level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
const struct util_format_description *desc;
rstate = radeon_state(rscreen->rw, R600_STATE_CB0 + cb, 0);
if (rstate == NULL)
return NULL;
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
rbuffer = &rtex->resource;
rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
rstate->nbo = 3;
pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1;
ntype = 0;
desc = util_format_description(rtex->resource.base.b.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_0280A0_NUMBER_SRGB;
format = r600_translate_colorformat(rtex->resource.base.b.format);
swap = r600_translate_colorswap(rtex->resource.base.b.format);
color_info = S_0280A0_FORMAT(format) |
S_0280A0_COMP_SWAP(swap) |
S_0280A0_BLEND_CLAMP(1) |
S_0280A0_SOURCE_FORMAT(1) |
S_0280A0_NUMBER_TYPE(ntype);
rstate->states[R600_CB0__CB_COLOR0_BASE] = rtex->offset[level] >> 8;
rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
S_028060_SLICE_TILE_MAX(slice);
rstate->states[R600_CB0__CB_COLOR0_VIEW] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000;
if (radeon_state_pm4(rstate)) {
radeon_state_decref(rstate);
return NULL;
}
return rstate;
}
static struct radeon_state *r600_db(struct r600_context *rctx)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
struct radeon_state *rstate;
const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
unsigned level;
unsigned pitch, slice, format;
if (state->zsbuf == NULL)
return NULL;
rstate = radeon_state(rscreen->rw, R600_STATE_DB, 0);
if (rstate == NULL)
return NULL;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
rtex->tilled = 1;
rtex->array_mode = 2;
rtex->tile_type = 1;
rtex->depth = 1;
rbuffer = &rtex->resource;
rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
rstate->nbo = 1;
rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
level = state->zsbuf->level;
pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
rstate->states[R600_DB__DB_DEPTH_BASE] = rtex->offset[level] >> 8;
rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) |
S_028010_FORMAT(format);
rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1;
rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
S_028000_SLICE_TILE_MAX(slice);
if (radeon_state_pm4(rstate)) {
radeon_state_decref(rstate);
return NULL;
}
return rstate;
}
static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
{
const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer;
@ -954,9 +888,8 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx)
return rstate;
}
static struct radeon_state *r600_viewport(struct r600_context *rctx)
static struct radeon_state *r600_viewport(struct r600_context *rctx, const struct pipe_viewport_state *state)
{
const struct pipe_viewport_state *state = &rctx->viewport->state.viewport;
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
@ -1366,6 +1299,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
int r600_context_hw_states(struct pipe_context *ctx)
{
struct r600_context *rctx = r600_context(ctx);
struct r600_resource_texture *rtexture;
unsigned i;
int r;
int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
@ -1377,69 +1311,59 @@ int r600_context_hw_states(struct pipe_context *ctx)
/* free previous TODO determine what need to be updated, what
* doesn't
*/
//radeon_state_decref(rctx->hw_states.config);
rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl);
rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db);
rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer);
rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor);
rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa);
rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend);
rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport);
for (i = 0; i < 8; i++) {
rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]);
}
for (i = 0; i < 6; i++) {
rctx->hw_states.ucp[i] = radeon_state_decref(rctx->hw_states.ucp[i]);
}
for (i = 0; i < rctx->hw_states.ps_nresource; i++) {
radeon_state_decref(rctx->hw_states.ps_resource[i]);
rctx->hw_states.ps_resource[i] = NULL;
}
rctx->hw_states.ps_nresource = 0;
for (i = 0; i < rctx->hw_states.ps_nsampler; i++) {
radeon_state_decref(rctx->hw_states.ps_sampler[i]);
rctx->hw_states.ps_sampler[i] = NULL;
}
rctx->hw_states.ps_nsampler = 0;
/* build new states */
rctx->hw_states.blend = NULL;
rctx->hw_states.viewport = NULL;
rctx->hw_states.ucp = NULL;
rctx->hw_states.rasterizer = r600_rasterizer(rctx);
rctx->hw_states.scissor = r600_scissor(rctx);
rctx->hw_states.dsa = r600_dsa(rctx);
rctx->hw_states.blend = r600_blend(rctx);
rctx->hw_states.viewport = r600_viewport(rctx);
for (i = 0; i < nr_cbufs; i++) {
rctx->hw_states.cb[i] = r600_cb(rctx, i);
}
for (i = 0; i < ucp_nclip; i++) {
rctx->hw_states.ucp[i] = r600_ucp(rctx, i);
}
rctx->hw_states.db = r600_db(rctx);
rctx->hw_states.cb_cntl = r600_cb_cntl(rctx);
if (rctx->viewport) {
rctx->hw_states.viewport = rctx->viewport->rstate;
}
if (rctx->blend) {
rctx->hw_states.blend = rctx->blend->rstate;
}
if (rctx->clip) {
rctx->hw_states.ucp = rctx->clip->rstate;
}
for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) {
rtexture = (struct r600_resource_texture*)rctx->framebuffer->state.framebuffer.cbufs[i]->texture;
rctx->hw_states.cb[i] = rtexture->cb[i][rctx->framebuffer->state.framebuffer.cbufs[i]->level];
}
if (rctx->framebuffer->state.framebuffer.zsbuf) {
rtexture = (struct r600_resource_texture*)rctx->framebuffer->state.framebuffer.zsbuf->texture;
rctx->hw_states.db = rtexture->db[rctx->framebuffer->state.framebuffer.zsbuf->level];
}
for (i = 0; i < rctx->ps_nsampler; i++) {
if (rctx->ps_sampler[i]) {
rctx->hw_states.ps_sampler[i] = r600_sampler(rctx,
&rctx->ps_sampler[i]->state.sampler,
i);
rctx->hw_states.ps_sampler[i] = rctx->ps_sampler[i]->rstate;
} else {
rctx->hw_states.ps_sampler[i] = NULL;
}
}
rctx->hw_states.ps_nsampler = rctx->ps_nsampler;
for (i = 0; i < rctx->ps_nsampler_view; i++) {
if (rctx->ps_sampler_view[i]) {
rctx->hw_states.ps_resource[i] = r600_resource(ctx,
&rctx->ps_sampler_view[i]->state.sampler_view,
i);
rctx->hw_states.ps_resource[i] = rctx->ps_sampler_view[i]->rstate;
} else {
rctx->hw_states.ps_resource[i] = NULL;
}
}
rctx->hw_states.ps_nresource = rctx->ps_nsampler_view;
/* bind states */
for (i = 0; i < ucp_nclip; i++) {
r = radeon_draw_set(rctx->draw, rctx->hw_states.ucp[i]);
if (r)
return r;
}
r = radeon_draw_set(rctx->draw, rctx->hw_states.ucp);
if (r)
return r;
r = radeon_draw_set(rctx->draw, rctx->hw_states.db);
if (r)
return r;


@ -128,13 +128,25 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
return &resource->base.b;
}
static void r600_texture_destroy_state(struct pipe_resource *ptexture)
{
struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture;
for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) {
radeon_state_decref(rtexture->scissor[i]);
radeon_state_decref(rtexture->db[i]);
for (int j = 0; j < 8; j++) {
radeon_state_decref(rtexture->cb[j][i]);
}
}
}
static void r600_texture_destroy(struct pipe_screen *screen,
struct pipe_resource *ptex)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
struct r600_screen *rscreen = r600_screen(screen);
unsigned i;
if (resource->bo) {
radeon_bo_decref(rscreen->rw, resource->bo);
@ -142,11 +154,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
if (rtex->uncompressed) {
radeon_bo_decref(rscreen->rw, rtex->uncompressed);
}
for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) {
radeon_state_decref(rtex->scissor[i]);
radeon_state_decref(rtex->cb0[i]);
radeon_state_decref(rtex->db[i]);
}
r600_texture_destroy_state(ptex);
FREE(rtex);
}
@ -211,9 +219,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
pipe_reference_init(&resource->base.b.reference, 1);
resource->base.b.screen = screen;
resource->bo = bo;
rtex->depth = 0;
rtex->pitch_override = whandle->stride;
rtex->bpt = util_format_get_blocksize(templ->format);
rtex->pitch[0] = whandle->stride;
rtex->width[0] = templ->width0;
rtex->height[0] = templ->height0;
rtex->offset[0] = 0;
rtex->size = align(rtex->pitch[0] * templ->height0, 64);
@ -696,9 +707,9 @@ static struct radeon_state *r600_texture_state_scissor(struct r600_screen *rscre
return rstate;
}
static struct radeon_state *r600_texture_state_cb0(struct r600_screen *rscreen,
static struct radeon_state *r600_texture_state_cb(struct r600_screen *rscreen,
struct r600_resource_texture *rtexture,
unsigned level)
unsigned cb, unsigned level)
{
struct radeon_state *rstate;
struct r600_resource *rbuffer;
@ -707,7 +718,7 @@ static struct radeon_state *r600_texture_state_cb0(struct r600_screen *rscreen,
unsigned format, swap, ntype;
const struct util_format_description *desc;
rstate = radeon_state(rscreen->rw, R600_STATE_CB0, 0);
rstate = radeon_state(rscreen->rw, R600_STATE_CB0 + cb, 0);
if (rstate == NULL)
return NULL;
rbuffer = &rtexture->resource;
@ -770,6 +781,10 @@ static struct radeon_state *r600_texture_state_db(struct r600_screen *rscreen,
if (rstate == NULL)
return NULL;
rbuffer = &rtexture->resource;
rtexture->tilled = 1;
rtexture->array_mode = 2;
rtexture->tile_type = 1;
rtexture->depth = 1;
/* set states (most default value are 0 and struct already
* initialized to 0, thus avoid resetting them)
@ -838,14 +853,14 @@ static struct radeon_state *r600_texture_state_viewport(struct r600_screen *rscr
return rstate;
}
int r600_texture_cb0(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level)
{
struct r600_screen *rscreen = r600_screen(ctx->screen);
if (rtexture->cb0[level] == NULL) {
rtexture->cb0[level] = r600_texture_state_cb0(rscreen, rtexture, level);
if (rtexture->cb0[level] == NULL) {
R600_ERR("failed to create cb0 state for texture\n");
if (rtexture->cb[cb][level] == NULL) {
rtexture->cb[cb][level] = r600_texture_state_cb(rscreen, rtexture, cb, level);
if (rtexture->cb[cb][level] == NULL) {
R600_ERR("failed to create cb%d state for texture\n", cb);
return -ENOMEM;
}
}


@ -109,13 +109,11 @@ struct radeon_state {
unsigned id;
unsigned shader_index;
unsigned nstates;
u32 *states;
u32 states[64];
unsigned npm4;
unsigned cpm4;
u32 pm4_crc;
u32 *pm4;
u32 nimmd;
u32 *immd;
u32 pm4[128];
unsigned nbo;
struct radeon_bo *bo[4];
unsigned nreloc;
@ -130,6 +128,7 @@ struct radeon_state *radeon_state_shader(struct radeon *radeon, u32 type, u32 id
struct radeon_state *radeon_state_incref(struct radeon_state *state);
struct radeon_state *radeon_state_decref(struct radeon_state *state);
int radeon_state_pm4(struct radeon_state *state);
int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type);
/*
* draw functions
@ -219,7 +218,7 @@ enum r600_stype {
R600_STATE_DB,
R600_STATE_QUERY_BEGIN,
R600_STATE_QUERY_END,
R600_STATE_CLIP,
R600_STATE_UCP,
R600_STATE_VGT,
R600_STATE_DRAW,
};
@ -613,17 +612,37 @@ enum {
/* R600_DRAW */
#define R600_DRAW__VGT_NUM_INDICES 0
#define R600_DRAW__VGT_DMA_BASE_HI 1
#define R600_DRAW__VGT_DMA_BASE 2
#define R600_DRAW__VGT_DMA_BASE 2
#define R600_DRAW__VGT_DRAW_INITIATOR 3
#define R600_DRAW_SIZE 4
#define R600_DRAW_PM4 128
#define R600_DRAW_SIZE 4
#define R600_DRAW_PM4 128
/* R600_CLIP */
#define R600_CLIP__PA_CL_UCP_X_0 0
#define R600_CLIP__PA_CL_UCP_Y_0 1
#define R600_CLIP__PA_CL_UCP_Z_0 2
#define R600_CLIP__PA_CL_UCP_W_0 3
#define R600_CLIP_SIZE 4
#define R600_CLIP_PM4 128
#define R600_CLIP__PA_CL_UCP_X_0 0
#define R600_CLIP__PA_CL_UCP_Y_0 1
#define R600_CLIP__PA_CL_UCP_Z_0 2
#define R600_CLIP__PA_CL_UCP_W_0 3
#define R600_CLIP__PA_CL_UCP_X_1 4
#define R600_CLIP__PA_CL_UCP_Y_1 5
#define R600_CLIP__PA_CL_UCP_Z_1 6
#define R600_CLIP__PA_CL_UCP_W_1 7
#define R600_CLIP__PA_CL_UCP_X_2 8
#define R600_CLIP__PA_CL_UCP_Y_2 9
#define R600_CLIP__PA_CL_UCP_Z_2 10
#define R600_CLIP__PA_CL_UCP_W_2 11
#define R600_CLIP__PA_CL_UCP_X_3 12
#define R600_CLIP__PA_CL_UCP_Y_3 13
#define R600_CLIP__PA_CL_UCP_Z_3 14
#define R600_CLIP__PA_CL_UCP_W_3 15
#define R600_CLIP__PA_CL_UCP_X_4 16
#define R600_CLIP__PA_CL_UCP_Y_4 17
#define R600_CLIP__PA_CL_UCP_Z_4 18
#define R600_CLIP__PA_CL_UCP_W_4 19
#define R600_CLIP__PA_CL_UCP_X_5 20
#define R600_CLIP__PA_CL_UCP_Y_5 21
#define R600_CLIP__PA_CL_UCP_Z_5 22
#define R600_CLIP__PA_CL_UCP_W_5 23
#define R600_CLIP_SIZE 24
#define R600_CLIP_PM4 128
/* R600 QUERY BEGIN/END */
#define R600_QUERY__OFFSET 0
#define R600_QUERY_SIZE 1


@ -80,7 +80,7 @@ struct radeon_stype_info r600_stypes[] = {
{ R600_STATE_QUERY_BEGIN, 1, 0, r600_state_pm4_query_begin, SUB_NONE(VGT_EVENT) },
{ R600_STATE_QUERY_END, 1, 0, r600_state_pm4_query_end, SUB_NONE(VGT_EVENT) },
{ R600_STATE_DB, 1, 0, r600_state_pm4_db, SUB_NONE(DB) },
{ R600_STATE_CLIP, 6, 0, r600_state_pm4_generic, SUB_NONE(UCP) },
{ R600_STATE_UCP, 1, 0, r600_state_pm4_generic, SUB_NONE(UCP) },
{ R600_STATE_VGT, 1, 0, r600_state_pm4_vgt, SUB_NONE(VGT) },
{ R600_STATE_DRAW, 1, 0, r600_state_pm4_draw, SUB_NONE(DRAW) },
};
@ -381,13 +381,6 @@ static int r600_state_pm4_draw(struct radeon_state *state)
if (r)
return r;
state->pm4[state->cpm4++] = state->bo[0]->handle;
} else if (state->nimmd) {
state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_IMMD, state->nimmd + 1);
state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];
state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_DRAW_INITIATOR];
for (i = 0; i < state->nimmd; i++) {
state->pm4[state->cpm4++] = state->immd[i];
}
} else {
state->pm4[state->cpm4++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
state->pm4[state->cpm4++] = state->states[R600_DRAW__VGT_NUM_INDICES];


@ -284,10 +284,30 @@ static const struct radeon_register R600_names_VS_CONSTANT[] = {
};
static const struct radeon_register R600_names_UCP[] = {
{0x00028e20, 0, 0, "PA_CL_UCP0_X"},
{0x00028e24, 0, 0, "PA_CL_UCP0_Y"},
{0x00028e28, 0, 0, "PA_CL_UCP0_Z"},
{0x00028e2c, 0, 0, "PA_CL_UCP0_W"},
{0x00028E20, 0, 0, "PA_CL_UCP0_X"},
{0x00028E24, 0, 0, "PA_CL_UCP0_Y"},
{0x00028E28, 0, 0, "PA_CL_UCP0_Z"},
{0x00028E2C, 0, 0, "PA_CL_UCP0_W"},
{0x00028E30, 0, 0, "PA_CL_UCP1_X"},
{0x00028E34, 0, 0, "PA_CL_UCP1_Y"},
{0x00028E38, 0, 0, "PA_CL_UCP1_Z"},
{0x00028E3C, 0, 0, "PA_CL_UCP1_W"},
{0x00028E40, 0, 0, "PA_CL_UCP2_X"},
{0x00028E44, 0, 0, "PA_CL_UCP2_Y"},
{0x00028E48, 0, 0, "PA_CL_UCP2_Z"},
{0x00028E4C, 0, 0, "PA_CL_UCP2_W"},
{0x00028E50, 0, 0, "PA_CL_UCP3_X"},
{0x00028E54, 0, 0, "PA_CL_UCP3_Y"},
{0x00028E58, 0, 0, "PA_CL_UCP3_Z"},
{0x00028E5C, 0, 0, "PA_CL_UCP3_W"},
{0x00028E60, 0, 0, "PA_CL_UCP4_X"},
{0x00028E64, 0, 0, "PA_CL_UCP4_Y"},
{0x00028E68, 0, 0, "PA_CL_UCP4_Z"},
{0x00028E6C, 0, 0, "PA_CL_UCP4_W"},
{0x00028E70, 0, 0, "PA_CL_UCP5_X"},
{0x00028E74, 0, 0, "PA_CL_UCP5_Y"},
{0x00028E78, 0, 0, "PA_CL_UCP5_Z"},
{0x00028E7C, 0, 0, "PA_CL_UCP5_W"},
};
static const struct radeon_register R600_names_PS_RESOURCE[] = {


@ -38,19 +38,19 @@ struct radeon_register {
};
struct radeon_sub_type {
int shader_type;
const struct radeon_register *regs;
unsigned nstates;
int shader_type;
const struct radeon_register *regs;
unsigned nstates;
};
struct radeon_stype_info {
unsigned stype;
unsigned num;
unsigned stride;
radeon_state_pm4_t pm4;
struct radeon_sub_type reginfo[R600_SHADER_MAX];
unsigned base_id;
unsigned npm4;
unsigned stype;
unsigned num;
unsigned stride;
radeon_state_pm4_t pm4;
struct radeon_sub_type reginfo[R600_SHADER_MAX];
unsigned base_id;
unsigned npm4;
};
struct radeon {


@ -80,15 +80,59 @@ struct radeon_state *radeon_state_shader(struct radeon *radeon, u32 stype, u32 i
state->refcount = 1;
state->npm4 = found->npm4;
state->nstates = found->reginfo[shader_index].nstates;
state->states = calloc(1, state->nstates * 4);
state->pm4 = calloc(1, found->npm4 * 4);
if (state->states == NULL || state->pm4 == NULL) {
radeon_state_decref(state);
return NULL;
}
return state;
}
int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type)
{
struct radeon_stype_info *found = NULL;
int i, j, shader_index = -1;
if (state == NULL)
return 0;
/* traverse the stype array */
for (i = 0; i < state->radeon->nstype; i++) {
/* if the type doesn't match, if the shader doesn't match */
if (stype != state->radeon->stype[i].stype)
continue;
if (shader_type) {
for (j = 0; j < 4; j++) {
if (state->radeon->stype[i].reginfo[j].shader_type == shader_type) {
shader_index = j;
break;
}
}
if (shader_index == -1)
continue;
} else {
if (state->radeon->stype[i].reginfo[0].shader_type)
continue;
else
shader_index = 0;
}
if (id > state->radeon->stype[i].num)
continue;
found = &state->radeon->stype[i];
break;
}
if (!found) {
fprintf(stderr, "%s invalid type %d/id %d/shader class %d\n", __func__, stype, id, shader_type);
return -EINVAL;
}
if (found->reginfo[shader_index].nstates != state->nstates) {
fprintf(stderr, "invalid type change from (%d %d %d) to (%d %d %d)\n",
state->stype->stype, state->id, state->shader_index, stype, id, shader_index);
}
state->stype = found;
state->id = id;
state->shader_index = shader_index;
return radeon_state_pm4(state);
}
struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id)
{
return radeon_state_shader(radeon, type, id, 0);
@ -134,9 +178,6 @@ struct radeon_state *radeon_state_decref(struct radeon_state *state)
for (i = 0; i < state->nbo; i++) {
state->bo[i] = radeon_bo_decref(state->radeon, state->bo[i]);
}
free(state->immd);
free(state->states);
free(state->pm4);
memset(state, 0, sizeof(*state));
free(state);
return NULL;
@ -179,8 +220,9 @@ int radeon_state_pm4(struct radeon_state *state)
{
int r;
if (state == NULL || state->cpm4)
if (state == NULL)
return 0;
state->cpm4 = 0;
r = state->stype->pm4(state);
if (r) {
fprintf(stderr, "%s failed to build PM4 for state(%d %d)\n",