i965g: first pass at vs immediates in curbe
This commit is contained in:
parent
3fafe2b969
commit
63b0af0775
|
@ -149,12 +149,17 @@ struct brw_blend_state {
|
|||
|
||||
struct brw_rasterizer_state;
|
||||
|
||||
/* Immediate values collected from a shader's token stream.
 *
 *   nr   - count of vec4 entries currently stored in 'data'
 *   data - pointer to an array of 4-float vectors, one per immediate
 */
struct brw_immediate_data {
   unsigned int nr;
   float (*data)[4];
};
|
||||
|
||||
struct brw_vertex_shader {
|
||||
const struct tgsi_token *tokens;
|
||||
struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
|
||||
|
||||
struct tgsi_shader_info info;
|
||||
struct brw_immediate_data immediates;
|
||||
|
||||
GLuint has_flow_control:1;
|
||||
GLuint use_const_buffer:1;
|
||||
|
@ -189,6 +194,7 @@ struct brw_fragment_shader {
|
|||
struct tgsi_shader_info info;
|
||||
|
||||
struct brw_fs_signature signature;
|
||||
struct brw_immediate_data immediates;
|
||||
|
||||
unsigned iz_lookup;
|
||||
//unsigned wm_lookup;
|
||||
|
|
|
@ -226,21 +226,34 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
|
|||
/* vertex shader constants */
|
||||
if (brw->curbe.vs_size) {
|
||||
GLuint offset = brw->curbe.vs_start * 16;
|
||||
GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
|
||||
struct brw_vertex_shader *vs = brw->curr.vertex_shader;
|
||||
GLuint nr_immediate, nr_const;
|
||||
|
||||
/* XXX: note that constant buffers are currently *already* in
|
||||
* buffer objects. If we want to keep on putting them into the
|
||||
* curbe, makes sense to treat constbuf's specially with malloc.
|
||||
*/
|
||||
const GLfloat *value = screen->buffer_map( screen,
|
||||
brw->curr.vertex_constants,
|
||||
PIPE_BUFFER_USAGE_CPU_READ);
|
||||
nr_immediate = vs->immediates.nr;
|
||||
if (nr_immediate) {
|
||||
memcpy(&buf[offset],
|
||||
vs->immediates.data,
|
||||
nr_immediate * 4 * sizeof(float));
|
||||
|
||||
/* XXX: what if user's constant buffer is too small?
|
||||
*/
|
||||
memcpy(&buf[offset], value, nr * 4 * sizeof(float));
|
||||
offset += nr_immediate * 4;
|
||||
}
|
||||
|
||||
screen->buffer_unmap( screen, brw->curr.vertex_constants );
|
||||
nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
|
||||
if (nr_const) {
|
||||
/* XXX: note that constant buffers are currently *already* in
|
||||
* buffer objects. If we want to keep on putting them into the
|
||||
* curbe, makes sense to treat constbuf's specially with malloc.
|
||||
*/
|
||||
const GLfloat *value = screen->buffer_map( screen,
|
||||
brw->curr.vertex_constants,
|
||||
PIPE_BUFFER_USAGE_CPU_READ);
|
||||
|
||||
/* XXX: what if user's constant buffer is too small?
|
||||
*/
|
||||
memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
|
||||
|
||||
screen->buffer_unmap( screen, brw->curr.vertex_constants );
|
||||
}
|
||||
}
|
||||
|
||||
if (BRW_DEBUG & DEBUG_CURBE) {
|
||||
|
@ -263,8 +276,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
|
|||
}
|
||||
else {
|
||||
/* constants have changed */
|
||||
if (brw->curbe.last_buf)
|
||||
FREE(brw->curbe.last_buf);
|
||||
FREE(brw->curbe.last_buf);
|
||||
|
||||
brw->curbe.last_buf = buf;
|
||||
brw->curbe.last_bufsz = bufsz;
|
||||
|
|
|
@ -55,6 +55,47 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info)
|
|||
}
|
||||
|
||||
|
||||
static void scan_immediates(const struct tgsi_token *tokens,
|
||||
const struct tgsi_shader_info *info,
|
||||
struct brw_immediate_data *imm)
|
||||
{
|
||||
struct tgsi_parse_context parse;
|
||||
boolean done = FALSE;
|
||||
|
||||
imm->nr = 0;
|
||||
imm->data = MALLOC(info->immediate_count * 4 * sizeof(float));
|
||||
|
||||
tgsi_parse_init( &parse, tokens );
|
||||
while (!tgsi_parse_end_of_tokens( &parse ) && !done) {
|
||||
tgsi_parse_token( &parse );
|
||||
|
||||
switch (parse.FullToken.Token.Type) {
|
||||
case TGSI_TOKEN_TYPE_DECLARATION:
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE: {
|
||||
static const float id[4] = {0,0,0,1};
|
||||
const float *value = &parse.FullToken.FullImmediate.u[0].Float;
|
||||
unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
imm->data[imm->nr][i] = value[i];
|
||||
|
||||
for (; i < 4; i++)
|
||||
imm->data[imm->nr][i] = id[i];
|
||||
|
||||
imm->nr++;
|
||||
break;
|
||||
}
|
||||
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
|
||||
{
|
||||
|
@ -106,6 +147,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
|
|||
goto fail;
|
||||
|
||||
tgsi_scan_shader(fs->tokens, &fs->info);
|
||||
scan_immediates(fs->tokens, &fs->info, &fs->immediates);
|
||||
|
||||
fs->signature.nr_inputs = fs->info.num_inputs;
|
||||
for (i = 0; i < fs->info.num_inputs; i++) {
|
||||
|
@ -150,6 +192,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
|
|||
goto fail;
|
||||
|
||||
tgsi_scan_shader(vs->tokens, &vs->info);
|
||||
scan_immediates(vs->tokens, &vs->info, &vs->immediates);
|
||||
|
||||
vs->id = brw->program_id++;
|
||||
vs->has_flow_control = has_flow_control(&vs->info);
|
||||
|
|
|
@ -42,6 +42,15 @@
|
|||
#include "brw_vs.h"
|
||||
#include "brw_debug.h"
|
||||
|
||||
/* Choose one of the 4 vec4's which can be packed into each 16-wide reg.
|
||||
*/
|
||||
static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
|
||||
{
|
||||
int nr = reg + slot/2;
|
||||
int subnr = (slot%2) * 4;
|
||||
|
||||
return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
|
||||
}
|
||||
|
||||
|
||||
static struct brw_reg get_tmp( struct brw_vs_compile *c )
|
||||
|
@ -119,7 +128,7 @@ static boolean find_output_slot( struct brw_vs_compile *c,
|
|||
*/
|
||||
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
||||
{
|
||||
GLuint i, reg = 0, mrf;
|
||||
GLuint i, reg = 0, subreg = 0, mrf;
|
||||
int attributes_in_vue;
|
||||
|
||||
/* Determine whether to use a real constant buffer or use a block
|
||||
|
@ -150,33 +159,57 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
/* User clip planes from curbe:
|
||||
*/
|
||||
if (c->key.nr_userclip) {
|
||||
for (i = 0; i < c->key.nr_userclip; i++) {
|
||||
c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
|
||||
/* Skip over fixed planes: Or never read them into vs unit?
|
||||
*/
|
||||
subreg += 6;
|
||||
|
||||
for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
|
||||
c->userplane[i] =
|
||||
stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
|
||||
}
|
||||
|
||||
/* Deal with curbe alignment:
|
||||
*/
|
||||
reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
|
||||
subreg = align(subreg, 2);
|
||||
/*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
|
||||
}
|
||||
|
||||
/* Vertex program parameters from curbe:
|
||||
|
||||
/* Immediates: always in the curbe.
|
||||
*
|
||||
* XXX: Can try to encode some immediates as brw immediates
|
||||
* XXX: Make sure ureg sets minimal immediate size and respect it
|
||||
* here.
|
||||
*/
|
||||
if (c->vp->use_const_buffer) {
|
||||
/* get constants from a real constant buffer */
|
||||
c->prog_data.curb_read_length = 0;
|
||||
c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
|
||||
for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
|
||||
c->regs[TGSI_FILE_IMMEDIATE][i] =
|
||||
stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
|
||||
}
|
||||
else {
|
||||
/* use a section of the GRF for constants */
|
||||
c->prog_data.nr_params = c->vp->info.immediate_count * 4;
|
||||
|
||||
|
||||
/* Vertex constant buffer.
|
||||
*
|
||||
* Constants from the buffer can be either cached in the curbe or
|
||||
* loaded as needed from the actual constant buffer.
|
||||
*/
|
||||
if (!c->vp->use_const_buffer) {
|
||||
GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
|
||||
for (i = 0; i < nr_params; i++) {
|
||||
c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
|
||||
|
||||
for (i = 0; i < nr_params; i++, subreg++) {
|
||||
c->regs[TGSI_FILE_CONSTANT][i] =
|
||||
stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
|
||||
}
|
||||
reg += (nr_params + 1) / 2;
|
||||
c->prog_data.curb_read_length = reg - 1;
|
||||
c->prog_data.nr_params = nr_params * 4;
|
||||
|
||||
c->prog_data.nr_params += nr_params * 4;
|
||||
}
|
||||
|
||||
/* All regs allocated
|
||||
*/
|
||||
reg += (subreg + 1) / 2;
|
||||
c->prog_data.curb_read_length = reg - 1;
|
||||
|
||||
|
||||
/* Allocate input regs:
|
||||
*/
|
||||
c->nr_inputs = c->vp->info.num_inputs;
|
||||
|
@ -191,28 +224,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
if (c->nr_inputs == 0)
|
||||
reg++;
|
||||
|
||||
/* Allocate a GRF and load immediate values by hand with 4 MOVs!!!
|
||||
*
|
||||
* XXX: Try to encode float immediates as brw immediates
|
||||
* XXX: Put immediates into the CURBE.
|
||||
* XXX: Make sure ureg sets minimal immediate size and respect it
|
||||
* here.
|
||||
*/
|
||||
for (i = 0; i < c->nr_immediates; i++) {
|
||||
struct brw_reg r;
|
||||
int j;
|
||||
|
||||
c->regs[TGSI_FILE_IMMEDIATE][i] =
|
||||
r = brw_vec8_grf(reg, 0);
|
||||
|
||||
for (j = 0; j < 4; j++) {
|
||||
brw_MOV(&c->func,
|
||||
brw_writemask(r, (1<<j)),
|
||||
brw_imm_f(c->immediate[i][j]));
|
||||
}
|
||||
|
||||
reg++;
|
||||
}
|
||||
|
||||
|
||||
/* Allocate outputs. The non-position outputs go straight into message regs.
|
||||
|
@ -1605,8 +1616,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
|
|||
struct brw_instruction *end_inst, *last_inst;
|
||||
struct tgsi_parse_context parse;
|
||||
struct tgsi_full_instruction *inst;
|
||||
boolean done = FALSE;
|
||||
int i;
|
||||
|
||||
if (BRW_DEBUG & DEBUG_VS)
|
||||
tgsi_dump(c->vp->tokens, 0);
|
||||
|
@ -1616,37 +1625,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
|
|||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_set_access_mode(p, BRW_ALIGN_16);
|
||||
|
||||
/* Inputs */
|
||||
tgsi_parse_init( &parse, tokens );
|
||||
while( !tgsi_parse_end_of_tokens( &parse ) ) {
|
||||
tgsi_parse_token( &parse );
|
||||
|
||||
switch( parse.FullToken.Token.Type ) {
|
||||
case TGSI_TOKEN_TYPE_DECLARATION:
|
||||
/* Nothing to do -- using info from tgsi_scan().
|
||||
*/
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE: {
|
||||
static const float id[4] = {0,0,0,1};
|
||||
const float *imm = &parse.FullToken.FullImmediate.u[0].Float;
|
||||
unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
c->immediate[c->nr_immediates][i] = imm[i];
|
||||
|
||||
for ( ; i < 4; i++)
|
||||
c->immediate[c->nr_immediates][i] = id[i];
|
||||
|
||||
c->nr_immediates++;
|
||||
break;
|
||||
}
|
||||
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Static register allocation
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue