Merge branch 'gallium-vertex-linear' into gallium-0.1

Conflicts:

	src/gallium/auxiliary/draw/draw_pt_varray.c
This commit is contained in:
Jakob Bornecrantz 2008-05-28 12:42:42 +02:00
commit 938d9d5963
53 changed files with 5472 additions and 583 deletions

View File

@ -230,7 +230,7 @@ int main( int argc, char *argv[] )
glutInitWindowPosition( 0, 0 );
glutInitWindowSize( 250, 250 );
glutInitDisplayMode( GLUT_RGB | GLUT_SINGLE | GLUT_DEPTH );
glutCreateWindow(argv[0]);
glutCreateWindow(argv[argc-1]);
glutReshapeFunc( Reshape );
glutKeyboardFunc( Key );
glutDisplayFunc( Display );

View File

@ -26,12 +26,17 @@ C_SOURCES = \
draw_pt_emit.c \
draw_pt_fetch.c \
draw_pt_fetch_emit.c \
draw_pt_fetch_shade_emit.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_post_vs.c \
draw_pt_util.c \
draw_pt_varray.c \
draw_pt_vcache.c \
draw_vertex.c \
draw_vs.c \
draw_vs_varient.c \
draw_vs_aos.c \
draw_vs_aos_io.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c

View File

@ -56,12 +56,6 @@ struct draw_context *draw_create( void )
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
tgsi_exec_machine_init(&draw->machine);
/* FIXME: give this machine thing a proper constructor:
*/
draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
if (!draw_pipeline_init( draw ))
goto fail;
@ -69,6 +63,9 @@ struct draw_context *draw_create( void )
if (!draw_pt_init( draw ))
goto fail;
if (!draw_vs_init( draw ))
goto fail;
return draw;
fail:
@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw )
return;
if (draw->machine.Inputs)
align_free(draw->machine.Inputs);
if (draw->machine.Outputs)
align_free(draw->machine.Outputs);
tgsi_exec_machine_free_data(&draw->machine);
/* Not so fast -- we're just borrowing this at the moment.
*
@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw )
draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
draw_vs_destroy( draw );
FREE( draw );
}
@ -295,7 +286,7 @@ int
draw_find_vs_output(struct draw_context *draw,
uint semantic_name, uint semantic_index)
{
const struct draw_vertex_shader *vs = draw->vertex_shader;
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == semantic_name &&
@ -320,7 +311,7 @@ draw_find_vs_output(struct draw_context *draw,
uint
draw_num_vs_outputs(struct draw_context *draw)
{
uint count = draw->vertex_shader->info.num_outputs;
uint count = draw->vs.vertex_shader->info.num_outputs;
if (draw->extra_vp_outputs.slot > 0)
count++;
return count;

View File

@ -212,6 +212,71 @@ void draw_pipeline_run( struct draw_context *draw,
draw->pipeline.vertex_count = 0;
}
/* Emit one quad as two triangles: (i0, i1, i3) followed by (i1, i2, i3).
 *
 * Only the first triangle carries DRAW_PIPE_RESET_STIPPLE.  The first
 * index handed to each do_triangle() call has the DRAW_PIPE_FLAG_MASK
 * bits cleared before it is scaled by the vertex stride.
 *
 * Expects `draw`, `verts` and `stride` to be in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement;
 * without the wrapper, an unbraced `if (cond) QUAD(...);` would guard only
 * the first triangle.
 */
#define QUAD(i0,i1,i2,i3)                                          \
   do {                                                            \
      do_triangle( draw,                                           \
                   ( DRAW_PIPE_RESET_STIPPLE |                     \
                     DRAW_PIPE_EDGE_FLAG_0 |                       \
                     DRAW_PIPE_EDGE_FLAG_2 ),                      \
                   verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
                   verts + stride * (i1),                          \
                   verts + stride * (i3));                         \
      do_triangle( draw,                                           \
                   ( DRAW_PIPE_EDGE_FLAG_0 |                       \
                     DRAW_PIPE_EDGE_FLAG_1 ),                      \
                   verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
                   verts + stride * (i2),                          \
                   verts + stride * (i3));                         \
   } while (0)
/* Emit a single triangle through do_triangle() with the given flags.
 *
 * The three indices are scaled by `stride` and added to `verts`; only the
 * first index has the DRAW_PIPE_FLAG_MASK bits cleared beforehand.
 * Expects `draw`, `verts` and `stride` to be in scope at the point of use.
 */
#define TRIANGLE(flags,i0,i1,i2) \
do_triangle( draw, \
flags, /* flags */ \
verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
verts + stride * (i1), \
verts + stride * (i2))
/* Emit a single line segment from vertex i0 to vertex i1 through do_line().
 *
 * Both indices are scaled by `stride` and added to `verts`; the first index
 * has the DRAW_PIPE_FLAG_MASK bits cleared beforehand.
 * Expects `draw`, `verts` and `stride` to be in scope at the point of use.
 *
 * The second endpoint must come from the macro parameter i1.  Using the
 * includer's loop variable (i + 1) instead is only correct for independent
 * lines; for strips it is off by one, and for the closing segment of a loop
 * (i1 == 0) it points past the last vertex.
 */
#define LINE(flags,i0,i1)                                       \
   do_line( draw,                                               \
            flags,                                              \
            verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),     \
            verts + stride * (i1))
/* Emit a single point through do_point().
 *
 * The index is scaled by `stride` and added to `verts`.  The argument is
 * parenthesized so that an expression such as `i + 1` is scaled as a whole
 * rather than being split by operator precedence.
 * Expects `draw`, `verts` and `stride` to be in scope at the point of use.
 */
#define POINT(i0)                               \
   do_point( draw,                              \
             verts + stride * (i0) )
/* Instantiate the primitive-decomposition template in draw_pt_decompose.h
 * as pipe_run_linear().  The template reads the macros below together with
 * the POINT/LINE/TRIANGLE/QUAD emitters defined above.
 */

/* Name of the generated function. */
#define FUNC pipe_run_linear
/* Leading parameters of the generated function; the template appends a
 * trailing `unsigned count` parameter after these.
 */
#define ARGS \
struct draw_context *draw, \
unsigned prim, \
struct vertex_header *vertices, \
unsigned stride
/* Locals declared at the top of the generated function: a byte pointer to
 * the vertex array, the flat-shade-first-vertex state taken from the
 * rasterizer, and the loop index / flags scratch variables used by the
 * template body.
 */
#define LOCAL_VARS \
char *verts = (char *)vertices; \
boolean flatfirst = (draw->rasterizer->flatshade && \
draw->rasterizer->flatshade_first); \
unsigned i, flags
/* Nothing to flush at the end of decomposition for this instantiation. */
#define FLUSH
#include "draw_pt_decompose.h"
/**
 * Run the primitive pipeline over a linear (non-indexed) vertex array.
 *
 * The vertex array is recorded in draw->pipeline for the duration of the
 * call, the primitive is decomposed by pipe_run_linear(), and the recorded
 * pointer and count are cleared again before returning.
 *
 * \param draw      the draw context
 * \param prim      primitive type passed through to pipe_run_linear()
 * \param vertices  start of the vertex array
 * \param count     number of vertices in the array
 * \param stride    distance in bytes between consecutive vertices
 */
void draw_pipeline_run_linear( struct draw_context *draw,
                               unsigned prim,
                               struct vertex_header *vertices,
                               unsigned count,
                               unsigned stride )
{
   /* Record the vertex array while the run is in progress. */
   draw->pipeline.verts = (char *)vertices;
   draw->pipeline.vertex_stride = stride;
   draw->pipeline.vertex_count = count;

   pipe_run_linear( draw, prim, vertices, stride, count );

   /* The array is no longer valid once we return. */
   draw->pipeline.vertex_count = 0;
   draw->pipeline.verts = NULL;
}
void draw_pipeline_flush( struct draw_context *draw,

View File

@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage,
{
struct vertex_header *tmp = stage->tmp[idx];
const uint vsize = sizeof(struct vertex_header)
+ stage->draw->num_vs_outputs * 4 * sizeof(float);
+ stage->draw->vs.num_vs_outputs * 4 * sizeof(float);
memcpy(tmp, vert, vsize);
tmp->vertex_id = UNDEFINED_VERTEX_ID;
return tmp;

View File

@ -653,7 +653,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
}
/* update vertex attrib info */
aaline->tex_slot = draw->num_vs_outputs;
aaline->tex_slot = draw->vs.num_vs_outputs;
assert(aaline->tex_slot > 0); /* output[0] is vertex pos */
/* advertise the extra post-transformed vertex attribute */

View File

@ -681,7 +681,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
bind_aapoint_fragment_shader(aapoint);
/* update vertex attrib info */
aapoint->tex_slot = draw->num_vs_outputs;
aapoint->tex_slot = draw->vs.num_vs_outputs;
assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
@ -692,7 +692,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
aapoint->psize_slot = -1;
if (draw->rasterizer->point_size_per_vertex) {
/* find PSIZ vertex output */
const struct draw_vertex_shader *vs = draw->vertex_shader;
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {

View File

@ -112,7 +112,7 @@ static void interp( const struct clipper *clip,
const struct vertex_header *out,
const struct vertex_header *in )
{
const unsigned nr_attrs = clip->stage.draw->num_vs_outputs;
const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs;
unsigned j;
/* Vertex header.
@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage,
header.flags |= edge_last;
if (0) {
const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint j, k;
debug_printf("Clipped tri:\n");
for (j = 0; j < 3; j++) {
@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage )
clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
if (clipper->flat) {
const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
clipper->num_color_attribs = 0;

View File

@ -159,7 +159,7 @@ static void flatshade_line_1( struct draw_stage *stage,
static void flatshade_init_state( struct draw_stage *stage )
{
struct flat_stage *flat = flat_stage(stage);
const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
/* Find which vertex shader outputs are colors, make a list */

View File

@ -71,7 +71,7 @@ screen_interp( struct draw_context *draw,
const struct vertex_header *v1 )
{
uint attr;
for (attr = 0; attr < draw->num_vs_outputs; attr++) {
for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) {
const float *val0 = v0->data[attr];
const float *val1 = v1->data[attr];
float *newv = dst->data[attr];

View File

@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct twoside_stage *twoside = twoside_stage(stage);
const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
twoside->attrib_front0 = 0;

View File

@ -197,7 +197,7 @@ static void widepoint_first_point( struct draw_stage *stage,
if (draw->rasterizer->point_sprite) {
/* find vertex shader texcoord outputs */
const struct draw_vertex_shader *vs = draw->vertex_shader;
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i, j = 0;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
@ -212,7 +212,7 @@ static void widepoint_first_point( struct draw_stage *stage,
wide->psize_slot = -1;
if (draw->rasterizer->point_size_per_vertex) {
/* find PSIZ vertex output */
const struct draw_vertex_shader *vs = draw->vertex_shader;
const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {

View File

@ -124,6 +124,7 @@ struct draw_context
struct {
struct {
struct draw_pt_middle_end *fetch_emit;
struct draw_pt_middle_end *fetch_shade_emit;
struct draw_pt_middle_end *general;
} middle;
@ -154,6 +155,7 @@ struct draw_context
const void *constants;
} user;
boolean test_fse;
} pt;
struct {
@ -167,14 +169,27 @@ struct draw_context
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
struct draw_vertex_shader *vertex_shader;
boolean identity_viewport;
uint num_vs_outputs; /**< convenience, from vertex_shader */
struct {
struct draw_vertex_shader *vertex_shader;
uint num_vs_outputs; /**< convenience, from vertex_shader */
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
struct translate *fetch;
struct translate_cache *fetch_cache;
struct translate *emit;
struct translate_cache *emit_cache;
} vs;
/* Clip derived state:
*/
float plane[12][4];
@ -190,16 +205,15 @@ struct draw_context
unsigned reduced_prim;
/** TGSI program interpreter runtime state */
struct tgsi_exec_machine machine;
/* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
*/
struct gallivm_cpu_engine *engine;
void *driver_private;
};
/*******************************************************************************
* Vertex shader code:
*/
boolean draw_vs_init( struct draw_context *draw );
void draw_vs_destroy( struct draw_context *draw );
@ -247,6 +261,12 @@ void draw_pipeline_run( struct draw_context *draw,
const ushort *elts,
unsigned count );
void draw_pipeline_run_linear( struct draw_context *draw,
unsigned prim,
struct vertex_header *vertices,
unsigned count,
unsigned stride );
void draw_pipeline_flush( struct draw_context *draw,

View File

@ -64,7 +64,7 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_PIPELINE;
}
if (!draw->bypass_clipping) {
if (!draw->bypass_clipping && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
@ -72,16 +72,18 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_SHADE;
}
if (opt)
middle = draw->pt.middle.general;
else
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
else if (opt == PT_SHADE && draw->pt.test_fse)
middle = draw->pt.middle.fetch_shade_emit;
else
middle = draw->pt.middle.general;
/* Pick the right frontend
*/
if (draw->pt.user.elts ||
count >= 256) {
if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
frontend = draw->pt.front.vcache;
} else {
frontend = draw->pt.front.varray;
@ -102,6 +104,8 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
draw->pt.test_fse = GETENV("DRAW_FSE") != NULL;
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
return FALSE;
@ -114,6 +118,13 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
if (draw->pt.test_fse) {
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
}
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
return FALSE;
@ -134,6 +145,11 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_emit = NULL;
}
if (draw->pt.middle.fetch_shade_emit) {
draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit );
draw->pt.middle.fetch_shade_emit = NULL;
}
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
@ -147,19 +163,6 @@ void draw_pt_destroy( struct draw_context *draw )
static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
PIPE_PRIM_POINTS,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES
};
/**
* Draw vertex arrays
@ -172,9 +175,10 @@ void
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
if (reduced_prim[prim] != draw->reduced_prim) {
unsigned reduced_prim = draw_pt_reduced_prim(prim);
if (reduced_prim != draw->reduced_prim) {
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->reduced_prim = reduced_prim[prim];
draw->reduced_prim = reduced_prim;
}
/* drawing done here: */

View File

@ -92,6 +92,10 @@ struct draw_pt_middle_end {
const ushort *draw_elts,
unsigned draw_count );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
unsigned count);
void (*finish)( struct draw_pt_middle_end * );
void (*destroy)( struct draw_pt_middle_end * );
};
@ -117,6 +121,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw,
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
/* Middle-ends:
*
* Currently one general-purpose case which can do all possibilities,
@ -128,6 +133,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
* vertex_elements.
*/
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
@ -152,6 +158,13 @@ void draw_pt_emit( struct pt_emit *emit,
const ushort *elts,
unsigned count );
void draw_pt_emit_linear( struct pt_emit *emit,
const float (*vertex_data)[4],
unsigned vertex_count,
unsigned stride,
unsigned start,
unsigned count );
void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
@ -170,6 +183,11 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
unsigned count,
char *verts );
void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
unsigned start,
unsigned count,
char *verts );
void draw_pt_fetch_destroy( struct pt_fetch *fetch );
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
@ -194,4 +212,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
/*******************************************************************************
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
unsigned draw_pt_reduced_prim(unsigned prim);
#endif

View File

@ -0,0 +1,153 @@
/* Primitive decomposition template.
 *
 * The including file defines FUNC (the generated function's name), ARGS
 * (its leading parameters), LOCAL_VARS (declarations placed at the top of
 * the body), FLUSH (a statement run after decomposition) and the
 * POINT/LINE/TRIANGLE/QUAD emitter macros before including this file.
 *
 * The body uses `prim`, `i`, `flags` and `flatfirst`, which are not declared
 * here and therefore have to come from ARGS / LOCAL_VARS.
 *
 * The generated function walks `count` consecutive vertices and emits the
 * points, lines, triangles or quads that make up primitive type `prim`.
 * The loop conditions skip any trailing vertices that do not form a
 * complete primitive.
 */
static void FUNC( ARGS,
unsigned count )
{
LOCAL_VARS;
switch (prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < count; i ++) {
POINT( (i + 0) );
}
break;
case PIPE_PRIM_LINES:
/* Independent segments: two vertices each. */
for (i = 0; i+1 < count; i += 2) {
LINE( DRAW_PIPE_RESET_STIPPLE,
(i + 0),
(i + 1));
}
break;
case PIPE_PRIM_LINE_LOOP:
if (count >= 2) {
/* Only the first segment carries the stipple reset flag. */
flags = DRAW_PIPE_RESET_STIPPLE;
for (i = 1; i < count; i++, flags = 0) {
LINE( flags,
(i - 1),
(i ));
}
/* Closing segment: i == count here, so this joins the last vertex
 * back to vertex 0.
 */
LINE( flags,
(i - 1),
(0 ));
}
break;
case PIPE_PRIM_LINE_STRIP:
/* Only the first segment carries the stipple reset flag. */
flags = DRAW_PIPE_RESET_STIPPLE;
for (i = 1; i < count; i++, flags = 0) {
LINE( flags,
(i - 1),
(i ));
}
break;
case PIPE_PRIM_TRIANGLES:
for (i = 0; i+2 < count; i += 3) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
(i + 0),
(i + 1),
(i + 2 ));
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
/* Odd-numbered triangles have two of their vertices swapped.  With
 * flatfirst, vertex i stays in the first slot; otherwise vertex i+2
 * stays in the last slot.
 */
if (flatfirst) {
for (i = 0; i+2 < count; i++) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
(i + 0),
(i + 1 + (i&1)),
(i + 2 - (i&1)));
}
}
else {
for (i = 0; i+2 < count; i++) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
(i + 0 + (i&1)),
(i + 1 - (i&1)),
(i + 2 ));
}
}
break;
case PIPE_PRIM_TRIANGLE_FAN:
if (count >= 3) {
/* Vertex 0 is shared by every triangle; it is submitted last when
 * flatfirst is set and first otherwise.
 */
if (flatfirst) {
for (i = 0; i+2 < count; i++) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
(i + 1),
(i + 2),
(0 ));
}
}
else {
for (i = 0; i+2 < count; i++) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
(0),
(i + 1),
(i + 2 ));
}
}
}
break;
case PIPE_PRIM_QUADS:
for (i = 0; i+3 < count; i += 4) {
QUAD( (i + 0),
(i + 1),
(i + 2),
(i + 3));
}
break;
case PIPE_PRIM_QUAD_STRIP:
/* Each step consumes two new vertices and reuses the previous two. */
for (i = 0; i+3 < count; i += 2) {
QUAD( (i + 2),
(i + 0),
(i + 1),
(i + 3));
}
break;
case PIPE_PRIM_POLYGON:
{
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
/* The first triangle gets the stipple reset and the first edge; later
 * triangles start from edge_middle only, and the final triangle
 * (i + 3 == count) additionally gets edge_last.
 */
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
for (i = 0; i+2 < count; i++, flags = edge_middle) {
if (i + 3 == count)
flags |= edge_last;
TRIANGLE( flags,
(i + 1),
(i + 2),
(0));
}
}
break;
default:
/* Unknown primitive type. */
assert(0);
break;
}
FLUSH;
}
#undef TRIANGLE
#undef QUAD
#undef POINT
#undef LINE
#undef FUNC

View File

@ -40,6 +40,9 @@ struct pt_emit {
struct translate *translate;
struct translate_cache *cache;
unsigned prim;
const struct vertex_info *vinfo;
};
void draw_pt_emit_prepare( struct pt_emit *emit,
@ -51,8 +54,18 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
struct translate_key hw_key;
unsigned i;
boolean ok;
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
ok = draw->render->set_primitive(draw->render, prim);
/* XXX: may need to defensively reset this later on as clipping can
* clobber this state in the render backend.
*/
emit->prim = prim;
ok = draw->render->set_primitive(draw->render, emit->prim);
if (!ok) {
assert(0);
return;
@ -60,7 +73,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
/* Must do this after set_primitive() above:
*/
vinfo = draw->render->get_vertex_info(draw->render);
emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
/* Translate from pipeline vertices to hw vertices.
@ -100,6 +113,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
case EMIT_4UB:
output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
emit_sz = 4 * sizeof(ubyte);
break;
default:
assert(0);
output_format = PIPE_FORMAT_NONE;
@ -144,6 +158,14 @@ void draw_pt_emit( struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
if (!draw->render->set_primitive(draw->render, emit->prim)) {
assert(0);
return;
}
hw_verts = render->allocate_vertices(render,
(ushort)translate->key.output_stride,
(ushort)vertex_count);
@ -178,6 +200,72 @@ void draw_pt_emit( struct pt_emit *emit,
}
/**
 * Translate a linear array of pipeline vertices to hardware format and draw
 * a range of them without an index list.
 *
 * \param emit          emit state; supplies the translate program and the
 *                      primitive recorded in emit->prim
 * \param vertex_data   start of the source vertex data
 * \param vertex_count  number of source vertices to translate
 * \param stride        distance in bytes between source vertices
 * \param start         first hardware vertex passed to draw_arrays()
 * \param count         number of vertices passed to draw_arrays()
 *
 * The hardware buffer is sized by vertex_count because that is how many
 * vertices translate->run() writes into it and how many are handed back to
 * release_vertices(); sizing it by `count` would overflow the allocation
 * whenever vertex_count > count.
 */
void draw_pt_emit_linear(struct pt_emit *emit,
                         const float (*vertex_data)[4],
                         unsigned vertex_count,
                         unsigned stride,
                         unsigned start,
                         unsigned count)
{
   struct draw_context *draw = emit->draw;
   struct translate *translate = emit->translate;
   struct vbuf_render *render = draw->render;
   void *hw_verts;

#if 0
   debug_printf("Linear emit\n");
#endif

   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
    */
   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   /* XXX: and work out some way to coordinate the render primitive
    * between vbuf.c and here...
    */
   if (!draw->render->set_primitive(draw->render, emit->prim)) {
      assert(0);
      return;
   }

   /* Allocate room for every vertex the translate run below will write. */
   hw_verts = render->allocate_vertices(render,
                                        (ushort)translate->key.output_stride,
                                        (ushort)vertex_count);
   if (!hw_verts) {
      assert(0);
      return;
   }

   /* Buffer 0 is the pipeline vertex data; buffer 1 is the rasterizer's
    * point size, bound with a zero stride.
    */
   translate->set_buffer(translate, 0,
                         vertex_data, stride);

   translate->set_buffer(translate, 1,
                         &draw->rasterizer->point_size,
                         0);

   translate->run(translate,
                  0,
                  vertex_count,
                  hw_verts);

   if (0) {
      unsigned i;
      for (i = 0; i < vertex_count; i++) {
         debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
         draw_dump_emitted_vertex( emit->vinfo,
                                   (const uint8_t *)hw_verts +
                                   translate->key.output_stride * i );
      }
   }

   render->draw_arrays(render, start, count);

   render->release_vertices(render,
                            hw_verts,
                            translate->key.output_stride,
                            vertex_count);
}
struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
{
struct pt_emit *emit = CALLOC_STRUCT(pt_emit);

View File

@ -166,6 +166,42 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
}
/**
 * Fetch a consecutive range of vertices into pipeline-vertex storage.
 *
 * \param fetch  fetch state holding the translate program
 * \param start  index of the first vertex to fetch
 * \param count  number of vertices to fetch
 * \param verts  destination storage; vertices are fetch->vertex_size
 *               bytes apart
 */
void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
                               unsigned start,
                               unsigned count,
                               char *verts )
{
   struct draw_context *draw = fetch->draw;
   struct translate *xlate = fetch->translate;
   unsigned buf;
   unsigned v;

   /* Bind every vertex buffer at its mapped address plus buffer offset. */
   for (buf = 0; buf < draw->pt.nr_vertex_buffers; buf++) {
      char *base = (char *)draw->pt.user.vbuffer[buf] +
                   draw->pt.vertex_buffer[buf].buffer_offset;

      xlate->set_buffer( xlate, buf, base,
                         draw->pt.vertex_buffer[buf].pitch );
   }

   xlate->run( xlate, start, count, verts );

   /* Edgeflags do not fit well into a translate program, so they are
    * filled in by hand, and only when the fetch state asks for them.
    */
   if (!fetch->need_edgeflags)
      return;

   for (v = 0; v < count; v++) {
      struct vertex_header *header =
         (struct vertex_header *)(verts + v * fetch->vertex_size);

      header->edgeflag = draw_pt_get_edgeflag( draw, start + v );
   }
}
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
{
struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);

View File

@ -258,6 +258,59 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
}
/**
 * Linear (non-indexed) variant of the fetch/emit middle end: fetch `count`
 * vertices starting at `start`, write them straight into hardware vertex
 * storage, and draw them with draw_arrays().
 */
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
                                   unsigned start,
                                   unsigned count )
{
   struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
   struct draw_context *draw = feme->draw;
   void *hw_verts;

   /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */
   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   hw_verts = draw->render->allocate_vertices( draw->render,
                                               (ushort)feme->translate->key.output_stride,
                                               (ushort)count );
   if (hw_verts == NULL) {
      assert(0);
      return;
   }

   /* One translate run both fetches the vertices and emits HW verts. */
   feme->translate->run( feme->translate, start, count, hw_verts );

   if (0) {
      unsigned v;
      for (v = 0; v < count; v++) {
         const uint8_t *vert = (const uint8_t *)hw_verts + feme->vinfo->size * 4 * v;

         debug_printf("\n\nvertex %d:\n", v);
         draw_dump_emitted_vertex( feme->vinfo, vert );
      }
   }

   /* XXX: Draw arrays path to avoid re-emitting index list again and
    * again.  The emitted vertices start at offset 0 of the allocation.
    */
   draw->render->draw_arrays( draw->render, 0, count );

   /* Hand the vertex storage back to the renderer. */
   draw->render->release_vertices( draw->render,
                                   hw_verts,
                                   feme->translate->key.output_stride,
                                   count );
}
static void fetch_emit_finish( struct draw_pt_middle_end *middle )
{
@ -287,10 +340,11 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
return NULL;
}
fetch_emit->base.prepare = fetch_emit_prepare;
fetch_emit->base.run = fetch_emit_run;
fetch_emit->base.finish = fetch_emit_finish;
fetch_emit->base.destroy = fetch_emit_destroy;
fetch_emit->base.prepare = fetch_emit_prepare;
fetch_emit->base.run = fetch_emit_run;
fetch_emit->base.run_linear = fetch_emit_run_linear;
fetch_emit->base.finish = fetch_emit_finish;
fetch_emit->base.destroy = fetch_emit_destroy;
fetch_emit->draw = draw;

View File

@ -0,0 +1,344 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "pipe/p_util.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
#include "draw/draw_vs.h"
#include "translate/translate.h"
struct fetch_shade_emit;
/* Prototype fetch, shade, emit-hw-verts all in one go.
*/
/* State for the combined fetch / shade / emit middle end. */
struct fetch_shade_emit {
/* Must stay the first member: the middle-end callbacks cast the
 * draw_pt_middle_end pointer they receive back to this struct.
 */
struct draw_pt_middle_end base;
/* Owning draw context, set at creation time. */
struct draw_context *draw;
/* Temporaries:
*/
/* NOTE(review): constants, pitch, src and prim are not referenced by the
 * functions visible in this part of the file -- confirm whether they are
 * still needed.
 */
const float *constants;
unsigned pitch[PIPE_MAX_ATTRIBS];
const ubyte *src[PIPE_MAX_ATTRIBS];
unsigned prim;
/* Key describing the inputs, outputs and options of the current draw,
 * filled in by fse_prepare() and used to look up `active`.
 */
struct draw_vs_varient_key key;
/* Shader varient selected by fse_prepare() and executed by the run
 * callbacks.
 */
struct draw_vs_varient *active;
/* Hardware vertex layout obtained from the renderer in fse_prepare(). */
const struct vertex_info *vinfo;
};
static void fse_prepare( struct draw_pt_middle_end *middle,
unsigned prim,
unsigned opt )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
const struct vertex_info *vinfo;
unsigned i;
if (!draw->render->set_primitive( draw->render,
prim )) {
assert(0);
return;
}
/* Must do this after set_primitive() above:
*/
fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render);
fse->key.output_stride = vinfo->size * 4;
fse->key.nr_outputs = vinfo->num_attribs;
fse->key.nr_inputs = num_vs_inputs;
fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */
fse->key.nr_inputs); /* inputs - fetch from api format */
fse->key.viewport = !draw->identity_viewport;
fse->key.clip = !draw->bypass_clipping;
fse->key.pad = 0;
memset(fse->key.element, 0,
fse->key.nr_elements * sizeof(fse->key.element[0]));
for (i = 0; i < num_vs_inputs; i++) {
const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
fse->key.element[i].in.format = src->src_format;
/* Consider ignoring these, ie make generated programs
* independent of this state:
*/
fse->key.element[i].in.buffer = src->vertex_buffer_index;
fse->key.element[i].in.offset = src->src_offset;
}
{
unsigned dst_offset = 0;
for (i = 0; i < vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
switch (vinfo->emit[i]) {
case EMIT_4F:
emit_sz = 4 * sizeof(float);
break;
case EMIT_3F:
emit_sz = 3 * sizeof(float);
break;
case EMIT_2F:
emit_sz = 2 * sizeof(float);
break;
case EMIT_1F:
emit_sz = 1 * sizeof(float);
break;
case EMIT_1F_PSIZE:
emit_sz = 1 * sizeof(float);
break;
case EMIT_4UB:
emit_sz = 4 * sizeof(ubyte);
break;
default:
assert(0);
break;
}
/* The elements in the key correspond to vertex shader output
* numbers, not to positions in the hw vertex description --
* that's handled by the output_offset field.
*/
fse->key.element[i].out.format = vinfo->emit[i];
fse->key.element[i].out.vs_output = vinfo->src_index[i];
fse->key.element[i].out.offset = dst_offset;
dst_offset += emit_sz;
assert(fse->key.output_stride >= dst_offset);
}
}
/* Would normally look up a vertex shader and peruse its list of
* varients somehow. We omitted that step and put all the
* hardcoded "shaders" into an array. We're just making the
* assumption that this happens to be a matching shader... ie
* you're running isosurf, aren't you?
*/
fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
&fse->key );
if (!fse->active) {
assert(0);
return ;
}
/* Now set buffer pointers:
*/
for (i = 0; i < num_vs_inputs; i++) {
unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
fse->active->set_input( fse->active,
i,
((const ubyte *) draw->pt.user.vbuffer[buf] +
draw->pt.vertex_buffer[buf].buffer_offset),
draw->pt.vertex_buffer[buf].pitch );
}
fse->active->set_constants( fse->active,
(const float (*)[4])draw->pt.user.constants );
fse->active->set_viewport( fse->active,
&draw->viewport );
//return TRUE;
}
/**
 * Linear (non-indexed) run: fetch, shade and emit `count` vertices starting
 * at `start` directly into hardware vertex storage, then draw them with
 * draw_arrays().
 */
static void fse_run_linear( struct draw_pt_middle_end *middle,
                            unsigned start,
                            unsigned count )
{
   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
   struct draw_context *draw = fse->draw;
   /* The allocation is rounded up to a multiple of four vertices. */
   unsigned alloc_count = align(count, 4);
   char *hw_verts;

   /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */
   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   hw_verts = draw->render->allocate_vertices( draw->render,
                                               (ushort)fse->key.output_stride,
                                               (ushort)alloc_count );
   if (hw_verts == NULL) {
      assert(0);
      return;
   }

   /* Single routine to fetch vertices, run shader and emit HW verts.
    * Clipping is done elsewhere -- either by the API or on hardware,
    * or for some other reason not required...
    */
   fse->active->run_linear( fse->active, start, count, hw_verts );

   /* Draw arrays path to avoid re-emitting index list again and again. */
   draw->render->draw_arrays( draw->render, 0, count );

   if (0) {
      unsigned v;
      for (v = 0; v < count; v++) {
         debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, v,
                      fse->key.output_stride,
                      fse->key.output_stride * v);

         draw_dump_emitted_vertex( fse->vinfo,
                                   (const uint8_t *)hw_verts + fse->key.output_stride * v );
      }
   }

   draw->render->release_vertices( draw->render,
                                   hw_verts,
                                   fse->key.output_stride,
                                   count );
}
/**
 * Indexed run: fetch, shade and emit the `fetch_count` vertices named by
 * `fetch_elts` into hardware vertex storage, then draw them using the
 * `draw_count` indices in `draw_elts`.
 */
static void
fse_run(struct draw_pt_middle_end *middle,
        const unsigned *fetch_elts,
        unsigned fetch_count,
        const ushort *draw_elts,
        unsigned draw_count )
{
   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
   struct draw_context *draw = fse->draw;
   /* The allocation is rounded up to a multiple of four vertices. */
   unsigned alloc_count = align(fetch_count, 4);
   void *hw_verts;

   /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */
   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   hw_verts = draw->render->allocate_vertices( draw->render,
                                               (ushort)fse->key.output_stride,
                                               (ushort)alloc_count );
   if (hw_verts == NULL) {
      assert(0);
      return;
   }

   /* Single routine to fetch vertices, run shader and emit HW verts. */
   fse->active->run_elts( fse->active, fetch_elts, fetch_count, hw_verts );

   draw->render->draw( draw->render, draw_elts, draw_count );

   if (0) {
      unsigned v;
      for (v = 0; v < fetch_count; v++) {
         const uint8_t *vert = (const uint8_t *)hw_verts +
                               fse->key.output_stride * v;

         debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, v);
         draw_dump_emitted_vertex( fse->vinfo, vert );
      }
   }

   draw->render->release_vertices( draw->render,
                                   hw_verts,
                                   fse->key.output_stride,
                                   fetch_count );
}
/**
 * Finish callback for the fetch/shade/emit middle end.
 * There is nothing to do here.
 */
static void fse_finish( struct draw_pt_middle_end *middle )
{
   (void) middle;
}
/**
 * Destroy callback: release the middle end created by draw_pt_middle_fse().
 */
static void
fse_destroy( struct draw_pt_middle_end *middle )
{
   /* `base` is the first member, so this is the pointer that was allocated. */
   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;

   FREE(fse);
}
/**
 * Create the fetch/shade/emit middle end for `draw`.
 *
 * \return the new middle end's base interface, or NULL if allocation fails.
 */
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
{
   struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);

   if (fse == NULL)
      return NULL;

   fse->draw = draw;

   /* Hook up the middle-end callbacks. */
   fse->base.prepare    = fse_prepare;
   fse->base.run        = fse_run;
   fse->base.run_linear = fse_run_linear;
   fse->base.finish     = fse_finish;
   fse->base.destroy    = fse_destroy;

   return &fse->base;
}

View File

@ -55,7 +55,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *vs = draw->vertex_shader;
struct draw_vertex_shader *vs = draw->vs.vertex_shader;
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
@ -107,7 +107,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vertex_shader;
struct draw_vertex_shader *shader = draw->vs.vertex_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align_int( fetch_count, 4 );
@ -162,7 +162,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
fpme->vertex_size,
draw_elts,
draw_count );
}
}
else {
draw_pt_emit( fpme->emit,
(const float (*)[4])pipeline_verts->data,
@ -177,6 +177,79 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
}
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vs.vertex_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align_int( count, 4 );
struct vertex_header *pipeline_verts =
(struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
if (!pipeline_verts) {
/* Not much we can do here - just skip the rendering.
*/
assert(0);
return;
}
/* Fetch into our vertex buffer
*/
draw_pt_fetch_run_linear( fpme->fetch,
start,
count,
(char *)pipeline_verts );
/* Run the shader, note that this overwrites the data[] parts of
* the pipeline verts. If there is no shader, ie a bypass shader,
* then the inputs == outputs, and are already in the correct
* place.
*/
if (opt & PT_SHADE)
{
shader->run_linear(shader,
(const float (*)[4])pipeline_verts->data,
( float (*)[4])pipeline_verts->data,
(const float (*)[4])draw->pt.user.constants,
count,
fpme->vertex_size,
fpme->vertex_size);
}
if (draw_pt_post_vs_run( fpme->post_vs,
pipeline_verts,
count,
fpme->vertex_size ))
{
opt |= PT_PIPELINE;
}
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
draw_pipeline_run_linear( fpme->draw,
fpme->prim,
pipeline_verts,
count,
fpme->vertex_size);
}
else {
draw_pt_emit_linear( fpme->emit,
(const float (*)[4])pipeline_verts->data,
count,
fpme->vertex_size,
0, /*start*/
count );
}
FREE(pipeline_verts);
}
static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
{
@ -206,10 +279,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
if (!fpme)
goto fail;
fpme->base.prepare = fetch_pipeline_prepare;
fpme->base.run = fetch_pipeline_run;
fpme->base.finish = fetch_pipeline_finish;
fpme->base.destroy = fetch_pipeline_destroy;
fpme->base.prepare = fetch_pipeline_prepare;
fpme->base.run = fetch_pipeline_run;
fpme->base.run_linear = fetch_pipeline_linear_run;
fpme->base.finish = fetch_pipeline_finish;
fpme->base.destroy = fetch_pipeline_destroy;
fpme->draw = draw;

View File

@ -0,0 +1,103 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "pipe/p_util.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
/* Report, for primitive type 'prim', the number of vertices needed
 * for the first primitive (*first) and the number of additional
 * vertices consumed by each following primitive (*incr).
 *
 * Unknown primitive types assert and yield first = 0, incr = 1.
 */
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
{
   unsigned min_verts;
   unsigned step;

   switch (prim) {
   case PIPE_PRIM_POINTS:
      min_verts = 1;
      step = 1;
      break;

   case PIPE_PRIM_LINES:
      min_verts = 2;
      step = 2;
      break;

   case PIPE_PRIM_LINE_STRIP:
   case PIPE_PRIM_LINE_LOOP:
      min_verts = 2;
      step = 1;
      break;

   case PIPE_PRIM_TRIANGLES:
      min_verts = 3;
      step = 3;
      break;

   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_TRIANGLE_FAN:
   case PIPE_PRIM_POLYGON:
      min_verts = 3;
      step = 1;
      break;

   case PIPE_PRIM_QUADS:
      min_verts = 4;
      step = 4;
      break;

   case PIPE_PRIM_QUAD_STRIP:
      min_verts = 4;
      step = 2;
      break;

   default:
      assert(0);
      min_verts = 0;
      step = 1;      /* one, so that "count % incr" stays well defined */
      break;
   }

   *first = min_verts;
   *incr = step;
}
/* Map a primitive type onto its reduced class: points, lines or
 * triangles.  Unknown types assert and are reported as points.
 */
unsigned draw_pt_reduced_prim(unsigned prim)
{
   unsigned reduced;

   switch (prim) {
   case PIPE_PRIM_POINTS:
      reduced = PIPE_PRIM_POINTS;
      break;

   case PIPE_PRIM_LINES:
   case PIPE_PRIM_LINE_STRIP:
   case PIPE_PRIM_LINE_LOOP:
      reduced = PIPE_PRIM_LINES;
      break;

   case PIPE_PRIM_TRIANGLES:
   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_TRIANGLE_FAN:
   case PIPE_PRIM_POLYGON:
   case PIPE_PRIM_QUADS:
   case PIPE_PRIM_QUAD_STRIP:
      reduced = PIPE_PRIM_TRIANGLES;
      break;

   default:
      assert(0);
      reduced = PIPE_PRIM_POINTS;
      break;
   }

   return reduced;
}

View File

@ -43,6 +43,8 @@ struct varray_frontend {
unsigned draw_count;
unsigned fetch_count;
unsigned fetch_start;
struct draw_pt_middle_end *middle;
unsigned input_prim;
@ -56,6 +58,11 @@ static void varray_flush(struct varray_frontend *varray)
debug_printf("FLUSH fc = %d, dc = %d\n",
varray->fetch_count,
varray->draw_count);
debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n",
varray->fetch_elts[0],
varray->fetch_elts[varray->fetch_count-1],
varray->draw_elts[0],
varray->draw_elts[varray->draw_count-1]);
#endif
varray->middle->run(varray->middle,
varray->fetch_elts,
@ -68,20 +75,43 @@ static void varray_flush(struct varray_frontend *varray)
varray->draw_count = 0;
}
#if 0
static void varray_check_flush(struct varray_frontend *varray)
static void varray_flush_linear(struct varray_frontend *varray,
unsigned start, unsigned count)
{
if (varray->draw_count + 6 >= DRAW_MAX/* ||
varray->fetch_count + 4 >= FETCH_MAX*/) {
varray_flush(varray);
if (count) {
#if 0
debug_printf("FLUSH LINEAR start = %d, count = %d\n",
start,
count);
#endif
assert(varray->middle->run_linear);
varray->middle->run_linear(varray->middle, start, count);
}
}
static INLINE void fetch_init(struct varray_frontend *varray,
unsigned count)
{
unsigned idx;
#if 0
debug_printf("FETCH INIT c = %d, fs = %d\n",
count,
varray->fetch_start);
#endif
for (idx = 0; idx < count; ++idx) {
varray->fetch_elts[idx] = varray->fetch_start + idx;
}
varray->fetch_start += idx;
varray->fetch_count = idx;
}
static INLINE void add_draw_el(struct varray_frontend *varray,
int idx, ushort flags)
int idx)
{
varray->draw_elts[varray->draw_count++] = idx | flags;
varray->draw_elts[varray->draw_count++] = idx;
}
@ -90,106 +120,52 @@ static INLINE void varray_triangle( struct varray_frontend *varray,
unsigned i1,
unsigned i2 )
{
add_draw_el(varray, i0, 0);
add_draw_el(varray, i1, 0);
add_draw_el(varray, i2, 0);
}
static INLINE void varray_triangle_flags( struct varray_frontend *varray,
ushort flags,
unsigned i0,
unsigned i1,
unsigned i2 )
{
add_draw_el(varray, i0, flags);
add_draw_el(varray, i1, 0);
add_draw_el(varray, i2, 0);
add_draw_el(varray, i0);
add_draw_el(varray, i1);
add_draw_el(varray, i2);
}
static INLINE void varray_line( struct varray_frontend *varray,
unsigned i0,
unsigned i1 )
{
add_draw_el(varray, i0, 0);
add_draw_el(varray, i1, 0);
}
static INLINE void varray_line_flags( struct varray_frontend *varray,
ushort flags,
unsigned i0,
unsigned i1 )
{
add_draw_el(varray, i0, flags);
add_draw_el(varray, i1, 0);
add_draw_el(varray, i0);
add_draw_el(varray, i1);
}
static INLINE void varray_point( struct varray_frontend *varray,
unsigned i0 )
{
add_draw_el(varray, i0, 0);
add_draw_el(varray, i0);
}
static INLINE void varray_quad( struct varray_frontend *varray,
unsigned i0,
unsigned i1,
unsigned i2,
unsigned i3 )
{
varray_triangle( varray, i0, i1, i3 );
varray_triangle( varray, i1, i2, i3 );
}
static INLINE void varray_ef_quad( struct varray_frontend *varray,
unsigned i0,
unsigned i1,
unsigned i2,
unsigned i3 )
{
const ushort omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
const ushort omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
varray_triangle_flags( varray,
DRAW_PIPE_RESET_STIPPLE | omitEdge1,
i0, i1, i3 );
varray_triangle_flags( varray,
omitEdge2,
i1, i2, i3 );
}
/* At least for now, we're back to using a template include file for
* this. The two paths aren't too different though - it may be
* possible to reunify them.
*/
#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2)
#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3)
#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1)
#define POINT(vc,i0) varray_point(vc,i0)
#define FUNC varray_run_extras
#include "draw_pt_varray_tmp.h"
#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3)
#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1)
#if 0
#define TRIANGLE(flags,i0,i1,i2) varray_triangle(varray,i0,i1,i2)
#define LINE(flags,i0,i1) varray_line(varray,i0,i1)
#define POINT(i0) varray_point(varray,i0)
#define FUNC varray_decompose
#include "draw_pt_decompose.h"
#else
#define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
#define LINE(vc,i0,i1) varray_line(vc,i0,i1)
#define POINT(vc,i0) varray_point(vc,i0)
#define FUNC varray_run
#include "draw_pt_varray_tmp.h"
#include "draw_pt_varray_tmp_linear.h"
#endif
static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
PIPE_PRIM_POINTS,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES, /* decomposed LINELOOP */
PIPE_PRIM_LINE_STRIP,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES
PIPE_PRIM_TRIANGLE_STRIP,
PIPE_PRIM_TRIANGLES, /* decomposed TRI_FAN */
PIPE_PRIM_QUADS,
PIPE_PRIM_QUAD_STRIP,
PIPE_PRIM_TRIANGLES /* decomposed POLYGON */
};
@ -201,17 +177,10 @@ static void varray_prepare(struct draw_pt_front_end *frontend,
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
if (opt & PT_PIPELINE)
{
varray->base.run = varray_run_extras;
}
else
{
varray->base.run = varray_run;
}
varray->base.run = varray_run;
varray->input_prim = prim;
varray->output_prim = reduced_prim[prim];
varray->output_prim = decompose_prim[prim];
varray->middle = middle;
middle->prepare(middle, varray->output_prim, opt);

View File

@ -10,32 +10,44 @@ static void FUNC(struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
unsigned i, flags;
unsigned i, j, flags;
unsigned first, incr;
varray->fetch_start = start;
draw_pt_split_prim(varray->input_prim, &first, &incr);
#if 0
debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count);
#endif
#if 0
debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim,
debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
varray->input_prim,
start, count);
#endif
for (i = 0; i < count; ++i) {
varray->fetch_elts[i] = start + i;
}
varray->fetch_count = count;
switch (varray->input_prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < count; i ++) {
POINT(varray, i + 0);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i < end; i++) {
POINT(varray, i + 0);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_LINES:
for (i = 0; i+1 < count; i += 2) {
LINE(varray, DRAW_PIPE_RESET_STIPPLE,
i + 0, i + 1);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+1 < end; i += 2) {
LINE(varray, DRAW_PIPE_RESET_STIPPLE,
i + 0, i + 1);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
@ -43,38 +55,81 @@ static void FUNC(struct draw_pt_front_end *frontend,
if (count >= 2) {
flags = DRAW_PIPE_RESET_STIPPLE;
for (i = 1; i < count; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 1; i < end; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
}
LINE(varray, flags, i - 1, 0);
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
LINE(varray, flags, i - 1, 0);
}
break;
case PIPE_PRIM_LINE_STRIP:
flags = DRAW_PIPE_RESET_STIPPLE;
for (i = 1; i < count; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 1; i < end; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLES:
for (i = 0; i+2 < count; i += 3) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1, i + 2);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i += 3) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1, i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
if (flatfirst) {
for (i = 0; i+2 < count; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1 + (i&1), i + 2 - (i&1));
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1 + (i&1), i + 2 - (i&1));
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
if (j + first + i <= count) {
varray->fetch_start -= 2;
i -= 2;
}
}
}
else {
for (i = 0; i+2 < count; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i + 2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
if (j + first + i <= count) {
varray->fetch_start -= 2;
i -= 2;
}
}
}
break;
@ -83,51 +138,89 @@ static void FUNC(struct draw_pt_front_end *frontend,
if (count >= 3) {
if (flatfirst) {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
for (i = 0; i+2 < count; i++) {
TRIANGLE(varray, flags, i + 1, i + 2, 0);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
else {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
for (i = 0; i+2 < count; i++) {
TRIANGLE(varray, flags, 0, i + 1, i + 2);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, flags, 0, i + 1, i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
}
break;
case PIPE_PRIM_QUADS:
for (i = 0; i+3 < count; i += 4) {
QUAD(varray, i + 0, i + 1, i + 2, i + 3);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+3 < end; i += 4) {
QUAD(varray, i + 0, i + 1, i + 2, i + 3);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
case PIPE_PRIM_QUAD_STRIP:
for (i = 0; i+3 < count; i += 2) {
QUAD(varray, i + 2, i + 0, i + 1, i + 3);
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+3 < end; i += 2) {
QUAD(varray, i + 2, i + 0, i + 1, i + 3);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
if (j + first + i <= count) {
varray->fetch_start -= 2;
i -= 2;
}
}
break;
case PIPE_PRIM_POLYGON:
{
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
/* These bitflags look a little odd because we submit the
* vertices as (1,2,0) to satisfy flatshade requirements.
*/
const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
for (i = 0; i+2 < count; i++, flags = edge_middle) {
flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++, flags = edge_middle) {
if (i + 3 == count)
flags |= edge_last;
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
}
break;
default:
assert(0);

View File

@ -0,0 +1,94 @@
/* Trim a vertex count down to a whole number of primitives.
 *
 * 'first' is the number of vertices needed for the first primitive and
 * 'incr' the number needed for each additional one.  Returns the
 * largest value <= count of the form first + k * incr, or 0 when
 * 'count' is too small to form even one primitive.
 *
 * The explicit count < first test matters: all operands are unsigned,
 * so "count - first" would otherwise wrap around and the result could
 * end up far larger than 'count' (e.g. count=1, first=3, incr=3).
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   if (count < first)
      return 0;

   return count - (count - first) % incr;
}
/* Template body for the linear vertex-array front end run function.
 * FUNC, LINE and TRIANGLE are macros supplied by the including file.
 *
 * frontend - the varray front end (cast to struct varray_frontend)
 * get_elt  - not referenced by this implementation
 * elts     - carries the index of the first vertex, encoded in the
 *            pointer value itself
 * count    - number of vertices to draw
 */
static void FUNC(struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
const void *elts,
unsigned count)
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
/* NOTE(review): pointer-to-unsigned cast; this looks like it would
 * truncate (and warn) where pointers are wider than unsigned --
 * confirm for 64-bit targets.
 */
unsigned start = (unsigned)elts;
unsigned i, j;
unsigned first, incr;
varray->fetch_start = start;
draw_pt_split_prim(varray->input_prim, &first, &incr);
/* Sanitize primitive length:
*/
count = trim(count, first, incr);
if (count < first)
return;
#if 0
debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
varray->input_prim,
start, count);
#endif
switch (varray->input_prim) {
/* These primitive types are passed to the middle end as linear
 * ranges, in chunks of at most FETCH_MAX vertices.
 */
case PIPE_PRIM_POINTS:
case PIPE_PRIM_LINES:
case PIPE_PRIM_TRIANGLES:
case PIPE_PRIM_LINE_STRIP:
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_QUADS:
case PIPE_PRIM_QUAD_STRIP:
for (j = 0; j < count;) {
unsigned remaining = count - j;
unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr );
varray_flush_linear(varray, start + j, nr);
j += nr;
/* More vertices follow: step back by (first - incr) so the next
 * chunk begins with the trailing vertices of this one.  For the
 * non-strip types first == incr, so this is a step of zero.
 */
if (nr != remaining)
j -= (first - incr);
}
break;
/* Line loops are decomposed into explicit line segments here. */
case PIPE_PRIM_LINE_LOOP:
if (count >= 2) {
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 1; i < end; i++) {
LINE(varray, i - 1, i);
}
/* Closing segment back to vertex 0 of this chunk.
 * NOTE(review): this is emitted for every chunk, so a loop longer
 * than FETCH_MAX appears to be closed once per chunk -- confirm
 * that this is intended.
 */
LINE(varray, i - 1, 0);
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
}
break;
/* Polygons and fans are decomposed into triangles (0, i-1, i).
 * NOTE(review): vertex 0 is relative to each chunk, so inputs longer
 * than FETCH_MAX presumably do not share a single hub vertex --
 * confirm.
 */
case PIPE_PRIM_POLYGON:
case PIPE_PRIM_TRIANGLE_FAN:
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 2; i < end; i++) {
TRIANGLE(varray, 0, i - 1, i);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
}
break;
default:
assert(0);
break;
}
/* Final flush after the per-primitive handling above. */
varray_flush(varray);
}
#undef TRIANGLE
#undef QUAD
#undef POINT
#undef LINE
#undef FUNC

View File

@ -171,15 +171,15 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
unsigned i2,
unsigned i3 )
{
const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
vcache_triangle_flags( vcache,
DRAW_PIPE_RESET_STIPPLE | omitEdge1,
vcache_triangle_flags( vcache,
( DRAW_PIPE_RESET_STIPPLE |
DRAW_PIPE_EDGE_FLAG_0 |
DRAW_PIPE_EDGE_FLAG_2 ),
i0, i1, i3 );
vcache_triangle_flags( vcache,
omitEdge2,
vcache_triangle_flags( vcache,
( DRAW_PIPE_EDGE_FLAG_0 |
DRAW_PIPE_EDGE_FLAG_1 ),
i1, i2, i3 );
}
@ -204,19 +204,6 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
PIPE_PRIM_POINTS,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES,
PIPE_PRIM_LINES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLES
};
static void vcache_prepare( struct draw_pt_front_end *frontend,
@ -236,7 +223,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
}
vcache->input_prim = prim;
vcache->output_prim = reduced_prim[prim];
vcache->output_prim = draw_pt_reduced_prim(prim);
vcache->middle = middle;
middle->prepare( middle, vcache->output_prim, opt );

View File

@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo);
void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
const uint8_t *data);
/* Translate a vertex_info EMIT_x attribute format into the matching
 * PIPE_FORMAT_x value.  Formats not listed here map to
 * PIPE_FORMAT_NONE.
 */
static INLINE unsigned draw_translate_vinfo_format(unsigned format )
{
   unsigned pipe_format;

   switch (format) {
   case EMIT_1F:
   case EMIT_1F_PSIZE:
      pipe_format = PIPE_FORMAT_R32_FLOAT;
      break;
   case EMIT_2F:
      pipe_format = PIPE_FORMAT_R32G32_FLOAT;
      break;
   case EMIT_3F:
      pipe_format = PIPE_FORMAT_R32G32B32_FLOAT;
      break;
   case EMIT_4F:
      pipe_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
      break;
   case EMIT_4UB:
      pipe_format = PIPE_FORMAT_R8G8B8A8_UNORM;
      break;
   default:
      pipe_format = PIPE_FORMAT_NONE;
      break;
   }

   return pipe_format;
}
#endif /* DRAW_VERTEX_H */

View File

@ -36,6 +36,8 @@
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
@ -66,13 +68,13 @@ draw_bind_vertex_shader(struct draw_context *draw,
if (dvs)
{
draw->vertex_shader = dvs;
draw->num_vs_outputs = dvs->info.num_outputs;
draw->vs.vertex_shader = dvs;
draw->vs.num_vs_outputs = dvs->info.num_outputs;
dvs->prepare( dvs, draw );
}
else {
draw->vertex_shader = NULL;
draw->num_vs_outputs = 0;
draw->vs.vertex_shader = NULL;
draw->vs.num_vs_outputs = 0;
}
}
@ -83,3 +85,109 @@ draw_delete_vertex_shader(struct draw_context *draw,
{
dvs->delete( dvs );
}
boolean
draw_vs_init( struct draw_context *draw )
{
tgsi_exec_machine_init(&draw->vs.machine);
/* FIXME: give this machine thing a proper constructor:
*/
draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
if (!draw->vs.machine.Inputs)
return FALSE;
draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
if (!draw->vs.machine.Outputs)
return FALSE;
draw->vs.emit_cache = translate_cache_create();
if (!draw->vs.emit_cache)
return FALSE;
draw->vs.fetch_cache = translate_cache_create();
if (!draw->vs.fetch_cache)
return FALSE;
return TRUE;
}
void
draw_vs_destroy( struct draw_context *draw )
{
if (draw->vs.machine.Inputs)
align_free(draw->vs.machine.Inputs);
if (draw->vs.machine.Outputs)
align_free(draw->vs.machine.Outputs);
if (draw->vs.fetch_cache)
translate_cache_destroy(draw->vs.fetch_cache);
if (draw->vs.emit_cache)
translate_cache_destroy(draw->vs.emit_cache);
tgsi_exec_machine_free_data(&draw->vs.machine);
}
/* Return the shader varient matching 'key', creating and caching a new
 * one if none exists yet.
 *
 * vs  - vertex shader owning the varient list
 * key - description of the wanted varient
 *
 * Returns NULL when the varient cannot be created or when the
 * fixed-size list is already full.  The capacity check is done before
 * creating anything: an assert alone would compile away under NDEBUG
 * and let the store below write past the end of vs->varient[].
 */
struct draw_vs_varient *
draw_vs_lookup_varient( struct draw_vertex_shader *vs,
                        const struct draw_vs_varient_key *key )
{
   struct draw_vs_varient *varient;
   unsigned i;

   /* Lookup existing varient:
    */
   for (i = 0; i < vs->nr_varients; i++)
      if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0)
         return vs->varient[i];

   /* No room to remember another one -- fail rather than overflow:
    */
   if (vs->nr_varients >= Elements(vs->varient)) {
      assert(0);
      return NULL;
   }

   /* Else have to create a new one:
    */
   varient = vs->create_varient( vs, key );
   if (varient == NULL)
      return NULL;

   /* Add it to our list:
    */
   vs->varient[vs->nr_varients++] = varient;

   /* Done
    */
   return varient;
}
/* Return the fetch translate object for 'key'.
 *
 * The most recently returned object is kept in draw->vs.fetch and
 * reused when its key compares equal; otherwise the key is sanitized
 * and looked up in the fetch cache, and the result is remembered.
 */
struct translate *
draw_vs_get_fetch( struct draw_context *draw,
                   struct translate_key *key )
{
   /* Fast path: same key as last time. */
   if (draw->vs.fetch != NULL &&
       translate_key_compare(&draw->vs.fetch->key, key) == 0) {
      return draw->vs.fetch;
   }

   translate_key_sanitize(key);
   draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key);

   return draw->vs.fetch;
}
/* Return the emit translate object for 'key'.
 *
 * The most recently returned object is kept in draw->vs.emit and
 * reused when its key compares equal; otherwise the key is sanitized
 * and looked up in the emit cache, and the result is remembered.
 */
struct translate *
draw_vs_get_emit( struct draw_context *draw,
                  struct translate_key *key )
{
   /* Fast path: same key as last time. */
   if (draw->vs.emit != NULL &&
       translate_key_compare(&draw->vs.emit->key, key) == 0) {
      return draw->vs.emit;
   }

   translate_key_sanitize(key);
   draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key);

   return draw->vs.emit;
}

View File

@ -38,10 +38,84 @@
struct draw_context;
struct pipe_shader_state;
/* Input half of a varient element.
 * NOTE(review): presumably 'buffer' selects a vertex buffer and
 * 'offset' is a byte offset within a vertex of that buffer -- confirm
 * against the code that fills in the key.
 */
struct draw_varient_input
{
enum pipe_format format;
unsigned buffer;
unsigned offset;
};
/* Output half of a varient element: ties one vertex shader output to
 * a format and a position in the emitted vertex.  vs_output and
 * offset are bitfields of 8 and 24 bits.
 */
struct draw_varient_output
{
enum pipe_format format; /* output format */
unsigned vs_output:8; /* which vertex shader output is this? */
unsigned offset:24; /* offset into output vertex */
};
/* One slot of a varient key: an input description paired with an
 * output description.
 */
struct draw_varient_element {
struct draw_varient_input in;
struct draw_varient_output out;
};
/* Key describing one shader varient.
 *
 * Keys are compared with memcmp() over a prefix whose length is
 * computed by draw_vs_varient_keysize(): a fixed header followed by
 * nr_elements entries of element[].  The member order and sizes here
 * therefore matter.
 *
 * NOTE(review): the bitfields below add up to 31 bits (8+8+8+1+1+5),
 * so one bit of the word is not covered by any named member.  Since
 * the header is compared bytewise, keys presumably need to be zeroed
 * as a whole before being filled in -- confirm at the call sites.
 */
struct draw_vs_varient_key {
unsigned output_stride;
unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */
unsigned nr_inputs:8;
unsigned nr_outputs:8;
unsigned viewport:1;
unsigned clip:1;
unsigned pad:5;
struct draw_varient_element element[PIPE_MAX_ATTRIBS];
};
struct draw_vs_varient;
typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *,
const unsigned *elts,
unsigned count,
void *output_buffer);
typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *,
unsigned start,
unsigned count,
void *output_buffer);
/* A shader varient: the key it was created for, the vertex shader it
 * belongs to, and a table of function pointers implemented by the
 * varient's backend.
 */
struct draw_vs_varient {
struct draw_vs_varient_key key;
struct draw_vertex_shader *vs;
/* Supply the pointer and stride for input number 'i'. */
void (*set_input)( struct draw_vs_varient *,
unsigned i,
const void *ptr,
unsigned stride );
/* Supply the constants as an array of float[4]. */
void (*set_constants)( struct draw_vs_varient *,
const float (*constants)[4] );
/* Supply the viewport state. */
void (*set_viewport)( struct draw_vs_varient *,
const struct pipe_viewport_state * );
/* Run over a start/count range, writing to output_buffer. */
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
unsigned count,
void *output_buffer );
/* Run over 'count' entries of the elts[] array, writing to
 * output_buffer.
 */
void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader,
const unsigned *elts,
unsigned count,
void *output_buffer );
/* Destroy this varient. */
void (*destroy)( struct draw_vs_varient * );
};
/**
* Private version of the compiled vertex_shader
*/
struct draw_vertex_shader {
struct draw_context *draw;
/* This member will disappear shortly:
*/
@ -49,6 +123,14 @@ struct draw_vertex_shader {
struct tgsi_shader_info info;
/*
*/
struct draw_vs_varient *varient[16];
unsigned nr_varients;
struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
const struct draw_vs_varient_key *key );
void (*prepare)( struct draw_vertex_shader *shader,
struct draw_context *draw );
@ -68,6 +150,15 @@ struct draw_vertex_shader {
};
struct draw_vs_varient *
draw_vs_lookup_varient( struct draw_vertex_shader *base,
const struct draw_vs_varient_key *key );
/********************************************************************************
* Internal functions:
*/
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
@ -81,7 +172,52 @@ draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vs_varient_key;
struct draw_vertex_shader;
struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key );
/********************************************************************************
* Helpers for vs implementations that don't do their own fetch/emit varients.
* Means these can be shared between shaders.
*/
struct translate;
struct translate_key;
struct translate *draw_vs_get_fetch( struct draw_context *draw,
struct translate_key *key );
struct translate *draw_vs_get_emit( struct draw_context *draw,
struct translate_key *key );
struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key );
/* Number of leading bytes of a varient key that are meaningful: a
 * header counted as two ints, followed by key->nr_elements entries of
 * the element[] array.
 */
static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key )
{
   int size = (int)(2 * sizeof(int));

   size += (int)(key->nr_elements * sizeof(struct draw_varient_element));

   return size;
}
/* Bytewise comparison of two varient keys over the meaningful prefix
 * of 'a' (see draw_vs_varient_keysize()).  Returns the memcmp()
 * result, i.e. zero when the compared bytes are equal.
 */
static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a,
                                               const struct draw_vs_varient_key *b )
{
   return memcmp(a, b, draw_vs_varient_keysize(a));
}
#define MAX_TGSI_VERTICES 4
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,222 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef DRAW_VS_AOS_H
#define DRAW_VS_AOS_H
struct tgsi_token;
struct x86_function;
#include "pipe/p_state.h"
#include "rtasm/rtasm_x86sse.h"
#define X 0
#define Y 1
#define Z 2
#define W 3
#define MAX_INPUTS PIPE_MAX_ATTRIBS
#define MAX_OUTPUTS PIPE_MAX_ATTRIBS
#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */
#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */
#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */
#define MAX_INTERNALS 8
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
#define FPU_RND_NEG 1
#define FPU_RND_NEAREST 2
struct aos_machine;
typedef void PIPE_CDECL (*lit_func)( struct aos_machine *,
float *result,
const float *in,
unsigned count );
struct shine_tab {
float exponent;
float values[258];
unsigned last_used;
};
struct lit_info {
lit_func func;
struct shine_tab *shine_tab;
};
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16
/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
*/
/* Working storage for the aos_sse varients: register files stored as
 * arrays of float[4], viewport scale/translate, LIT helper tables,
 * saved FPU control words and the per-attribute input/output
 * bookkeeping.
 */
struct aos_machine {
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];
float scale[4]; /* viewport */
float translate[4]; /* viewport */
float tmp[2][4]; /* scratch space for LIT */
struct shine_tab shine_tab[MAX_SHINE_TAB];
struct lit_info lit_info[MAX_LIT_INFO];
/* NOTE(review): presumably a counter compared against
 * shine_tab.last_used to pick a table for reuse -- confirm.
 */
unsigned now;
ushort fpu_rnd_nearest;
ushort fpu_rnd_neg_inf;
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */
/* Per-attribute source pointer/stride and destination offset. */
struct {
const void *input_ptr;
unsigned input_stride;
unsigned output_offset;
} attrib[PIPE_MAX_ATTRIBS];
};
/* State carried while one aos_sse varient is being translated to x86
 * code: the function being emitted, the varient it belongs to,
 * counters, the bookkeeping for the eight xmm registers and a set of
 * fixed general-purpose register assignments.
 */
struct aos_compilation {
struct x86_function *func;
struct draw_vs_varient_aos_sse *vaos;
unsigned insn_counter;
unsigned num_immediates;
unsigned count;
unsigned lit_count;
/* What each xmm register currently holds (register file + index),
 * plus 'dirty' and 'last_used' bookkeeping.
 */
struct {
unsigned idx:16;
unsigned file:8;
unsigned dirty:8;
unsigned last_used;
} xmm[8];
boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
boolean have_sse2;
boolean error; /* set by the ERROR() macro */
short fpucntl;
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
*/
struct x86_reg tmp_EAX;
struct x86_reg idx_EBX; /* either start+i or &elt[i] */
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
};
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
void aos_adopt_xmm_reg( struct aos_compilation *cp,
struct x86_reg reg,
unsigned file,
unsigned idx,
unsigned dirty );
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
unsigned file,
unsigned idx );
boolean aos_fetch_inputs( struct aos_compilation *cp,
boolean linear );
boolean aos_emit_outputs( struct aos_compilation *cp );
/* Selectors passed as the 'imm' argument of aos_get_internal() and
 * aos_get_internal_xmm().  The trailing comment on each line lists the
 * four component values of that constant.
 */
#define IMM_ONES 0 /* 1, 1,1,1 */
#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
#define IMM_255 4 /* 255, 255, 255, 255 */
#define IMM_NEGS 5 /* -1,-1,-1,-1 */
#define IMM_RSQ 6 /* -.5,1.5,_,_ */
#define IMM_PSIZE 7 /* not really an immediate - updated each run */
/* Returns an operand referring to the selected constant; judging by the
 * names, the _xmm variant returns it in an XMM register while the plain
 * variant may return a memory operand -- TODO confirm in draw_vs_aos.c.
 */
struct x86_reg aos_get_internal( struct aos_compilation *cp,
unsigned imm );
struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
unsigned imm );
/* Report a code-generation failure: print the calling function's name and
 * 'msg', mark the compilation as failed and trip an assert in debug builds.
 *
 * 'cp' is parenthesized so that any pointer-valued expression (not just a
 * plain identifier) can be passed safely.
 */
#define ERROR(cp, msg)                                                  \
do {                                                                    \
   debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \
   (cp)->error = 1;                                                     \
   assert(0);                                                           \
} while (0)
/* A vertex shader varient implemented with generated x86/SSE code.
 * 'base' is the first member; the generic varient in draw_vs_varient.c
 * casts between its own type and struct draw_vs_varient the same way.
 */
struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
/* Disabled: per-attribute pointer/stride pairs (struct aos_machine has an
 * attrib[] array with input_ptr/input_stride members instead).
 */
#if 0
struct {
const void *ptr;
unsigned stride;
} attrib[PIPE_MAX_ATTRIBS];
#endif
struct aos_machine *machine; /* XXX: temporarily unshared */
/* Entry points of the generated code. */
vsv_run_linear_func gen_run_linear;
vsv_run_elts_func gen_run_elts;
/* Two code buffers -- presumably one per entry point above; confirm. */
struct x86_function func[2];
};
#endif

View File

@ -0,0 +1,326 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
#include "tgsi/util/tgsi_util.h"
#include "tgsi/exec/tgsi_exec.h"
#include "draw_vs.h"
#include "draw_vs_aos.h"
#include "draw_vertex.h"
#include "rtasm/rtasm_x86sse.h"
#ifdef PIPE_ARCH_X86
/* Note - don't yet have to worry about interacting with the code in
* draw_vs_aos.c as there is no intermingling of generated code...
* That may have to change, we'll see.
*/
/* Load a complete four-float attribute from src_ptr into 'data' with a
 * single movups.
 */
static void emit_load_R32G32B32A32( struct aos_compilation *cp,
                                    struct x86_reg data,
                                    struct x86_reg src_ptr )
{
   struct x86_function *func = cp->func;

   sse_movups(func, data, src_ptr);
}
/* Load a three-float attribute and fill the fourth component with 1.0.
 *
 * Step by step (components listed x,y,z,w; IMM_IDENTITY is 0,0,0,1):
 *   movss   data = (z, 0, 0, 0)      third float, at byte offset 8
 *   shufps  data = (z, 0, 0, 1)      low pair from data, high pair from identity
 *   shufps  data = (0, 0, z, 1)      move z into place
 *   movlps  data = (x, y, z, 1)      first two floats into the low half
 */
static void emit_load_R32G32B32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
sse_movlps(cp->func, data, src_ptr);
}
static void emit_load_R32G32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
sse_movlps(cp->func, data, src_ptr);
}
/* Load a single-float attribute and expand it to (x, 0, 0, 1).
 *
 * movss leaves data = (x, 0, 0, 0); OR-ing in IMM_IDENTITY (0,0,0,1) then
 * sets w to 1.0 without disturbing x, because the identity's x bits are
 * all zero.
 */
static void emit_load_R32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
}
/* Load four unsigned bytes and convert them to floats scaled by 1/255.
 *
 * The two punpcklbw steps interleave the loaded bytes with bytes of the
 * IMM_IDENTITY register.  This appears to rely on the low half of that
 * register being all zero bits (its x and y are 0.0) so that each source
 * byte ends up zero-extended to 32 bits before cvtdq2ps -- TODO confirm.
 *
 * NOTE(review): SSE2 emitters are used here without consulting
 * cp->have_sse2; presumably the caller guarantees SSE2 -- confirm.
 */
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
sse2_cvtdq2ps(cp->func, data, data);
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
}
static void get_src_ptr( struct x86_function *func,
struct x86_reg src,
struct x86_reg machine,
struct x86_reg elt,
unsigned a )
{
struct x86_reg input_ptr =
x86_make_disp(machine,
Offset(struct aos_machine, attrib[a].input_ptr));
struct x86_reg input_stride =
x86_make_disp(machine,
Offset(struct aos_machine, attrib[a].input_stride));
/* Calculate pointer to current attrib:
*/
x86_mov(func, src, input_stride);
x86_imul(func, src, elt);
x86_add(func, src, input_ptr);
}
/* Reorder components with one shufps using the given SHUF() selector.
 * Extended swizzles are not supported (maybe later).
 */
static void emit_swizzle( struct aos_compilation *cp,
                          struct x86_reg dest,
                          struct x86_reg src,
                          unsigned shuffle )
{
   struct x86_function *func = cp->func;

   sse_shufps(func, dest, src, shuffle);
}
/* Emit the fetch of input attribute 'idx' into a freshly obtained XMM
 * register, which is adopted as holding (TGSI_FILE_INPUT, idx).
 *
 * When 'linear' is set idx_EBX is used directly as the vertex index,
 * otherwise it is dereferenced to obtain the index.
 *
 * Returns FALSE (after flagging the error) for an unhandled input format.
 */
static boolean load_input( struct aos_compilation *cp,
                           unsigned idx,
                           boolean linear )
{
   const unsigned fmt = cp->vaos->base.key.element[idx].in.format;
   struct x86_reg xmm = aos_get_xmm_reg(cp);
   struct x86_reg elt;
   struct x86_reg attr;

   if (linear)
      elt = cp->idx_EBX;
   else
      elt = x86_deref(cp->idx_EBX);

   /* tmp_EAX receives the attribute's address; read through it below. */
   get_src_ptr(cp->func, cp->tmp_EAX, cp->machine_EDX, elt, idx);
   attr = x86_deref(cp->tmp_EAX);

   aos_adopt_xmm_reg( cp, xmm, TGSI_FILE_INPUT, idx, TRUE );

   switch (fmt) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(cp, xmm, attr);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(cp, xmm, attr);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(cp, xmm, attr);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(cp, xmm, attr);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Same byte load as RGBA, then swap X and Z. */
      emit_load_R8G8B8A8_UNORM(cp, xmm, attr);
      emit_swizzle(cp, xmm, xmm, SHUF(Z,Y,X,W));
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(cp, xmm, attr);
      break;
   default:
      ERROR(cp, "unhandled input format");
      return FALSE;
   }

   return TRUE;
}
/* Emit fetch code for every input listed in the varient key, in order.
 * Stops and returns FALSE at the first input that cannot be loaded.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned attr = 0;

   while (attr < cp->vaos->base.key.nr_inputs) {
      if (!load_input( cp, attr, linear ))
         return FALSE;

      cp->insn_counter++;
      debug_printf("\n");
      attr++;
   }

   return TRUE;
}
/* Store all four floats of dataXMM to dst_ptr with a single movups. */
static void emit_store_R32G32B32A32( struct aos_compilation *cp,
                                     struct x86_reg dst_ptr,
                                     struct x86_reg dataXMM )
{
   struct x86_function *func = cp->func;

   sse_movups(func, dst_ptr, dataXMM);
}
static void emit_store_R32G32B32( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
sse_movlps(cp->func, dst_ptr, dataXMM);
sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
}
/* Store the low two floats of dataXMM to dst_ptr. */
static void emit_store_R32G32( struct aos_compilation *cp,
                               struct x86_reg dst_ptr,
                               struct x86_reg dataXMM )
{
   struct x86_function *func = cp->func;

   sse_movlps(func, dst_ptr, dataXMM);
}
/* Store only the first float of dataXMM to dst_ptr. */
static void emit_store_R32( struct aos_compilation *cp,
                            struct x86_reg dst_ptr,
                            struct x86_reg dataXMM )
{
   struct x86_function *func = cp->func;

   sse_movss(func, dst_ptr, dataXMM);
}
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
sse2_packssdw(cp->func, dataXMM, dataXMM);
sse2_packuswb(cp->func, dataXMM, dataXMM);
sse_movss(cp->func, dst_ptr, dataXMM);
}
/* Emit the store of one output attribute.
 *
 * \param cp       compilation state
 * \param ptr      destination operand (output buffer plus attribute offset)
 * \param dataXMM  XMM register holding the value; may be clobbered
 * \param format   one of the EMIT_* formats
 * \return FALSE (after flagging the error) for an unhandled format
 *
 * The former "if (1) ... else ..." around the EMIT_4UB case had an
 * unreachable else-branch; only the live path (swizzle, then store) is kept.
 */
static boolean emit_output( struct aos_compilation *cp,
                            struct x86_reg ptr,
                            struct x86_reg dataXMM,
                            unsigned format )
{
   switch (format) {
   case EMIT_1F:
   case EMIT_1F_PSIZE:
      emit_store_R32(cp, ptr, dataXMM);
      break;
   case EMIT_2F:
      emit_store_R32G32(cp, ptr, dataXMM);
      break;
   case EMIT_3F:
      emit_store_R32G32B32(cp, ptr, dataXMM);
      break;
   case EMIT_4F:
      emit_store_R32G32B32A32(cp, ptr, dataXMM);
      break;
   case EMIT_4UB:
      /* Swap X and Z before packing to bytes. */
      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
      break;
   default:
      ERROR(cp, "unhandled output format");
      return FALSE;
   }

   return TRUE;
}
boolean aos_emit_outputs( struct aos_compilation *cp )
{
unsigned i;
for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
unsigned format = cp->vaos->base.key.element[i].out.format;
unsigned offset = cp->vaos->base.key.element[i].out.offset;
unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
struct x86_reg data;
if (format == EMIT_1F_PSIZE) {
data = aos_get_internal_xmm( cp, IMM_PSIZE );
}
else {
data = aos_get_shader_reg( cp,
TGSI_FILE_OUTPUT,
vs_output );
}
if (data.file != file_XMM) {
struct x86_reg tmp = aos_get_xmm_reg( cp );
sse_movups(cp->func, tmp, data);
data = tmp;
}
if (!emit_output( cp,
x86_make_disp( cp->outbuf_ECX, offset ),
data,
format ))
return FALSE;
aos_release_xmm_reg( cp, data.idx );
cp->insn_counter++;
debug_printf("\n");
}
return TRUE;
}
#endif

View File

@ -179,10 +179,12 @@ draw_create_vs_exec(struct draw_context *draw,
tgsi_scan_shader(state->tokens, &vs->base.info);
vs->base.draw = draw;
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
vs->machine = &draw->machine;
vs->base.create_varient = draw_vs_varient_generic;
vs->machine = &draw->vs.machine;
return &vs->base;
}

View File

@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw,
tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
vs->base.draw = draw;
vs->base.prepare = vs_llvm_prepare;
vs->base.create_varient = draw_vs_varient_generic;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
vs->machine = &draw->machine;

View File

@ -49,9 +49,7 @@
#include "tgsi/util/tgsi_parse.h"
#define SSE_MAX_VERTICES 4
#define SSE_SWIZZLES 1
#if SSE_SWIZZLES
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input, /* 1 */
struct tgsi_exec_vector *output, /* 2 */
@ -64,14 +62,6 @@ typedef void (XSTDCALL *codegen_function) (
float (*aos_output)[4], /* 9 */
uint num_outputs, /* 10 */
uint output_stride ); /* 11 */
#else
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input,
struct tgsi_exec_vector *output,
float (*constant)[4],
struct tgsi_exec_vector *temporary,
float (*immediates)[4] );
#endif
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
@ -113,7 +103,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
#if SSE_SWIZZLES
/* run compiled shader
*/
shader->func(machine->Inputs,
@ -130,43 +119,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
output = (float (*)[4])((char *)output + output_stride * max_vertices);
#else
unsigned int j, slot;
/* Swizzle inputs.
*/
for (j = 0; j < max_vertices; j++) {
for (slot = 0; slot < base->info.num_inputs; slot++) {
machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
input = (const float (*)[4])((const char *)input + input_stride);
}
/* run compiled shader
*/
shader->func(machine->Inputs,
machine->Outputs,
(float (*)[4])constants,
machine->Temps,
shader->immediates);
/* Unswizzle all output results.
*/
for (j = 0; j < max_vertices; j++) {
for (slot = 0; slot < base->info.num_outputs; slot++) {
output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
output = (float (*)[4])((char *)output + output_stride);
}
#endif
}
}
@ -205,15 +157,18 @@ draw_create_vs_sse(struct draw_context *draw,
tgsi_scan_shader(templ->tokens, &vs->base.info);
vs->base.draw = draw;
vs->base.create_varient = draw_vs_varient_aos_sse;
// vs->base.create_varient = draw_vs_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
vs->machine = &draw->machine;
vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
&vs->sse2_program, vs->immediates, SSE_SWIZZLES ))
&vs->sse2_program, vs->immediates, TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );

View File

@ -0,0 +1,326 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "pipe/p_util.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_vs.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
/* A first pass at incorporating vertex fetch/emit functionality into
 * the vertex shader varient itself.
*/
struct draw_vs_varient_generic {
/* Must stay first: the vsvg_* callbacks cast struct draw_vs_varient
 * pointers to this type.
 */
struct draw_vs_varient base;
/* Copy of the viewport state, stored by vsvg_set_viewport(). */
struct pipe_viewport_state viewport;
struct draw_vertex_shader *shader;
struct draw_context *draw;
/* Basic plan is to run these two translate functions before/after
* the vertex shader's existing run_linear() routine to simulate
* the inclusion of this functionality into the shader...
*
* Next will look at actually including it.
*/
struct translate *fetch;
struct translate *emit;
/* Constant buffer handed to the shader's run_linear(); stored by
 * vsvg_set_constants().
 */
const float (*constants)[4];
};
/* Remember the constant buffer to pass to the shader on the next run.
 * Only the pointer is stored; the data is not copied.
 */
static void vsvg_set_constants( struct draw_vs_varient *varient,
                                const float (*constants)[4] )
{
   ((struct draw_vs_varient_generic *)varient)->constants = constants;
}
static void vsvg_set_input( struct draw_vs_varient *varient,
unsigned buffer,
const void *ptr,
unsigned stride )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
vsvg->fetch->set_buffer(vsvg->fetch,
buffer,
ptr,
stride);
}
/* Perspective divide plus viewport transform, applied in place to the
 * first four floats of each of 'count' vertices in output_buffer (vertices
 * are key.output_stride bytes apart).  The fourth float is replaced by its
 * reciprocal.
 *
 * Mainly for debug at this stage.
 */
static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
                             unsigned count,
                             void *output_buffer )
{
   const float *scale = vsvg->viewport.scale;
   const float *trans = vsvg->viewport.translate;
   const unsigned stride = vsvg->base.key.output_stride;
   char *vertex = (char *)output_buffer;
   unsigned remaining;

   for (remaining = count; remaining != 0; remaining--) {
      float *pos = (float *)vertex;
      const float rhw = 1.0f / pos[3];
      unsigned c;

      for (c = 0; c < 3; c++)
         pos[c] = pos[c] * rhw * scale[c] + trans[c];
      pos[3] = rhw;

      vertex += stride;
   }
}
/* Viewport scale and translate without perspective divide, applied in
 * place to the first three floats of each of 'count' vertices in
 * output_buffer (vertices are key.output_stride bytes apart).
 */
static void do_viewport( struct draw_vs_varient_generic *vsvg,
                         unsigned count,
                         void *output_buffer )
{
   const float *scale = vsvg->viewport.scale;
   const float *trans = vsvg->viewport.translate;
   const unsigned stride = vsvg->base.key.output_stride;
   char *vertex = (char *)output_buffer;
   unsigned remaining;

   for (remaining = count; remaining != 0; remaining--) {
      float *pos = (float *)vertex;
      unsigned c;

      for (c = 0; c < 3; c++)
         pos[c] = pos[c] * scale[c] + trans[c];

      vertex += stride;
   }
}
static void vsvg_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
unsigned count,
void *output_buffer)
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
/* Want to do this in small batches for cache locality?
*/
vsvg->fetch->run_elts( vsvg->fetch,
elts,
count,
output_buffer );
//if (!vsvg->base.vs->is_passthrough)
{
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
vsvg->constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
if (vsvg->base.key.clip) {
/* not really handling clipping, just do the rhw so we can
* see the results...
*/
do_rhw_viewport( vsvg,
count,
output_buffer );
}
else if (vsvg->base.key.viewport) {
do_viewport( vsvg,
count,
output_buffer );
}
//if (!vsvg->already_in_emit_format)
vsvg->emit->set_buffer( vsvg->emit,
0,
output_buffer,
vsvg->base.key.output_stride );
vsvg->emit->run( vsvg->emit,
0, count,
output_buffer );
}
}
static void vsvg_run_linear( struct draw_vs_varient *varient,
unsigned start,
unsigned count,
void *output_buffer )
{
struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
//debug_printf("%s %d %d\n", __FUNCTION__, start, count);
vsvg->fetch->run( vsvg->fetch,
start,
count,
output_buffer );
//if (!vsvg->base.vs->is_passthrough)
{
vsvg->base.vs->run_linear( vsvg->base.vs,
output_buffer,
output_buffer,
vsvg->constants,
count,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
if (vsvg->base.key.clip) {
/* not really handling clipping, just do the rhw so we can
* see the results...
*/
do_rhw_viewport( vsvg,
count,
output_buffer );
}
else if (vsvg->base.key.viewport) {
do_viewport( vsvg,
count,
output_buffer );
}
//if (!vsvg->already_in_emit_format)
vsvg->emit->set_buffer( vsvg->emit,
0,
output_buffer,
vsvg->base.key.output_stride );
vsvg->emit->set_buffer( vsvg->emit,
1,
&vsvg->draw->rasterizer->point_size,
0);
vsvg->emit->run( vsvg->emit,
0, count,
output_buffer );
}
}
/* Take a private copy of the viewport state for use by do_viewport() and
 * do_rhw_viewport().
 */
static void vsvg_set_viewport( struct draw_vs_varient *varient,
                               const struct pipe_viewport_state *viewport )
{
   ((struct draw_vs_varient_generic *)varient)->viewport = *viewport;
}
/* Free the varient structure itself.  The fetch/emit translate objects are
 * not released here -- presumably they are owned by whatever
 * draw_vs_get_fetch()/draw_vs_get_emit() obtained them from; confirm.
 */
static void vsvg_destroy( struct draw_vs_varient *varient )
{
FREE(varient);
}
struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key )
{
unsigned i;
struct translate_key fetch, emit;
struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
if (vsvg == NULL)
return NULL;
vsvg->base.key = *key;
vsvg->base.vs = vs;
vsvg->base.set_input = vsvg_set_input;
vsvg->base.set_constants = vsvg_set_constants;
vsvg->base.set_viewport = vsvg_set_viewport;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;
/* Build free-standing fetch and emit functions:
*/
fetch.nr_elements = key->nr_inputs;
fetch.output_stride = 0;
for (i = 0; i < key->nr_inputs; i++) {
fetch.element[i].input_format = key->element[i].in.format;
fetch.element[i].input_buffer = key->element[i].in.buffer;
fetch.element[i].input_offset = key->element[i].in.offset;
fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
fetch.element[i].output_offset = fetch.output_stride;
fetch.output_stride += 4 * sizeof(float);
}
emit.nr_elements = key->nr_outputs;
emit.output_stride = key->output_stride;
for (i = 0; i < key->nr_outputs; i++) {
if (key->element[i].out.format != EMIT_1F_PSIZE)
{
emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit.element[i].input_buffer = 0;
emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
emit.element[i].output_offset = key->element[i].out.offset;
}
else {
emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].input_buffer = 1;
emit.element[i].input_offset = 0;
emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
emit.element[i].output_offset = key->element[i].out.offset;
}
}
vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
return &vsvg->base;
}

View File

@ -38,11 +38,8 @@
#define DUMP_SSE 0
#if DUMP_SSE
static void
_print_reg(
struct x86_reg reg )
void x86_print_reg( struct x86_reg reg )
{
if (reg.mod != mod_REG)
debug_printf( "[" );
@ -79,6 +76,7 @@ _print_reg(
debug_printf( "]" );
}
#if DUMP_SSE
#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )
@ -89,7 +87,7 @@ _print_reg(
foo++; \
if (*foo) \
foo++; \
debug_printf( "\n% 15s ", foo ); \
debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \
} while (0)
#define DUMP_I( I ) do { \
@ -99,27 +97,27 @@ _print_reg(
#define DUMP_R( R0 ) do { \
DUMP(); \
_print_reg( R0 ); \
x86_print_reg( R0 ); \
} while( 0 )
#define DUMP_RR( R0, R1 ) do { \
DUMP(); \
_print_reg( R0 ); \
x86_print_reg( R0 ); \
debug_printf( ", " ); \
_print_reg( R1 ); \
x86_print_reg( R1 ); \
} while( 0 )
#define DUMP_RI( R0, I ) do { \
DUMP(); \
_print_reg( R0 ); \
x86_print_reg( R0 ); \
debug_printf( ", %u", I ); \
} while( 0 )
#define DUMP_RRI( R0, R1, I ) do { \
DUMP(); \
_print_reg( R0 ); \
x86_print_reg( R0 ); \
debug_printf( ", " ); \
_print_reg( R1 ); \
x86_print_reg( R1 ); \
debug_printf( ", %u", I ); \
} while( 0 )
@ -222,6 +220,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1
/* Build a modRM byte + possible displacement. No treatment of SIB
* indexing. BZZT - no way to encode an absolute address.
*
* This is the "/r" field in the x86 manuals...
*/
static void emit_modrm( struct x86_function *p,
struct x86_reg reg,
@ -260,7 +260,8 @@ static void emit_modrm( struct x86_function *p,
}
}
/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
*/
static void emit_modrm_noreg( struct x86_function *p,
unsigned op,
struct x86_reg regmem )
@ -369,8 +370,7 @@ void x86_jcc( struct x86_function *p,
DUMP_I(cc);
if (offset < 0) {
int amt = p->csr - p->store;
assert(amt > -offset);
assert(p->csr - p->store > -offset);
}
if (offset <= 127 && offset >= -128) {
@ -447,6 +447,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
/* Emit an ADD of an 8-bit immediate to register 'dst' (register operand
 * only, enforced by the assert): opcode byte 0x80, modrm with /0, then
 * the immediate byte.
 *
 * NOTE(review): in the Intel manuals 0x80 /0 ib is the 8-bit-operand form
 * (ADD r/m8, imm8); the 32-bit-register form with a sign-extended 8-bit
 * immediate is 0x83 /0 ib.  Confirm which is intended by the callers.
 */
void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
{
DUMP_RI( dst, imm );
assert(dst.mod == mod_REG);
emit_1ub(p, 0x80);
emit_modrm_noreg(p, 0, dst);
emit_1ub(p, imm);
}
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
@ -463,6 +473,17 @@ void x86_push( struct x86_function *p,
p->stack_offset += 4;
}
/* Emit a push of a 32-bit immediate (opcode 0x68 followed by the value)
 * and account for the 4 bytes in p->stack_offset, as x86_push() does.
 */
void x86_push_imm32( struct x86_function *p,
int imm32 )
{
DUMP_I( imm32 );
emit_1ub(p, 0x68);
emit_1i(p, imm32);
p->stack_offset += 4;
}
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
@ -990,6 +1011,24 @@ void sse2_movd( struct x86_function *p,
/***********************************************************************
* x87 instructions
*/
/* Bookkeeping: record that the instruction just emitted pops the x87
 * register stack, and assert the tracked depth has not gone negative.
 */
static void note_x87_pop( struct x86_function *p )
{
p->x87_stack--;
assert(p->x87_stack >= 0);
}
/* Bookkeeping: record that the instruction just emitted pushes onto the
 * x87 register stack, and assert the tracked depth stays at or below 7.
 */
static void note_x87_push( struct x86_function *p )
{
p->x87_stack++;
assert(p->x87_stack <= 7);
}
/* Debug check: assert that the tracked x87 stack depth is zero, i.e. every
 * push noted so far has been matched by a pop.  Emits no code.
 */
void x87_assert_stack_empty( struct x86_function *p )
{
assert (p->x87_stack == 0);
}
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@ -1002,6 +1041,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst )
DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
note_x87_pop(p);
}
void x87_fild( struct x86_function *p, struct x86_reg arg )
@ -1009,12 +1049,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg )
DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
note_x87_push(p);
}
/* Emit FLDZ (bytes 0xD9 0xEE) and note the x87 stack push. */
void x87_fldz( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xee);
note_x87_push(p);
}
@ -1031,18 +1073,21 @@ void x87_fld1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe8);
note_x87_push(p);
}
/* Emit FLDL2E (bytes 0xD9 0xEA) and note the x87 stack push. */
void x87_fldl2e( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xea);
note_x87_push(p);
}
/* Emit FLDLN2 (bytes 0xD9 0xED) and note the x87 stack push. */
void x87_fldln2( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xed);
note_x87_push(p);
}
void x87_fwait( struct x86_function *p )
@ -1063,6 +1108,49 @@ void x87_fclex( struct x86_function *p )
x87_fnclex(p);
}
/* Emit FCMOVB with x87 register 'arg' as source: bytes 0xDA, 0xC0+idx.
 * The tracked stack depth is unchanged.
 */
void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xda, 0xc0+arg.idx);
}
/* Emit FCMOVE with x87 register 'arg' as source: bytes 0xDA, 0xC8+idx.
 * The tracked stack depth is unchanged.
 */
void x87_fcmove( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xda, 0xc8+arg.idx);
}
/* Emit FCMOVBE with x87 register 'arg' as source: bytes 0xDA, 0xD0+idx.
 * The tracked stack depth is unchanged.
 */
void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xda, 0xd0+arg.idx);
}
/* Emit FCMOVNB with x87 register 'arg' as source: bytes 0xDB, 0xC0+idx.
 * The tracked stack depth is unchanged.
 */
void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdb, 0xc0+arg.idx);
}
/* Emit FCMOVNE with x87 register 'arg' as source: bytes 0xDB, 0xC8+idx.
 * The tracked stack depth is unchanged.
 */
void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdb, 0xc8+arg.idx);
}
/* Emit FCMOVNBE with x87 register 'arg' as source: bytes 0xDB, 0xD0+idx.
 * The tracked stack depth is unchanged.
 */
void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdb, 0xd0+arg.idx);
}
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
unsigned char dst0ub0,
@ -1150,6 +1238,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
note_x87_pop(p);
}
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
@ -1158,6 +1247,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
note_x87_pop(p);
}
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
@ -1166,6 +1256,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
note_x87_pop(p);
}
void x87_faddp( struct x86_function *p, struct x86_reg dst )
@ -1174,6 +1265,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
note_x87_pop(p);
}
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
@ -1182,6 +1274,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
note_x87_pop(p);
}
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
@ -1190,6 +1283,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
note_x87_pop(p);
}
/* Emit FTST (bytes 0xD9 0xE4).  The tracked stack depth is unchanged. */
void x87_ftst( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe4);
}
void x87_fucom( struct x86_function *p, struct x86_reg arg )
@ -1204,12 +1304,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg )
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
note_x87_pop(p);
}
/* Emit FUCOMPP (bytes 0xDA 0xE9).  The instruction pops the x87 stack
 * twice, so two pops are noted.
 */
void x87_fucompp( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xda, 0xe9);
note_x87_pop(p); /* pop twice */
note_x87_pop(p); /* pop twice */
}
void x87_fxch( struct x86_function *p, struct x86_reg arg )
@ -1291,6 +1394,7 @@ void x87_fyl2x( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf1);
note_x87_pop(p);
}
/* st1 = st1 * log2(st0 + 1.0);
@ -1302,6 +1406,7 @@ void x87_fyl2xp1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf9);
note_x87_pop(p);
}
@ -1314,6 +1419,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg )
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 0, arg);
}
note_x87_push(p);
}
void x87_fst( struct x86_function *p, struct x86_reg dst )
@ -1336,8 +1442,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst )
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 3, dst);
}
note_x87_pop(p);
}
/* Discard the top of the x87 stack by emitting a store-and-pop of st(0)
 * onto itself.  The pop is noted inside x87_fstp().
 */
void x87_fpop( struct x86_function *p )
{
x87_fstp( p, x86_make_reg( file_x87, 0 ));
}
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@ -1349,6 +1462,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst )
}
}
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@ -1358,6 +1472,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst )
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, 3, dst);
}
note_x87_pop(p);
}
/* Emit FCOMI against x87 register 'arg': bytes 0xDB, 0xF0+idx.  The
 * tracked stack depth is unchanged.
 */
void x87_fcomi( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
emit_2ub(p, 0xdb, 0xf0+arg.idx);
}
void x87_fcomip( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
emit_2ub(p, 0xdb, 0xf0+arg.idx);
note_x87_pop(p);
}
@ -1376,6 +1504,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
}
/* Emit a store of the x87 control word to 'dst': byte 0x9B, then 0xD9
 * with a /7 modrm for 'dst'.
 *
 * NOTE(review): with the leading 0x9B (WAIT) byte this is the waiting form
 * (FSTCW) rather than the bare FNSTCW the name suggests; the original
 * comment already questions whether the prefix is needed -- confirm.
 */
void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_REG32);
emit_1ub(p, 0x9b); /* WAIT -- needed? */
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 7, dst);
}
/***********************************************************************
@ -1444,6 +1583,21 @@ void mmx_movq( struct x86_function *p,
*/
/* Emit pushes of EAX, ECX and EDX, in that order.  Judging by the name
 * these are the registers a cdecl caller must preserve itself around a
 * call.  Pair with x86_cdecl_caller_pop_regs().
 */
void x86_cdecl_caller_push_regs( struct x86_function *p )
{
x86_push(p, x86_make_reg(file_REG32, reg_AX));
x86_push(p, x86_make_reg(file_REG32, reg_CX));
x86_push(p, x86_make_reg(file_REG32, reg_DX));
}
/* Emit pops of EDX, ECX and EAX -- the reverse of the order used by
 * x86_cdecl_caller_push_regs().
 */
void x86_cdecl_caller_pop_regs( struct x86_function *p )
{
x86_pop(p, x86_make_reg(file_REG32, reg_DX));
x86_pop(p, x86_make_reg(file_REG32, reg_CX));
x86_pop(p, x86_make_reg(file_REG32, reg_AX));
}
/* Retrieve a reference to one of the function arguments, taking into
* account any push/pop activity:
*/

View File

@ -43,10 +43,12 @@ struct x86_function {
unsigned size;
unsigned char *store;
unsigned char *csr;
unsigned stack_offset;
int need_emms;
unsigned stack_offset:16;
unsigned need_emms:8;
int x87_stack:8;
unsigned char error_overflow[4];
const char *fn;
};
enum x86_reg_file {
@ -109,6 +111,9 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void );
/* Debugging:
*/
void x86_print_reg( struct x86_reg reg );
/* Create and manipulate registers and regmem values:
@ -152,6 +157,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg);
* I load the immediate into general purpose register and use it.
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm );
/* Macro for sse_shufps() and sse2_pshufd():
@ -222,6 +228,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_push_imm32( struct x86_function *p, int imm );
void x86_ret( struct x86_function *p );
void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@ -229,13 +236,27 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
void x87_assert_stack_empty( struct x86_function *p );
void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
void x87_fcmovb( struct x86_function *p, struct x86_reg src );
void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
void x87_fcmove( struct x86_function *p, struct x86_reg src );
void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
void x87_fcmovne( struct x86_function *p, struct x86_reg src );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
void x87_fcomi( struct x86_function *p, struct x86_reg dst );
void x87_fcomip( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
@ -255,6 +276,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
void x87_fpop( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
@ -265,11 +287,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
void x87_ftst( struct x86_function *p );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );

View File

@ -539,9 +539,9 @@ static const char *TGSI_MODULATES[] =
"MODULATE_EIGHTH"
};
static void
dump_declaration_short(
struct tgsi_full_declaration *decl )
void
tgsi_dump_declaration(
const struct tgsi_full_declaration *decl )
{
TXT( "\nDCL " );
ENM( decl->Declaration.File, TGSI_FILES_SHORT );
@ -672,9 +672,9 @@ dump_declaration_verbose(
}
}
static void
dump_immediate_short(
struct tgsi_full_immediate *imm )
void
tgsi_dump_immediate(
const struct tgsi_full_immediate *imm )
{
unsigned i;
@ -727,9 +727,9 @@ dump_immediate_verbose(
}
}
static void
dump_instruction_short(
struct tgsi_full_instruction *inst,
void
tgsi_dump_instruction(
const struct tgsi_full_instruction *inst,
unsigned instno )
{
unsigned i;
@ -1281,17 +1281,17 @@ tgsi_dump(
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
dump_declaration_short(
tgsi_dump_declaration(
&parse.FullToken.FullDeclaration );
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
dump_immediate_short(
tgsi_dump_immediate(
&parse.FullToken.FullImmediate );
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
dump_instruction_short(
tgsi_dump_instruction(
&parse.FullToken.FullInstruction,
instno );
instno++;

View File

@ -14,6 +14,24 @@ tgsi_dump(
const struct tgsi_token *tokens,
unsigned flags );
/* Forward declarations: only pointers to these types are used below,
 * so the full definitions are not required by this header.
 */
struct tgsi_full_immediate;
struct tgsi_full_instruction;
struct tgsi_full_declaration;
/* Dump a single immediate token.  tgsi_dump() calls this for every
 * TGSI_TOKEN_TYPE_IMMEDIATE token it parses.
 */
void
tgsi_dump_immediate(
const struct tgsi_full_immediate *imm );
/* Dump a single instruction token.  'instno' is the instruction number
 * supplied by the caller; tgsi_dump() passes a counter that it
 * increments after each instruction.
 */
void
tgsi_dump_instruction(
const struct tgsi_full_instruction *inst,
unsigned instno );
/* Dump a single declaration token.  tgsi_dump() calls this for every
 * TGSI_TOKEN_TYPE_DECLARATION token it parses.
 */
void
tgsi_dump_declaration(
const struct tgsi_full_declaration *decl );
#if defined __cplusplus
}
#endif

View File

@ -71,15 +71,15 @@ struct translate {
const void *ptr,
unsigned stride );
void (*run_elts)( struct translate *,
const unsigned *elts,
unsigned count,
void *output_buffer);
void (PIPE_CDECL *run_elts)( struct translate *,
const unsigned *elts,
unsigned count,
void *output_buffer);
void (*run)( struct translate *,
unsigned start,
unsigned count,
void *output_buffer);
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
void *output_buffer);
};

View File

@ -541,10 +541,10 @@ static emit_func get_emit_func( enum pipe_format format )
/**
* Fetch vertex attributes for 'count' vertices.
*/
static void generic_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
void *output_buffer )
static void PIPE_CDECL generic_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
@ -580,10 +580,10 @@ static void generic_run_elts( struct translate *translate,
static void generic_run( struct translate *translate,
unsigned start,
unsigned count,
void *output_buffer )
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;

View File

@ -46,22 +46,16 @@
#define W 3
#ifdef WIN32
#define RTASM __cdecl
#else
#define RTASM
#endif
typedef void (RTASM *run_func)( struct translate *translate,
unsigned start,
unsigned count,
void *output_buffer );
typedef void (RTASM *run_elts_func)( struct translate *translate,
const unsigned *elts,
typedef void (PIPE_CDECL *run_func)( struct translate *translate,
unsigned start,
unsigned count,
void *output_buffer );
typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
const unsigned *elts,
unsigned count,
void *output_buffer );
struct translate_sse {
@ -473,13 +467,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride));
/* Incr index
*/ /* Emit code for each of the attributes. Currently routes
* everything through SSE registers, even when it might be more
* efficient to stick with regular old x86. No optimization or
* other tricks - enough new ground to cover here just getting
* things working.
*/
*/
if (linear) {
x86_inc(p->func, idxEBX);
}

View File

@ -88,7 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe )
struct pipe_winsys *ws = pipe->winsys;
uint i;
draw_destroy( softpipe->draw );
if (softpipe->draw)
draw_destroy( softpipe->draw );
softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple );
softpipe->quad.earlyz->destroy( softpipe->quad.earlyz );
@ -216,8 +217,12 @@ softpipe_create( struct pipe_screen *screen,
* Create drawing context and plug our rendering stage into it.
*/
softpipe->draw = draw_create();
assert(softpipe->draw);
if (!softpipe->draw)
goto fail;
softpipe->setup = sp_draw_render_stage(softpipe);
if (!softpipe->setup)
goto fail;
if (GETENV( "SP_NO_RAST" ) != NULL)
softpipe->no_rast = TRUE;
@ -241,4 +246,8 @@ softpipe_create( struct pipe_screen *screen,
sp_init_surface_functions(softpipe);
return &softpipe->pipe;
fail:
softpipe_destroy(&softpipe->pipe);
return NULL;
}

View File

@ -64,16 +64,17 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
}
/* Pointer to a read-only array of float[4] elements.  Used below to pass
 * vertex data to the setup_tri/setup_line/setup_point entry points.
 */
typedef const float (*cptrf4)[4];
static void
do_tri(struct draw_stage *stage, struct prim_header *prim)
{
struct setup_stage *setup = setup_stage( stage );
setup_tri( setup->setup,
prim->v[0]->data,
prim->v[1]->data,
prim->v[2]->data );
(cptrf4)prim->v[0]->data,
(cptrf4)prim->v[1]->data,
(cptrf4)prim->v[2]->data );
}
static void
@ -82,8 +83,8 @@ do_line(struct draw_stage *stage, struct prim_header *prim)
struct setup_stage *setup = setup_stage( stage );
setup_line( setup->setup,
prim->v[0]->data,
prim->v[1]->data );
(cptrf4)prim->v[0]->data,
(cptrf4)prim->v[1]->data );
}
static void
@ -92,7 +93,7 @@ do_point(struct draw_stage *stage, struct prim_header *prim)
struct setup_stage *setup = setup_stage( stage );
setup_point( setup->setup,
prim->v[0]->data );
(cptrf4)prim->v[0]->data );
}

View File

@ -116,30 +116,28 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
if (prim == PIPE_PRIM_TRIANGLES ||
prim == PIPE_PRIM_LINES ||
prim == PIPE_PRIM_POINTS) {
cvbr->prim = prim;
return TRUE;
}
else {
return FALSE;
}
cvbr->prim = prim;
return TRUE;
}
/**
 * Locate one vertex inside a vertex buffer.
 *
 * \param vertex_buffer  start of the vertex data
 * \param index          position of the wanted vertex
 * \param stride         distance in bytes between consecutive vertices
 * \return pointer to the vertex at byte offset index * stride
 *
 * The byte arithmetic is done on a const pointer so the qualifier of
 * 'vertex_buffer' is never cast away.
 */
static INLINE cptrf4 get_vert( const void *vertex_buffer,
                               int index,
                               int stride )
{
   const char *base = (const char *) vertex_buffer;

   return (cptrf4)(base + index * stride);
}
static void
sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices)
sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float);
unsigned i, j;
void *vertex_buffer = cvbr->vertex_buffer;
cptrf4 v[3];
unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
unsigned i;
const void *vertex_buffer = cvbr->vertex_buffer;
/* XXX: break this dependency - make setup_context live under
* softpipe, rename the old "setup" draw stage to something else.
@ -149,40 +147,98 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices)
switch (cvbr->prim) {
case PIPE_PRIM_TRIANGLES:
for (i = 0; i < nr_indices; i += 3) {
for (j = 0; j < 3; j++)
v[j] = (cptrf4)((char *)vertex_buffer +
indices[i+j] * vertex_size);
setup_tri( setup_ctx,
v[0],
v[1],
v[2]);
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
setup_point( setup_ctx,
get_vert(vertex_buffer, indices[i-0], stride) );
}
break;
case PIPE_PRIM_LINES:
for (i = 0; i < nr_indices; i += 2) {
for (j = 0; j < 2; j++)
v[j] = (cptrf4)((char *)vertex_buffer +
indices[i+j] * vertex_size);
for (i = 1; i < nr; i += 2) {
setup_line( setup_ctx,
v[0],
v[1] );
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride) );
}
break;
case PIPE_PRIM_POINTS:
for (i = 0; i < nr_indices; i++) {
v[0] = (cptrf4)((char *)vertex_buffer +
indices[i] * vertex_size);
setup_point( setup_ctx,
v[0] );
case PIPE_PRIM_LINE_STRIP:
for (i = 1; i < nr; i ++) {
setup_line( setup_ctx,
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride) );
}
break;
case PIPE_PRIM_LINE_LOOP:
for (i = 1; i < nr; i ++) {
setup_line( setup_ctx,
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride) );
}
if (nr) {
setup_line( setup_ctx,
get_vert(vertex_buffer, indices[nr-1], stride),
get_vert(vertex_buffer, indices[0], stride) );
}
break;
case PIPE_PRIM_TRIANGLES:
for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride));
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
get_vert(vertex_buffer, indices[i-0], stride));
}
break;
case PIPE_PRIM_TRIANGLE_FAN:
case PIPE_PRIM_POLYGON:
for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[0], stride),
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride));
}
break;
case PIPE_PRIM_QUADS:
for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-0], stride));
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride));
}
break;
case PIPE_PRIM_QUAD_STRIP:
for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-0], stride));
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-0], stride));
}
break;
default:
assert(0);
}
/* XXX: why are we calling this??? If we had to call something, it
@ -202,131 +258,107 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
struct draw_stage *setup = softpipe->setup;
const void *vertex_buffer = cvbr->vertex_buffer;
const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer = NULL;
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
unsigned i;
struct setup_context *setup_ctx = sp_draw_setup_context(setup);
cptrf4 v[3];
#define VERTEX(I) \
(cptrf4) ((char *) vertex_buffer + (I) * vertex_size)
vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride);
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
v[0] = VERTEX(i);
setup_point( setup_ctx, v[0] );
setup_point( setup_ctx,
get_vert(vertex_buffer, i-0, stride) );
}
break;
case PIPE_PRIM_LINES:
assert(nr % 2 == 0);
for (i = 0; i < nr; i += 2) {
v[0] = VERTEX(i);
v[1] = VERTEX(i + 1);
setup_line( setup_ctx, v[0], v[1] );
for (i = 1; i < nr; i += 2) {
setup_line( setup_ctx,
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride) );
}
break;
case PIPE_PRIM_LINE_STRIP:
for (i = 1; i < nr; i++) {
v[0] = VERTEX(i - 1);
v[1] = VERTEX(i);
setup_line( setup_ctx, v[0], v[1] );
for (i = 1; i < nr; i ++) {
setup_line( setup_ctx,
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride) );
}
break;
case PIPE_PRIM_LINE_LOOP:
for (i = 1; i < nr; i ++) {
setup_line( setup_ctx,
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride) );
}
if (nr) {
setup_line( setup_ctx,
get_vert(vertex_buffer, nr-1, stride),
get_vert(vertex_buffer, 0, stride) );
}
break;
case PIPE_PRIM_TRIANGLES:
assert(nr % 3 == 0);
for (i = 0; i < nr; i += 3) {
v[0] = VERTEX(i + 0);
v[1] = VERTEX(i + 1);
v[2] = VERTEX(i + 2);
for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
assert(nr >= 3);
for (i = 2; i < nr; i++) {
v[0] = VERTEX(i - 2);
v[1] = VERTEX(i - 1);
v[2] = VERTEX(i);
for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, i+(i&1)-2, stride),
get_vert(vertex_buffer, i-(i&1)-1, stride),
get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_TRIANGLE_FAN:
assert(nr >= 3);
for (i = 2; i < nr; i++) {
v[0] = VERTEX(0);
v[1] = VERTEX(i - 1);
v[2] = VERTEX(i);
case PIPE_PRIM_POLYGON:
for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, 0, stride),
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_QUADS:
assert(nr % 4 == 0);
for (i = 0; i < nr; i += 4) {
v[0] = VERTEX(i + 0);
v[1] = VERTEX(i + 1);
v[2] = VERTEX(i + 2);
for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-0, stride));
v[0] = VERTEX(i + 0);
v[1] = VERTEX(i + 2);
v[2] = VERTEX(i + 3);
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_QUAD_STRIP:
assert(nr >= 4);
for (i = 2; i < nr; i += 2) {
v[0] = VERTEX(i - 2);
v[1] = VERTEX(i);
v[2] = VERTEX(i + 1);
for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-0, stride));
v[0] = VERTEX(i - 2);
v[1] = VERTEX(i + 1);
v[2] = VERTEX(i - 1);
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
}
break;
case PIPE_PRIM_POLYGON:
/* draw as tri fan */
for (i = 2; i < nr; i++) {
v[0] = VERTEX(0);
v[1] = VERTEX(i - 1);
v[2] = VERTEX(i);
setup_tri( setup_ctx,
v[0],
v[1],
v[2] );
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-0, stride));
}
break;
default:
/* XXX finish remaining prim types */
assert(0);
}
#undef VERTEX
}

View File

@ -119,6 +119,17 @@ typedef unsigned char boolean;
#endif
/* This should match linux gcc cdecl semantics everywhere, so that we
* just codegen one calling convention on all platforms.
*/
#ifdef WIN32
#define PIPE_CDECL __cdecl
#else
#define PIPE_CDECL
#endif
#if defined __GNUC__
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
#define ALIGN16_ASSIGN(NAME) NAME##___aligned
@ -131,12 +142,16 @@ typedef unsigned char boolean;
/** For calling code-gen'd functions */
/**
* For calling code-gen'd functions, phase out in favor of
* PIPE_CDECL, above, which really means cdecl on all platforms, not
* like the below...
*/
#if !defined(XSTDCALL)
#if defined(WIN32)
#define XSTDCALL __stdcall
#define XSTDCALL __stdcall /* phase this out */
#else
#define XSTDCALL
#define XSTDCALL /* XXX: NOTE! not STDCALL! */
#endif
#endif

View File

@ -797,8 +797,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v));
}
if (pipe == NULL)
goto fail;
c->st = st_create_context(pipe, &v->mesa_visual,
share_list ? share_list->st : NULL);
if (c->st == NULL)
goto fail;
mesaCtx = c->st->ctx;
c->st->ctx->DriverCtx = c;
@ -818,6 +824,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
#endif
return c;
fail:
if (c->st)
st_destroy_context(c->st);
if (pipe)
pipe->destroy(pipe);
FREE(c);
return NULL;
}

View File

@ -53,7 +53,9 @@ struct state_key {
unsigned light_color_material:1;
unsigned light_color_material_mask:12;
unsigned light_material_mask:12;
unsigned material_shininess_is_zero:1;
unsigned need_eye_coords:1;
unsigned normalize:1;
unsigned rescale_normals:1;
unsigned fog_source_is_depth:1;
@ -154,6 +156,26 @@ tnl_get_per_vertex_fog(GLcontext *ctx)
#endif
}
/**
 * Decide whether the shininess material attribute of one face has to be
 * treated as potentially non-zero.
 *
 * \param ctx   GL context, source of the current material values
 * \param key   state key carrying the two material masks
 * \param side  added to MAT_ATTRIB_FRONT_SHININESS to select the attribute
 * \return GL_TRUE if the attribute's bit is set in either key mask, or if
 *         the current material shininess is non-zero; GL_FALSE otherwise.
 */
static GLboolean check_active_shininess( GLcontext *ctx,
                                         const struct state_key *key,
                                         GLuint side )
{
   const GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side;
   const GLuint attr_bit = 1 << attr;

   /* A bit in either mask means the value is not taken from the
    * current material alone, so it cannot be assumed to be zero.
    */
   if ((key->light_color_material_mask | key->light_material_mask) & attr_bit)
      return GL_TRUE;

   return (ctx->Light.Material.Attrib[attr][0] != 0.0F) ? GL_TRUE : GL_FALSE;
}
static struct state_key *make_state_key( GLcontext *ctx )
{
@ -167,6 +189,8 @@ static struct state_key *make_state_key( GLcontext *ctx )
*/
assert(fp);
key->need_eye_coords = ctx->_NeedEyeCoords;
key->fragprog_inputs_read = fp->Base.InputsRead;
if (ctx->RenderMode == GL_FEEDBACK) {
@ -211,6 +235,17 @@ static struct state_key *make_state_key( GLcontext *ctx )
key->unit[i].light_attenuated = 1;
}
}
if (check_active_shininess(ctx, key, 0)) {
key->material_shininess_is_zero = 0;
}
else if (key->light_twoside &&
check_active_shininess(ctx, key, 1)) {
key->material_shininess_is_zero = 0;
}
else {
key->material_shininess_is_zero = 1;
}
}
if (ctx->Transform.Normalize)
@ -270,7 +305,7 @@ static struct state_key *make_state_key( GLcontext *ctx )
* generated program with line/function references for each
* instruction back into this file:
*/
#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM)
#define DISASSEM 1
/* Should be tunable by the driver - do we want to do matrix
* multiplications with DP4's or with MUL/MAD's? SSE works better
@ -309,8 +344,9 @@ struct tnl_program {
GLuint temp_reserved;
struct ureg eye_position;
struct ureg eye_position_z;
struct ureg eye_position_normalized;
struct ureg eye_normal;
struct ureg transformed_normal;
struct ureg identity;
GLuint materials;
@ -653,9 +689,9 @@ static void emit_normalize_vec3( struct tnl_program *p,
struct ureg src )
{
struct ureg tmp = get_temp(p);
emit_op2(p, OPCODE_DP3, tmp, 0, src, src);
emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
emit_op2(p, OPCODE_MUL, dest, 0, src, tmp);
emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
release_temp(p, tmp);
}
@ -693,6 +729,28 @@ static struct ureg get_eye_position( struct tnl_program *p )
}
/**
 * Return a register holding the eye-space Z of the vertex position.
 *
 * If the full eye position has already been computed, its Z component is
 * reused.  Otherwise a single DP4 of the input position against row 2 of
 * the modelview matrix is emitted once and the result is cached in
 * p->eye_position_z.
 */
static struct ureg get_eye_position_z( struct tnl_program *p )
{
   /* Full eye position available: just select Z from it. */
   if (!is_undef(p->eye_position))
      return swizzle1(p->eye_position, Z);

   /* Already computed on an earlier call. */
   if (!is_undef(p->eye_position_z))
      return p->eye_position_z;

   {
      struct ureg modelview[4];
      struct ureg pos = register_input( p, VERT_ATTRIB_POS );

      p->eye_position_z = reserve_temp(p);

      register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
                              0, modelview );

      /* Only the third matrix row is needed for Z. */
      emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
   }

   return p->eye_position_z;
}
static struct ureg get_eye_position_normalized( struct tnl_program *p )
{
if (is_undef(p->eye_position_normalized)) {
@ -705,36 +763,52 @@ static struct ureg get_eye_position_normalized( struct tnl_program *p )
}
static struct ureg get_eye_normal( struct tnl_program *p )
static struct ureg get_transformed_normal( struct tnl_program *p )
{
if (is_undef(p->eye_normal)) {
if (is_undef(p->transformed_normal) &&
!p->state->need_eye_coords &&
!p->state->normalize &&
!(p->state->need_eye_coords == p->state->rescale_normals))
{
p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
}
else if (is_undef(p->transformed_normal))
{
struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
struct ureg mvinv[3];
struct ureg transformed_normal = reserve_temp(p);
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
STATE_MATRIX_INVTRANS, mvinv );
if (p->state->need_eye_coords) {
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
STATE_MATRIX_INVTRANS, mvinv );
p->eye_normal = reserve_temp(p);
/* Transform to eye space:
*/
emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
/* Transform to eye space:
*/
emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
normal = transformed_normal;
}
/* Normalize/Rescale:
*/
if (p->state->normalize) {
emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
emit_normalize_vec3( p, transformed_normal, normal );
normal = transformed_normal;
}
else if (p->state->rescale_normals) {
else if (p->state->need_eye_coords == p->state->rescale_normals) {
/* This is already adjusted for eye/non-eye rendering:
*/
struct ureg rescale = register_param2(p, STATE_INTERNAL,
STATE_NORMAL_SCALE);
STATE_NORMAL_SCALE);
emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal,
swizzle1(rescale, X));
emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
normal = transformed_normal;
}
assert(normal.file == PROGRAM_TEMPORARY);
p->transformed_normal = normal;
}
return p->eye_normal;
return p->transformed_normal;
}
@ -856,7 +930,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
*/
if (!p->state->unit[i].light_spotcutoff_is_180) {
struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
STATE_SPOT_DIR_NORMALIZED, i);
STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
struct ureg spot = get_temp(p);
struct ureg slt = get_temp(p);
@ -895,7 +969,26 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
}
static void emit_degenerate_lit( struct tnl_program *p,
struct ureg lit,
struct ureg dots )
{
struct ureg id = get_identity_param(p);
/* Note that result.x & result.w will not be examined. Note also that
* dots.xyzw == dots.xxxx.
*/
/* result[1] = MAX2(in, 0)
*/
emit_op2(p, OPCODE_MAX, lit, 0, id, dots);
/* result[2] = (in > 0 ? 1 : 0)
*/
emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z,
lit, /* 0 */
dots); /* in[0] */
}
/* Need to add some additional parameters to allow lighting in object
@ -907,7 +1000,7 @@ static void build_lighting( struct tnl_program *p )
const GLboolean twoside = p->state->light_twoside;
const GLboolean separate = p->state->separate_specular;
GLuint nr_lights = 0, count = 0;
struct ureg normal = get_eye_normal(p);
struct ureg normal = get_transformed_normal(p);
struct ureg lit = get_temp(p);
struct ureg dots = get_temp(p);
struct ureg _col0 = undef, _col1 = undef;
@ -921,9 +1014,11 @@ static void build_lighting( struct tnl_program *p )
set_material_flags(p);
{
struct ureg shininess = get_material(p, 0, STATE_SHININESS);
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
release_temp(p, shininess);
if (!p->state->material_shininess_is_zero) {
struct ureg shininess = get_material(p, 0, STATE_SHININESS);
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
release_temp(p, shininess);
}
_col0 = make_temp(p, get_scenecolor(p, 0));
if (separate)
@ -934,10 +1029,12 @@ static void build_lighting( struct tnl_program *p )
}
if (twoside) {
struct ureg shininess = get_material(p, 1, STATE_SHININESS);
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
negate(swizzle1(shininess,X)));
release_temp(p, shininess);
if (!p->state->material_shininess_is_zero) {
struct ureg shininess = get_material(p, 1, STATE_SHININESS);
emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
negate(swizzle1(shininess,X)));
release_temp(p, shininess);
}
_bfc0 = make_temp(p, get_scenecolor(p, 1));
if (separate)
@ -984,25 +1081,28 @@ static void build_lighting( struct tnl_program *p )
/* Can use precomputed constants in this case.
* Attenuation never applies to infinite lights.
*/
VPpli = register_param3(p, STATE_LIGHT, i,
STATE_POSITION_NORMALIZED);
if (p->state->light_local_viewer) {
struct ureg eye_hat = get_eye_position_normalized(p);
half = get_temp(p);
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
emit_normalize_vec3(p, half, half);
} else {
half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
VPpli = register_param3(p, STATE_INTERNAL,
STATE_LIGHT_POSITION_NORMALIZED, i);
if (!p->state->material_shininess_is_zero) {
if (p->state->light_local_viewer) {
struct ureg eye_hat = get_eye_position_normalized(p);
half = get_temp(p);
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
emit_normalize_vec3(p, half, half);
} else {
half = register_param3(p, STATE_INTERNAL,
STATE_LIGHT_HALF_VECTOR, i);
}
}
}
else {
struct ureg Ppli = register_param3(p, STATE_LIGHT, i,
STATE_POSITION);
struct ureg Ppli = register_param3(p, STATE_INTERNAL,
STATE_LIGHT_POSITION, i);
struct ureg V = get_eye_position(p);
struct ureg dist = get_temp(p);
VPpli = get_temp(p);
half = get_temp(p);
/* Calculate VPpli vector
*/
@ -1024,24 +1124,33 @@ static void build_lighting( struct tnl_program *p )
/* Calculate viewer direction, or use infinite viewer:
*/
if (p->state->light_local_viewer) {
struct ureg eye_hat = get_eye_position_normalized(p);
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
}
else {
struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
}
if (!p->state->material_shininess_is_zero) {
half = get_temp(p);
emit_normalize_vec3(p, half, half);
if (p->state->light_local_viewer) {
struct ureg eye_hat = get_eye_position_normalized(p);
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
}
else {
struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
}
emit_normalize_vec3(p, half, half);
}
release_temp(p, dist);
}
/* Calculate dot products:
*/
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
if (p->state->material_shininess_is_zero) {
emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
}
else {
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
}
/* Front face lighting:
*/
@ -1052,11 +1161,6 @@ static void build_lighting( struct tnl_program *p )
struct ureg res0, res1;
GLuint mask0, mask1;
emit_op1(p, OPCODE_LIT, lit, 0, dots);
if (!is_undef(att))
emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
if (count == nr_lights) {
if (separate) {
@ -1078,7 +1182,21 @@ static void build_lighting( struct tnl_program *p )
res1 = _col1;
}
emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
if (!is_undef(att)) {
emit_op1(p, OPCODE_LIT, lit, 0, dots);
emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
}
else if (!p->state->material_shininess_is_zero) {
emit_op1(p, OPCODE_LIT, lit, 0, dots);
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
}
else {
emit_degenerate_lit(p, lit, dots);
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
}
emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
@ -1096,11 +1214,6 @@ static void build_lighting( struct tnl_program *p )
struct ureg res0, res1;
GLuint mask0, mask1;
emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z)));
if (!is_undef(att))
emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
if (count == nr_lights) {
if (separate) {
mask0 = WRITEMASK_XYZ;
@ -1121,7 +1234,23 @@ static void build_lighting( struct tnl_program *p )
mask1 = 0;
}
emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
dots = negate(swizzle(dots,X,Y,W,Z));
if (!is_undef(att)) {
emit_op1(p, OPCODE_LIT, lit, 0, dots);
emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
}
else if (!p->state->material_shininess_is_zero) {
emit_op1(p, OPCODE_LIT, lit, 0, dots);
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
}
else {
emit_degenerate_lit(p, lit, dots);
emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
}
emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
@ -1146,7 +1275,7 @@ static void build_fog( struct tnl_program *p )
struct ureg input;
if (p->state->fog_source_is_depth) {
input = swizzle1(get_eye_position(p), Z);
input = get_eye_position_z(p);
}
else {
input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
@ -1201,7 +1330,7 @@ static void build_reflect_texgen( struct tnl_program *p,
struct ureg dest,
GLuint writemask )
{
struct ureg normal = get_eye_normal(p);
struct ureg normal = get_transformed_normal(p);
struct ureg eye_hat = get_eye_position_normalized(p);
struct ureg tmp = get_temp(p);
@ -1219,7 +1348,7 @@ static void build_sphere_texgen( struct tnl_program *p,
struct ureg dest,
GLuint writemask )
{
struct ureg normal = get_eye_normal(p);
struct ureg normal = get_transformed_normal(p);
struct ureg eye_hat = get_eye_position_normalized(p);
struct ureg tmp = get_temp(p);
struct ureg half = register_scalar_const(p, .5);
@ -1338,7 +1467,7 @@ static void build_texture_transform( struct tnl_program *p )
}
if (normal_mask) {
struct ureg normal = get_eye_normal(p);
struct ureg normal = get_transformed_normal(p);
emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
}
@ -1376,7 +1505,7 @@ static void build_texture_transform( struct tnl_program *p )
static void build_pointsize( struct tnl_program *p )
{
struct ureg eye = get_eye_position(p);
struct ureg eye = get_eye_position_z(p);
struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
struct ureg out = register_output(p, VERT_RESULT_PSIZ);
@ -1474,8 +1603,9 @@ create_new_program( const struct state_key *key,
p.state = key;
p.program = program;
p.eye_position = undef;
p.eye_position_z = undef;
p.eye_position_normalized = undef;
p.eye_normal = undef;
p.transformed_normal = undef;
p.identity = undef;
p.temp_in_use = 0;

View File

@ -1357,6 +1357,7 @@ _mesa_init_lighting( GLcontext *ctx )
/* Miscellaneous */
ctx->Light._NeedEyeCoords = GL_FALSE;
ctx->_NeedEyeCoords = GL_FALSE;
ctx->_ForceEyeCoords = GL_TRUE;
ctx->_ModelViewInvScale = 1.0;
}

View File

@ -1209,18 +1209,6 @@ _mesa_update_state_locked( GLcontext *ctx )
| _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR))
update_tricaps( ctx, new_state );
if (ctx->FragmentProgram._MaintainTexEnvProgram) {
prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
}
if (ctx->VertexProgram._MaintainTnlProgram) {
prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
_NEW_TRANSFORM | _NEW_POINT |
_NEW_FOG | _NEW_LIGHT);
}
if (new_state & prog_flags)
update_program( ctx );
/* ctx->_NeedEyeCoords is now up to date.
*
* If the truth value of this variable has changed, update for the
@ -1233,6 +1221,20 @@ _mesa_update_state_locked( GLcontext *ctx )
if (new_state & _MESA_NEW_NEED_EYE_COORDS)
_mesa_update_tnl_spaces( ctx, new_state );
if (ctx->FragmentProgram._MaintainTexEnvProgram) {
prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
}
if (ctx->VertexProgram._MaintainTnlProgram) {
prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
_NEW_TRANSFORM | _NEW_POINT |
_NEW_FOG | _NEW_LIGHT |
_MESA_NEW_NEED_EYE_COORDS);
}
if (new_state & prog_flags)
update_program( ctx );
/*
* Give the driver a chance to act upon the new_state flags.
* The driver might plug in different span functions, for example.

View File

@ -134,10 +134,6 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
value[3] = 1.0;
}
return;
case STATE_POSITION_NORMALIZED:
COPY_4V(value, ctx->Light.Light[ln].EyePosition);
NORMALIZE_3FV( value );
return;
default:
_mesa_problem(ctx, "Invalid light state in fetch_state");
return;
@ -401,7 +397,11 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
case STATE_INTERNAL:
switch (state[1]) {
case STATE_NORMAL_SCALE:
ASSIGN_4V(value, ctx->_ModelViewInvScale, 0, 0, 1);
ASSIGN_4V(value,
ctx->_ModelViewInvScale,
ctx->_ModelViewInvScale,
ctx->_ModelViewInvScale,
1);
return;
case STATE_TEXRECT_SCALE:
{
@ -431,15 +431,46 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
value[2] = ctx->Fog.Density * ONE_DIV_LN2;
value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2;
return;
case STATE_SPOT_DIR_NORMALIZED: {
case STATE_LIGHT_SPOT_DIR_NORMALIZED: {
/* here, state[2] is the light number */
/* pre-normalize spot dir */
const GLuint ln = (GLuint) state[2];
COPY_3V(value, ctx->Light.Light[ln].EyeDirection);
NORMALIZE_3FV(value);
COPY_3V(value, ctx->Light.Light[ln]._NormDirection);
value[3] = ctx->Light.Light[ln]._CosCutoff;
return;
}
case STATE_LIGHT_POSITION: {
const GLuint ln = (GLuint) state[2];
COPY_4V(value, ctx->Light.Light[ln]._Position);
return;
}
case STATE_LIGHT_POSITION_NORMALIZED: {
const GLuint ln = (GLuint) state[2];
COPY_4V(value, ctx->Light.Light[ln]._Position);
NORMALIZE_3FV( value );
return;
}
case STATE_LIGHT_HALF_VECTOR: {
const GLuint ln = (GLuint) state[2];
GLfloat p[3];
/* Compute infinite half angle vector:
* halfVector = normalize(normalize(lightPos) + (0, 0, 1))
* light.EyePosition.w should be 0 for infinite lights.
*/
COPY_3V(p, ctx->Light.Light[ln]._Position);
NORMALIZE_3FV(p);
ADD_3V(value, p, ctx->_EyeZDir);
NORMALIZE_3FV(value);
value[3] = 1.0;
return;
}
case STATE_PT_SCALE:
value[0] = ctx->Pixel.RedScale;
value[1] = ctx->Pixel.GreenScale;
@ -696,7 +727,6 @@ append_token(char *dst, gl_state_index k)
append(dst, "normalScale");
break;
case STATE_INTERNAL:
case STATE_POSITION_NORMALIZED:
append(dst, "(internal)");
break;
case STATE_PT_SCALE:

View File

@ -106,9 +106,11 @@ typedef enum gl_state_index_ {
STATE_INTERNAL, /* Mesa additions */
STATE_NORMAL_SCALE,
STATE_TEXRECT_SCALE,
STATE_POSITION_NORMALIZED, /* normalized light position */
STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */
STATE_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */
STATE_LIGHT_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */
STATE_LIGHT_POSITION, /* object vs eye space */
STATE_LIGHT_POSITION_NORMALIZED, /* object vs eye space */
STATE_LIGHT_HALF_VECTOR, /* object vs eye space */
STATE_PT_SCALE, /**< Pixel transfer RGBA scale */
STATE_PT_BIAS, /**< Pixel transfer RGBA bias */
STATE_PCM_SCALE, /**< Post color matrix RGBA scale */