From 931210424bc46b2c13919f0ac3e0ef781eff207e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 Oct 2009 15:44:29 +0100 Subject: [PATCH] llvmpipe: wip me harder --- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 84 ++--- src/gallium/drivers/llvmpipe/lp_rast.h | 11 + src/gallium/drivers/llvmpipe/lp_setup.c | 345 ++++++++++++++---- src/gallium/drivers/llvmpipe/lp_setup.h | 44 ++- .../drivers/llvmpipe/lp_setup_context.h | 187 +++++----- .../drivers/llvmpipe/lp_setup_rasterize.c | 20 - src/gallium/drivers/llvmpipe/lp_setup_tri.c | 151 ++++---- 7 files changed, 537 insertions(+), 305 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_rasterize.c diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 8cccb2905b7..6c51d40a8f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -171,14 +171,14 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, + lp_setup_point( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -186,7 +186,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -194,12 +194,12 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } if (nr) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[nr-1], stride), get_vert(vertex_buffer, indices[0], stride) ); } @@ -208,7 +208,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLES: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -216,7 +216,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -227,7 +227,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -235,7 +235,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -246,7 +246,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); @@ -254,7 +254,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -265,11 +265,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUADS: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -277,12 +277,12 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -293,11 +293,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUAD_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride)); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -305,11 +305,11 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -324,7 +324,7 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * flatshade_first state makes no difference. */ for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[0], stride) ); @@ -355,14 +355,14 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - llvmpipe_setup_point( setup_ctx, + lp_setup_point( setup_ctx, get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -370,7 +370,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -378,12 +378,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } if (nr) { - llvmpipe_setup_line( setup_ctx, + lp_setup_line( setup_ctx, get_vert(vertex_buffer, nr-1, stride), get_vert(vertex_buffer, 0, stride) ); } @@ -392,7 +392,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLES: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); @@ -400,7 +400,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 3) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -411,7 +411,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); @@ -419,7 +419,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i++) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -430,7 +430,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (llvmpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); @@ -438,7 +438,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -449,11 +449,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUADS: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -461,11 +461,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 4) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -476,11 +476,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUAD_STRIP: if (llvmpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -488,11 +488,11 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 2) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -507,7 +507,7 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) * flatshade_first state makes no difference. */ for (i = 2; i < nr; i += 1) { - llvmpipe_setup_tri( setup_ctx, + lp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride) ); @@ -525,7 +525,7 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - llvmpipe_setup_destroy_context(cvbr->setup); + lp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -556,7 +556,7 @@ lp_create_vbuf_backend(struct llvmpipe_context *lp) cvbr->llvmpipe = lp; - cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); + cvbr->setup = lp_setup_create_context(cvbr->llvmpipe); return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index dadde2e8635..33a6065b89c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -90,6 +90,17 @@ struct lp_rast_triangle { struct lp_rast_shader_inputs inputs; }; +struct clear_tile { + boolean do_color; + boolean do_depth_stencil; + unsigned rgba; + unsigned depth_stencil; +}; + +struct load_tile { + boolean do_color; + boolean do_depth_stencil; +}; struct lp_rasterizer *lp_rast_create( void ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ac9bfad3f21..514366b71f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -26,124 +26,337 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines) + * Tiling engine. * - * \author Keith Whitwell - * \author Brian Paul + * Builds per-tile display lists and executes them on calls to + * lp_setup_flush(). */ -#include "lp_context.h" -#include "lp_quad.h" #include "lp_setup.h" -#include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +void lp_setup_new_cmd_block( struct cmd_block_list *list ) +{ + struct cmd_block *block = MALLOC_STRUCT(cmd_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->count = 0; +} -#define DEBUG_VERTS 0 +void lp_setup_new_data_block( struct data_block_list *list ) +{ + struct data_block *block = MALLOC_STRUCT(data_block); + list->tail->next = block; + list->tail = block; + block->next = NULL; + block->used = 0; +} + +static void reset_context( struct setup_context *setup ) +{ + for (i = 0; i < setup->tiles_x; i++) { + for (j = 0; j < setup->tiles_y; j++) { + struct cmd_block_list *list = scene->tile[i][j]; + struct cmd_block *block; + struct cmd_block *tmp; + + for (block = list->first; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->first = list->tail; + } + } + + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->first; block != list->tail; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->first = list->tail; + } +} + + + + +/* Add a command to all active bins. + */ +static void bin_everywhere( struct setup_context *setup, + bin_cmd cmd, + const union lp_rast_cmd_arg *arg ) +{ + unsigned i, j; + for (i = 0; i < setup->tiles_x; i++) + for (j = 0; j < setup->tiles_y; j++) + bin_cmd( setup, &setup->tile[i][j], cmd, arg ); +} + + +static void +rasterize_bins( struct setup_context *setup, + struct lp_rast *rast, + boolean write_depth ) +{ + lp_rast_bind_color( rast, + scene->fb.color, + TRUE ); /* WRITE */ + + lp_rast_bind_depth( rast, + scene->fb.depth, + write_depth ); /* WRITE */ + + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + + lp_rast_start_tile( rast, + i * TILESIZE, + j * TILESIZE ); + + for (block = scene->tile[i][j].first; block; block = block->next) { + for (k = 0; k < block->nr_cmds; k++) { + block->cmd[k].func( rast, block->cmd[k].arg ); + } + } + + lp_rast_finish_tile( rast ); + } + } + + lp_setup_free_data( setup ); +} + + + +static void +begin_binning( struct setup_context *setup ) +{ + if (setup->fb.color) { + if (setup->fb.clear_color) + bin_everywhere( setup, + lp_rast_clear_color, + &setup->clear_data ); + else + bin_everywhere( setup, + lp_rast_load_color, + NULL ); + } + + if (setup->fb.zstencil) { + if (setup->fb.clear_zstencil) + bin_everywhere( setup, + lp_rast_clear_zstencil, + &setup->clear_data ); + else + bin_everywhere( setup, + lp_rast_load_zstencil, + NULL ); + } +} + + +/* This basically bins and then flushes any outstanding full-screen + * clears. + * + * TODO: fast path for fullscreen clears and no triangles. + */ +static void +execute_clears( struct setup_context *setup ) +{ + begin_binning( setup ); + rasterize_bins( setup ); +} + + +static void +set_state( struct setup_context *setup, + unsigned new_state ) +{ + unsigned old_state = setup->state; + + if (old_state == new_state) + return; + + switch (new_state) { + case SETUP_ACTIVE: + if (old_state == SETUP_FLUSHED) + setup_begin_binning( setup ); + break; + + case SETUP_CLEARED: + if (old_state == SETUP_ACTIVE) { + assert(0); + return; + } + break; + + case SETUP_FLUSHED: + if (old_state == SETUP_CLEAR) + execute_clears( setup ); + else + rasterize_bins( setup ); + break; + } + + setup->state = new_state; +} void -llvmpipe_setup_flush() +lp_setup_flush( struct setup_context *setup, + unsigned flags ) { + set_state( setup, SETUP_FLUSHED ); +} + + +void +lp_setup_bind_framebuffer( struct setup_context *setup, + struct pipe_surface *color, + struct pipe_surface *zstencil ) +{ + unsigned width, height; + + set_state( setup, SETUP_FLUSHED ); + + pipe_surface_reference( &setup->fb.color, color ); + pipe_surface_reference( &setup->fb.zstencil, zstencil ); + + width = MAX2( color->width, zstencil->width ); + height = MAX2( color->height, zstencil->height ); + + setup->tiles_x = align( width, TILESIZE ) / TILESIZE; + setup->tiles_y = align( height, TILESIZE ) / TILESIZE; } void -llvmpipe_setup_bind_framebuffer() +lp_setup_clear( struct setup_context *setup, + const float *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags ) { + if (setup->state == SETUP_ACTIVE) { + struct lp_rast_clear_info *clear_info; + unsigned i, j; + + clear_info = alloc_clear_info( setup ); + + if (flags & PIPE_CLEAR_COLOR) { + pack_color( setup, + clear_info->color, + clear_color ); + bin_everywhere(setup, lp_rast_clear_color, clear_info ); + } + + if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + pack_depth_stencil( setup, + clear_info->depth, + clear_depth, + clear_stencil ); + + bin_everywhere(setup, lp_rast_clear_zstencil, clear_info ); + } + } + else { + set_state( setup, SETUP_CLEARED ); + setup->clear.flags |= flags; + + if (flags & PIPE_CLEAR_COLOR) { + memcpy(setup->clear.color, color, sizeof setup->clear.color); + } + + if (flags & PIPE_CLEAR_DEPTH_STENCIL) { + setup->clear.depth = clear_depth; + setup->clear.stencil = clear_stencil; + } + } } + void -llvmpipe_setup_clear() +lp_setup_set_fs_inputs( struct setup_context *setup, + const enum lp_interp *interp, + unsigned nr ) { + memcpy( setup->interp, interp, nr * sizeof interp[0] ); } +static void +first_triangle( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + set_state( setup, STATE_ACTIVE ); + setup_choose_triangle( setup, v0, v1, v2 ); +} + + + /* Stubs for lines & points for now: */ void -llvmpipe_setup_point(struct setup_context *setup, +lp_setup_point(struct setup_context *setup, const float (*v0)[4]) { + setup->point( setup, v0 ); } void -llvmpipe_setup_line(struct setup_context *setup, +lp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { + setup->line( setup, v0, v1 ); } - -/* Called after statechange, before emitting primitives. If binning - * is active, this function should store relevant state in the binning - * context. - * - * That includes: - * - current fragment shader function - * - bound constant buffer contents - * - bound textures - * - blend color - * - etc. - * - * Basically everything needed at some point in the future to - * rasterize triangles for the current state. - * - * Additionally this will set up the state needed for the rasterizer - * to process and bin incoming triangles. That would include such - * things as: - * - cull mode - * - ??? - * - etc. - * - */ -void setup_prepare( struct setup_context *setup ) +void +lp_setup_triangle(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { - struct llvmpipe_context *lp = setup->llvmpipe; - - if (lp->dirty) { - llvmpipe_update_derived(lp); - } - - lp->quad.first->begin( lp->quad.first ); - - if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && - lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && - lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { - /* we'll do culling */ - setup->winding = lp->rasterizer->cull_mode; - } - else { - /* 'draw' will do culling */ - setup->winding = PIPE_WINDING_NONE; - } - - setup_prepare_tri( setup->llvmpipe ); + setup->triangle( setup, v0, v1, v2 ); } - void setup_destroy_context( struct setup_context *setup ) { + lp_rast_destroy( setup->rast ); FREE( setup ); } /** - * Create a new primitive setup/render stage. + * Create a new primitive tiling engine. Currently also creates a + * rasterizer to use with it. */ -struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ) +struct setup_context *setup_create_context( void ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); - unsigned i; - setup->llvmpipe = llvmpipe; + setup->rast = lp_rast_create( void ); + if (!setup->rast) + goto fail; + + for (i = 0; i < TILES_X; i++) + for (j = 0; j < TILES_Y; j++) + setup->tile[i][j].first = + setup->tile[i][j].next = CALLOC_STRUCT(cmd_block); return setup; + +fail: + FREE(setup); + return NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 05aaaf83b8e..2542faad36b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -27,28 +27,46 @@ #ifndef LP_SETUP_H #define LP_SETUP_H + +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + struct setup_context; -struct llvmpipe_context; -/* Note, not using setup_context currently - */ +struct setup_context * +lp_setup_create( void ); void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]); +lp_setup_triangle(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v1)[4]); void -llvmpipe_setup_point( struct setup_context *setup, - const float (*v0)[4] ); +lp_setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); +void +lp_setup_point( struct setup_context *setup, + const float (*v0)[4] ); -struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe ); +void +lp_setup_set_triangle_state( struct setup_context *setup, + unsigned cullmode, + boolean front_is_ccw ); -void setup_prepare( struct setup_context *setup ); +void +lp_setup_set_fs_inputs( struct setup_context *setup, + const enum lp_interp *interp, + unsigned nr ); -void setup_destroy_context( struct setup_context *setup ); - -void setup_prepare_tri( struct llvmpipe_context *llvmpipe ); +void +lp_setup_destroy( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 848705e0991..91540d6751e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -27,114 +27,125 @@ #ifndef LP_SETUP_CONTEXT_H #define LP_SETUP_CONTEXT_H -struct clear_tile { - boolean do_color; - boolean do_depth_stencil; - unsigned rgba; - unsigned depth_stencil; -}; -struct load_tile { - boolean do_color; - boolean do_depth_stencil; -}; +#define CMD_BLOCK_MAX 128 +#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *)) -/* Shade tile points directly at this: +/* switch to a non-pointer value for this: */ -struct shader_inputs { - /* Some way of updating rasterizer state: - */ - /* ??? */ - - /* Attribute interpolation: - */ - float oneoverarea; - float x1; - float y1; - - struct tgsi_interp_coef position_coef; - struct tgsi_interp_coef *coef; -}; - -/* Shade triangle points at this: - */ -struct shade_triangle { - /* one-pixel sized trivial accept offsets for each plane */ - float ei1; - float ei2; - float ei3; - - /* one-pixel sized trivial reject offsets for each plane */ - float eo1; - float eo2; - float eo3; - - /* y deltas for vertex pairs */ - float dy12; - float dy23; - float dy31; - - /* x deltas for vertex pairs */ - float dx12; - float dx23; - float dx31; - - struct shader_inputs inputs; -}; - -struct bin_cmd { - enum { - CMD_END = 0, - CMD_CLEAR, - CMD_LOAD_TILE, - CMD_SHADE_TILE, - CMD_SHADE_TRIANGLE, - } cmd; - - union { - struct triangle *tri; - struct clear *clear; - } ptr; -}; +typedef void (*lp_rast_cmd)( struct lp_rast *, const union lp_rast_cmd_arg * ); struct cmd_block { - struct bin_cmd cmds[128]; + union lp_rast_arg *arg[CMD_BLOCK_MAX]; + lp_rast_cmd cmd[CMD_BLOCK_MAX]; unsigned count; struct cmd_block *next; }; -/* Triangles - */ struct data_block { - ubyte data[4096 - sizeof(unsigned) - sizeof(struct cmd_block *)]; - unsigned count; + ubyte data[DATA_BLOCK_SZ]; + unsigned used; struct data_block *next; }; -/* Need to store the state at the time the triangle was drawn, at - * least as it is needed during rasterization. That would include at - * minimum the constant values referred to by the fragment shader, - * blend state, etc. Much of this is code-generated into the shader - * in llvmpipe -- may be easier to do this work there. - */ -struct state_block { +struct cmd_block_list { + struct cmd_block *head; + struct cmd_block *tail; }; +struct data_block_list { + struct data_block *head; + struct data_block *tail; +}; + -/** - * Basically all the data from a binner scene: +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. */ -struct binned_scene { - struct llvmpipe_context *llvmpipe; +#define MAXHEIGHT 2048 +#define MAXWIDTH 2048 - struct cmd_block *bin[MAX_HEIGHT / BIN_SIZE][MAX_WIDTH / BIN_SIZE]; - struct data_block *data; +struct setup_context { + + /* When there are multiple threads, will want to double-buffer the + * bin arrays: + */ + struct cmd_block_list bin[MAXHEIGHT / TILESIZE][MAXWIDTH / TILESIZE]; + struct data_block_list data; + + unsigned tiles_x; + unsigned tiles_y; + + struct { + struct pipe_surface *color; + struct pipe_surface *zstencil; + } fb; + + struct { + unsigned flags; + float clear_color[4]; + double clear_depth; + unsigned clear_stencil; + } clear; + + enum { + SETUP_FLUSHED, + SETUP_CLEARED, + SETUP_ACTIVE + } state; + + struct { + enum lp_interp inputs[PIPE_MAX_ATTRIBS]; + unsigned nr_inputs; + } fs; + + void (*point)( struct setup_context *, + const float (*v0)[4]); + + void (*line)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4]); + + void (*triangle)( struct setup_context *, + const float (*v0)[4], + const float (*v1)[4], + const float (*v1)[4]); }; -static INLINE struct triangle *get_triangle( struct setup_context *setup ) +static INLINE void *get_data( struct data_block_list *list, + unsigned size) { - if (setup->triangles->count == TRIANGLE_BLOCK_COUNT) - return setup_triangle_from_new_block( setup ); - return &setup->triangles[setup->triangles->count++]; + if (list->tail->used + size > DATA_BLOCK_SIZE) { + lp_setup_new_data_block( list ); + } + + { + struct data_block *tail = list->tail; + char *data = tail->data + tail->used; + tail->used += size; + return data; + } } + +/* Add a command to a given bin. + */ +static INLINE void bin_cmd( struct cmd_block_list *list, + bin_cmd cmd, + const union lp_rast_cmd_arg *arg ) +{ + if (list->tail.count == CMD_BLOCK_MAX) { + lp_setup_new_cmd_block( list ) + } + + { + struct cmd_block *tail = list->tail; + unsigned i = tail->count; + tail->cmd[i] = cmd; + tail->arg[i] = arg; + tail->count++; + } +} + + + diff --git a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c b/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c deleted file mode 100644 index bb7a4feb390..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_rasterize.c +++ /dev/null @@ -1,20 +0,0 @@ - -void -lp_setup_rasterize( struct llvmpipe_context *llvmpipe, - struct binned_scene *scene ) -{ - lp_rast_bind_surfaces( rast, scene->framebuffer ); - - for (i = 0; i < scene->tiles_x; i++) { - for (j = 0; j < scene->tiles_y; j++) { - - lp_rast_start_tile( rast, i * TILESIZE, j * TILESIZE ); - - for (block = scene->tile[i][j].first; block; block = block->next) { - for (k = 0; k < block->nr_cmds; k++) { - block->cmd[k].func( rast, block->cmd[k].arg ); - } - } - } - } -} diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 98c87d551f0..75a0ea88881 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -29,14 +29,8 @@ * Binning code for triangles */ -#include "lp_context.h" #include "lp_setup.h" #include "lp_state.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -163,56 +157,55 @@ setup_fragcoord_coef(struct triangle *tri, unsigned slot) /** * Compute the tri->coef[] array dadx, dady, a0 values. */ -static void setup_tri_coefficients( struct llvmpipe_context *llvmpipe, +static void setup_tri_coefficients( struct setup_context *setup, struct triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontface ) { - const struct lp_fragment_shader *fs = llvmpipe->fs; - const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); + const struct vertex_info *vinfo = setup->vinfo; unsigned input; /* z and w are done by linear interpolation: */ - linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 2); - linear_coef(tri, &tri->position_coef, v1, v2, v3, 0, 3); + linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2); + linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (input = 0; input < fs->info.num_inputs; input++) { + for (input = 0; input < vinfo->num_fs_inputs; input++) { unsigned vert_attr = vinfo->attrib[input].src_index; unsigned i; switch (vinfo->attrib[input].interp_mode) { case INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(&tri->coef[input], v3, vert_attr, i); + constant_coef(tri->coef[input], v3, vert_attr, i); break; case INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); break; case INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, &tri->coef[input], v1, v2, v3, vert_attr, i); + perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); break; case INTERP_POS: setup_fragcoord_coef(tri, input); break; - default: - assert(0); - } - - if (fs->info.input_semantic_name[input] == TGSI_SEMANTIC_FACE) { + case INTERP_FACING: tri->coef[input].a0[0] = 1.0f - frontface; tri->coef[input].dadx[0] = 0.0; tri->coef[input].dady[0] = 0.0; + break; + + default: + assert(0); } } } @@ -262,22 +255,22 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle *tri = allocate_triangle; + struct triangle *tri = allocate_triangle( setup ); float area; float c1, c2, c3; int i; int minx, maxx, miny, maxy; - tri.dx12 = x1 - x2; - tri.dx23 = x2 - x3; - tri.dx31 = x3 - x1; + tri->dx12 = x1 - x2; + tri->dx23 = x2 - x3; + tri->dx31 = x3 - x1; - tri.dy12 = y1 - y2; - tri.dy23 = y2 - y3; - tri.dy31 = y3 - y1; + tri->dy12 = y1 - y2; + tri->dy23 = y2 - y3; + tri->dy31 = y3 - y1; - area = (tri.dx12 * tri.dy31 - - tri.dx31 * tri.dy12); + area = (tri->dx12 * tri->dy31 - + tri->dx31 * tri->dy12); /* Cull non-ccw and zero-sized triangles. */ @@ -302,80 +295,87 @@ do_triangle_ccw(struct lp_setup *setup, /* The only divide in this code. Is it really needed? */ - tri.oneoverarea = 1.0f / area; + tri->oneoverarea = 1.0f / area; /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, &tri, v1, v2, v3, frontfacing ); + setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); /* half-edge constants, will be interated over the whole * rendertarget. */ - c1 = tri.dy12 * x1 - tri.dx12 * y1; - c2 = tri.dy23 * x2 - tri.dx23 * y2; - c3 = tri.dy31 * x3 - tri.dx31 * y3; + c1 = tri->dy12 * x1 - tri->dx12 * y1; + c2 = tri->dy23 * x2 - tri->dx23 * y2; + c3 = tri->dy31 * x3 - tri->dx31 * y3; /* correct for top-left fill convention: */ - if (tri.dy12 < 0 || (tri.dy12 == 0 && tri.dx12 > 0)) c1++; - if (tri.dy23 < 0 || (tri.dy23 == 0 && tri.dx23 > 0)) c2++; - if (tri.dy31 < 0 || (tri.dy31 == 0 && tri.dx31 > 0)) c3++; + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - tri.eo1 = 0; - if (tri.dy12 < 0) tri.eo1 -= tri.dy12; - if (tri.dx12 > 0) tri.eo1 += tri.dx12; + tri->eo1 = 0; + if (tri->dy12 < 0) tri->eo1 -= tri->dy12; + if (tri->dx12 > 0) tri->eo1 += tri->dx12; - tri.eo2 = 0; - if (tri.dy23 < 0) tri.eo2 -= tri.dy23; - if (tri.dx23 > 0) tri.eo2 += tri.dx23; + tri->eo2 = 0; + if (tri->dy23 < 0) tri->eo2 -= tri->dy23; + if (tri->dx23 > 0) tri->eo2 += tri->dx23; - tri.eo3 = 0; - if (tri.dy31 < 0) tri.eo3 -= tri.dy31; - if (tri.dx31 > 0) tri.eo3 += tri.dx31; + tri->eo3 = 0; + if (tri->dy31 < 0) tri->eo3 -= tri->dy31; + if (tri->dx31 > 0) tri->eo3 += tri->dx31; /* Calculate trivial accept offsets from the above. */ - tri.ei1 = tri.dx12 - tri.dy12 - tri.eo1; - tri.ei2 = tri.dx23 - tri.dy23 - tri.eo2; - tri.ei3 = tri.dx31 - tri.dy31 - tri.eo3; + tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; + tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; + tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; minx &= ~(TILESIZE-1); /* aligned blocks */ miny &= ~(TILESIZE-1); /* aligned blocks */ - c1 += tri.dx12 * miny - tri.dy12 * minx; - c2 += tri.dx23 * miny - tri.dy23 * minx; - c3 += tri.dx31 * miny - tri.dy31 * minx; + c1 += tri->dx12 * miny - tri->dy12 * minx; + c2 += tri->dx23 * miny - tri->dy23 * minx; + c3 += tri->dx31 * miny - tri->dy31 * minx; - if (miny + TILESIZE > maxy && - minx + TILESIZE > maxx) + /* Convert to tile coordinates: + */ + minx /= TILESIZE; + maxx /= TILESIZE; + miny /= TILESIZE; + maxy /= TILESIZE; + + if (miny == maxy && minx == maxx) { /* Triangle is contained in a single tile: */ + bin_command(setup->tile[minx][miny], lp_rast_triangle, tri ); } else { const int step = TILESIZE; - float ei1 = tri.ei1 * step; - float ei2 = tri.ei2 * step; - float ei3 = tri.ei3 * step; + float ei1 = tri->ei1 * step; + float ei2 = tri->ei2 * step; + float ei3 = tri->ei3 * step; - float eo1 = tri.eo1 * step; - float eo2 = tri.eo2 * step; - float eo3 = tri.eo3 * step; + float eo1 = tri->eo1 * step; + float eo2 = tri->eo2 * step; + float eo3 = tri->eo3 * step; - float xstep1 = -step * tri.dy12; - float xstep2 = -step * tri.dy23; - float xstep3 = -step * tri.dy31; + float xstep1 = -step * tri->dy12; + float xstep2 = -step * tri->dy23; + float xstep3 = -step * tri->dy31; - float ystep1 = step * tri.dx12; - float ystep2 = step * tri.dx23; - float ystep3 = step * tri.dx31; + float ystep1 = step * tri->dx12; + float ystep2 = step * tri->dx23; + float ystep3 = step * tri->dx31; int x, y; @@ -385,13 +385,13 @@ do_triangle_ccw(struct lp_setup *setup, * Trivially accept or reject blocks, else jump to per-pixel * examination above. */ - for (y = miny; y < maxy; y += step) + for (y = miny; y < maxy; y++) { float cx1 = c1; float cx2 = c2; float cx3 = c3; - for (x = minx; x < maxx; x += step) + for (x = minx; x < maxx; x++) { if (cx1 + eo1 < 0 || cx2 + eo2 < 0 || @@ -404,12 +404,12 @@ do_triangle_ccw(struct lp_setup *setup, cx3 + ei3 > 0) { /* shade whole tile */ - bin_command(tile[x][y], lp_rast_shade_tile, &tri->inputs ); + bin_command(setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); } else { /* shade partial tile */ - bin_command(tile[x][y], lp_rast_triangle, &tri ); + bin_command(setup->tile[x][y], lp_rast_triangle, tri ); } /* Iterate cx values across the region: @@ -469,14 +469,13 @@ static void triangle_nop( struct setup_context *setup, { } -void setup_prepare_tri( struct setup_context *setup ) +void setup_set_tri_state( struct setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface) { - struct llvmpipe_context *llvmpipe = setup->llvmpipe; + setup->ccw_is_frontface = ccw_is_frontface; - setup->ccw_is_frontface = (llvmpipe->rasterizer->front_winding == - PIPE_WINDING_CW); - - switch (llvmpipe->rasterizer->cull_mode) { + switch (cull_mode) { case PIPE_WINDING_NONE: setup->triangle = triangle_both; break;