mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead
Acked-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13050>
This commit is contained in:
parent
f32cefdb87
commit
e78d7fe7d5
|
@ -60,12 +60,17 @@ struct gl_texture_image;
|
|||
struct gl_texture_object;
|
||||
struct gl_memory_info;
|
||||
struct gl_transform_feedback_object;
|
||||
struct gl_vertex_array_object;
|
||||
struct ati_fragment_shader;
|
||||
struct util_queue_monitoring;
|
||||
struct _mesa_prim;
|
||||
struct _mesa_index_buffer;
|
||||
struct pipe_draw_info;
|
||||
struct pipe_draw_start_count_bias;
|
||||
struct pipe_vertex_state;
|
||||
struct pipe_draw_vertex_state_info;
|
||||
struct pipe_vertex_buffer;
|
||||
struct pipe_vertex_element;
|
||||
|
||||
/* GL_ARB_vertex_buffer_object */
|
||||
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
|
||||
|
@ -641,8 +646,21 @@ struct dd_function_table {
|
|||
void (*DrawTransformFeedback)(struct gl_context *ctx, GLenum mode,
|
||||
unsigned num_instances, unsigned stream,
|
||||
struct gl_transform_feedback_object *tfb_vertcount);
|
||||
|
||||
void (*DrawGalliumVertexState)(struct gl_context *ctx,
|
||||
struct pipe_vertex_state *state,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
const uint8_t *mode,
|
||||
unsigned num_draws,
|
||||
bool per_vertex_edgeflags);
|
||||
/*@}*/
|
||||
|
||||
struct pipe_vertex_state *
|
||||
(*CreateGalliumVertexState)(struct gl_context *ctx,
|
||||
const struct gl_vertex_array_object *vao,
|
||||
struct gl_buffer_object *indexbuf,
|
||||
uint32_t enabled_attribs);
|
||||
|
||||
/**
|
||||
* \name State-changing functions.
|
||||
|
|
|
@ -73,7 +73,7 @@
|
|||
#include "vbo/vbo_util.h"
|
||||
#include "vbo/vbo_save.h"
|
||||
#include "util/format_r11g11b10f.h"
|
||||
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#define USE_BITMAP_ATLAS 1
|
||||
|
@ -797,8 +797,15 @@ void mesa_print_display_list(GLuint list);
|
|||
static void
|
||||
vbo_destroy_vertex_list(struct gl_context *ctx, struct vbo_save_vertex_list *node)
|
||||
{
|
||||
for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm)
|
||||
_mesa_reference_vao(ctx, &node->VAO[vpm], NULL);
|
||||
for (gl_vertex_processing_mode mode = VP_MODE_FF; mode < VP_MODE_MAX; ++mode) {
|
||||
_mesa_reference_vao(ctx, &node->VAO[mode], NULL);
|
||||
if (node->merged.gallium.private_refcount[mode]) {
|
||||
assert(node->merged.gallium.private_refcount[mode] > 0);
|
||||
p_atomic_add(&node->merged.gallium.state[mode]->reference.count,
|
||||
-node->merged.gallium.private_refcount[mode]);
|
||||
}
|
||||
pipe_vertex_state_reference(&node->merged.gallium.state[mode], NULL);
|
||||
}
|
||||
|
||||
if (node->merged.mode) {
|
||||
free(node->merged.mode);
|
||||
|
|
|
@ -140,24 +140,21 @@ static void check_program_state( struct st_context *st )
|
|||
st->dirty |= dirty;
|
||||
}
|
||||
|
||||
static void check_attrib_edgeflag(struct st_context *st)
|
||||
void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags)
|
||||
{
|
||||
GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
|
||||
struct gl_program *vp = st->ctx->VertexProgram._Current;
|
||||
|
||||
edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
|
||||
bool edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
|
||||
st->ctx->Polygon.BackMode != GL_FILL;
|
||||
|
||||
vertdata_edgeflags = edgeflags_enabled &&
|
||||
_mesa_draw_edge_flag_array_enabled(st->ctx);
|
||||
bool vertdata_edgeflags = edgeflags_enabled && per_vertex_edgeflags;
|
||||
|
||||
if (vertdata_edgeflags != st->vertdata_edgeflags) {
|
||||
st->vertdata_edgeflags = vertdata_edgeflags;
|
||||
|
||||
struct gl_program *vp = st->ctx->VertexProgram._Current;
|
||||
if (vp)
|
||||
st->dirty |= ST_NEW_VERTEX_PROGRAM(st, st_program(vp));
|
||||
}
|
||||
|
||||
edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
|
||||
bool edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
|
||||
!st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
|
||||
if (edgeflag_culls_prims != st->edgeflag_culls_prims) {
|
||||
st->edgeflag_culls_prims = edgeflag_culls_prims;
|
||||
|
@ -165,6 +162,11 @@ static void check_attrib_edgeflag(struct st_context *st)
|
|||
}
|
||||
}
|
||||
|
||||
static void check_attrib_edgeflag(struct st_context *st)
|
||||
{
|
||||
st_update_edgeflags(st, _mesa_draw_edge_flag_array_enabled(st->ctx));
|
||||
}
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* Update all derived state:
|
||||
|
|
|
@ -58,6 +58,7 @@ enum st_pipeline {
|
|||
void st_init_atoms( struct st_context *st );
|
||||
void st_destroy_atoms( struct st_context *st );
|
||||
void st_validate_state( struct st_context *st, enum st_pipeline pipeline );
|
||||
void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags);
|
||||
|
||||
void
|
||||
st_setup_arrays(struct st_context *st,
|
||||
|
@ -74,6 +75,12 @@ st_setup_current_user(struct st_context *st,
|
|||
struct cso_velems_state *velements,
|
||||
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
|
||||
|
||||
struct pipe_vertex_state *
|
||||
st_create_gallium_vertex_state(struct gl_context *ctx,
|
||||
const struct gl_vertex_array_object *vao,
|
||||
struct gl_buffer_object *indexbuf,
|
||||
uint32_t enabled_attribs);
|
||||
|
||||
/* Define ST_NEW_xxx_INDEX */
|
||||
enum {
|
||||
#define ST_STATE(FLAG, st_update) FLAG##_INDEX,
|
||||
|
|
|
@ -322,3 +322,40 @@ st_update_array(struct st_context *st)
|
|||
vbuffer);
|
||||
st->last_num_vbuffers = num_vbuffers;
|
||||
}
|
||||
|
||||
struct pipe_vertex_state *
|
||||
st_create_gallium_vertex_state(struct gl_context *ctx,
|
||||
const struct gl_vertex_array_object *vao,
|
||||
struct gl_buffer_object *indexbuf,
|
||||
uint32_t enabled_attribs)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
const GLbitfield inputs_read = enabled_attribs;
|
||||
const GLbitfield dual_slot_inputs = 0; /* always zero */
|
||||
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vbuffers = 0;
|
||||
struct cso_velems_state velements;
|
||||
bool uses_user_vertex_buffers;
|
||||
|
||||
setup_arrays(st, vao, dual_slot_inputs, inputs_read, 0, inputs_read, 0,
|
||||
&velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers);
|
||||
|
||||
if (num_vbuffers != 1 || uses_user_vertex_buffers) {
|
||||
assert(!"this should never happen with display lists");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
velements.count = util_bitcount(inputs_read);
|
||||
|
||||
struct pipe_screen *screen = st->screen;
|
||||
struct pipe_vertex_state *state =
|
||||
screen->create_vertex_state(screen, &vbuffer[0], velements.velems,
|
||||
velements.count,
|
||||
indexbuf ?
|
||||
st_buffer_object(indexbuf)->buffer : NULL,
|
||||
enabled_attribs);
|
||||
|
||||
for (unsigned i = 0; i < num_vbuffers; i++)
|
||||
pipe_vertex_buffer_unreference(&vbuffer[i]);
|
||||
return state;
|
||||
}
|
||||
|
|
|
@ -285,7 +285,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode )
|
|||
|
||||
if (newMode == GL_RENDER) {
|
||||
/* restore normal VBO draw function */
|
||||
st_init_draw_functions(&ctx->Driver);
|
||||
st_init_draw_functions(st->screen, &ctx->Driver);
|
||||
}
|
||||
else if (newMode == GL_SELECT) {
|
||||
if (!st->selection_stage)
|
||||
|
|
|
@ -960,7 +960,7 @@ st_init_driver_functions(struct pipe_screen *screen,
|
|||
{
|
||||
_mesa_init_sampler_object_functions(functions);
|
||||
|
||||
st_init_draw_functions(functions);
|
||||
st_init_draw_functions(screen, functions);
|
||||
st_init_blit_functions(functions);
|
||||
st_init_bufferobject_functions(screen, functions);
|
||||
st_init_clear_functions(functions);
|
||||
|
|
|
@ -309,14 +309,73 @@ st_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
|
|||
cso_draw_vbo(st->cso_context, &info, 0, &indirect, draw);
|
||||
}
|
||||
|
||||
static void
|
||||
st_draw_gallium_vertex_state(struct gl_context *ctx,
|
||||
struct pipe_vertex_state *state,
|
||||
struct pipe_draw_vertex_state_info info,
|
||||
const struct pipe_draw_start_count_bias *draws,
|
||||
const uint8_t *mode,
|
||||
unsigned num_draws,
|
||||
bool per_vertex_edgeflags)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
bool old_vertdata_edgeflags = st->vertdata_edgeflags;
|
||||
|
||||
/* We don't flag any other states to make st_validate state update edge
|
||||
* flags, so we need to update them here.
|
||||
*/
|
||||
st_update_edgeflags(st, per_vertex_edgeflags);
|
||||
|
||||
prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK_NO_VARRAYS,
|
||||
ST_PIPELINE_RENDER_NO_VARRAYS);
|
||||
|
||||
struct pipe_context *pipe = st->pipe;
|
||||
uint32_t velem_mask = ctx->VertexProgram._Current->info.inputs_read;
|
||||
|
||||
if (!mode) {
|
||||
pipe->draw_vertex_state(pipe, state, velem_mask, info, draws, num_draws);
|
||||
} else {
|
||||
/* Find consecutive draws where mode doesn't vary. */
|
||||
for (unsigned i = 0, first = 0; i <= num_draws; i++) {
|
||||
if (i == num_draws || mode[i] != mode[first]) {
|
||||
unsigned current_num_draws = i - first;
|
||||
|
||||
/* Increase refcount to be able to use take_vertex_state_ownership
|
||||
* with all draws.
|
||||
*/
|
||||
if (i != num_draws && info.take_vertex_state_ownership)
|
||||
p_atomic_inc(&state->reference.count);
|
||||
|
||||
info.mode = mode[first];
|
||||
pipe->draw_vertex_state(pipe, state, velem_mask, info, &draws[first],
|
||||
current_num_draws);
|
||||
first = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If per-vertex edge flags are different than the non-display-list state,
|
||||
* just flag ST_NEW_VERTEX_ARRAY, which will also completely revalidate
|
||||
* edge flags in st_validate_state.
|
||||
*/
|
||||
if (st->vertdata_edgeflags != old_vertdata_edgeflags)
|
||||
st->dirty |= ST_NEW_VERTEX_ARRAYS;
|
||||
}
|
||||
|
||||
void
|
||||
st_init_draw_functions(struct dd_function_table *functions)
|
||||
st_init_draw_functions(struct pipe_screen *screen,
|
||||
struct dd_function_table *functions)
|
||||
{
|
||||
functions->Draw = NULL;
|
||||
functions->DrawGallium = st_draw_gallium;
|
||||
functions->DrawGalliumMultiMode = st_draw_gallium_multimode;
|
||||
functions->DrawIndirect = st_indirect_draw_vbo;
|
||||
functions->DrawTransformFeedback = st_draw_transform_feedback;
|
||||
|
||||
if (screen->get_param(screen, PIPE_CAP_DRAW_VERTEX_STATE)) {
|
||||
functions->DrawGalliumVertexState = st_draw_gallium_vertex_state;
|
||||
functions->CreateGalliumVertexState = st_create_gallium_vertex_state;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -41,7 +41,8 @@ struct _mesa_prim;
|
|||
struct gl_context;
|
||||
struct st_context;
|
||||
|
||||
void st_init_draw_functions(struct dd_function_table *functions);
|
||||
void st_init_draw_functions(struct pipe_screen *screen,
|
||||
struct dd_function_table *functions);
|
||||
|
||||
void st_destroy_draw( struct st_context *st );
|
||||
|
||||
|
|
|
@ -64,6 +64,14 @@ struct vbo_save_vertex_list {
|
|||
struct pipe_draw_start_count_bias start_count;
|
||||
};
|
||||
unsigned num_draws;
|
||||
|
||||
struct {
|
||||
struct gl_context *ctx;
|
||||
struct pipe_vertex_state *state[VP_MODE_MAX];
|
||||
int private_refcount[VP_MODE_MAX];
|
||||
GLbitfield enabled_attribs[VP_MODE_MAX];
|
||||
struct pipe_draw_vertex_state_info info;
|
||||
} gallium;
|
||||
} merged;
|
||||
|
||||
/* Cold: used during construction or to handle edge-cases */
|
||||
|
|
|
@ -900,6 +900,25 @@ end:
|
|||
_mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
|
||||
}
|
||||
|
||||
/* Prepare for DrawGalliumVertexState */
|
||||
if (node->merged.num_draws && ctx->Driver.DrawGalliumVertexState) {
|
||||
for (unsigned i = 0; i < VP_MODE_MAX; i++) {
|
||||
uint32_t enabled_attribs = _vbo_get_vao_filter(i) &
|
||||
node->VAO[i]->_EnabledWithMapMode;
|
||||
|
||||
node->merged.gallium.state[i] =
|
||||
ctx->Driver.CreateGalliumVertexState(ctx, node->VAO[i],
|
||||
node->cold->ib.obj,
|
||||
enabled_attribs);
|
||||
node->merged.gallium.private_refcount[i] = 0;
|
||||
node->merged.gallium.enabled_attribs[i] = enabled_attribs;
|
||||
}
|
||||
|
||||
node->merged.gallium.ctx = ctx;
|
||||
node->merged.gallium.info.mode = node->merged.info.mode;
|
||||
node->merged.gallium.info.take_vertex_state_ownership = false;
|
||||
assert(node->merged.info.index_size == 4);
|
||||
}
|
||||
|
||||
/* Deal with GL_COMPILE_AND_EXECUTE:
|
||||
*/
|
||||
|
|
|
@ -180,6 +180,114 @@ vbo_save_playback_vertex_list_loopback(struct gl_context *ctx, void *data)
|
|||
loopback_vertex_list(ctx, node);
|
||||
}
|
||||
|
||||
/**
 * Result of trying the gallium vertex-state playback path.
 */
enum vbo_save_status {
   DONE = 0,          /* the list was handled; the caller returns */
   USE_SLOW_PATH = 1, /* the caller continues with the regular playback */
};
|
||||
|
||||
static enum vbo_save_status
|
||||
vbo_save_playback_vertex_list_gallium(struct gl_context *ctx,
|
||||
const struct vbo_save_vertex_list *node,
|
||||
bool copy_to_current)
|
||||
{
|
||||
/* Don't use this if selection or feedback mode is enabled. st/mesa can't
|
||||
* handle it.
|
||||
*/
|
||||
if (!ctx->Driver.DrawGalliumVertexState || ctx->RenderMode != GL_RENDER)
|
||||
return USE_SLOW_PATH;
|
||||
|
||||
const gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode;
|
||||
|
||||
/* This sets which vertex arrays are enabled, which determines
|
||||
* which attribs have stride = 0 and whether edge flags are enabled.
|
||||
*/
|
||||
const GLbitfield enabled = node->merged.gallium.enabled_attribs[mode];
|
||||
ctx->Array._DrawVAOEnabledAttribs = enabled;
|
||||
_mesa_set_varying_vp_inputs(ctx, enabled);
|
||||
|
||||
if (ctx->NewState)
|
||||
_mesa_update_state(ctx);
|
||||
|
||||
/* Use the slow path when there are vertex inputs without vertex
|
||||
* elements. This happens with zero-stride attribs and non-fixed-func
|
||||
* shaders.
|
||||
*
|
||||
* Dual-slot inputs are also unsupported because the higher slot is
|
||||
* always missing in vertex elements.
|
||||
*
|
||||
* TODO: Add support for zero-stride attribs.
|
||||
*/
|
||||
struct gl_program *vp = ctx->VertexProgram._Current;
|
||||
|
||||
if (vp->info.inputs_read & ~enabled || vp->DualSlotInputs)
|
||||
return USE_SLOW_PATH;
|
||||
|
||||
struct pipe_vertex_state *state = node->merged.gallium.state[mode];
|
||||
struct pipe_draw_vertex_state_info info = node->merged.gallium.info;
|
||||
|
||||
/* Return precomputed GL errors such as invalid shaders. */
|
||||
if (!ctx->ValidPrimMask) {
|
||||
_mesa_error(ctx, ctx->DrawGLError, "glCallList");
|
||||
return DONE;
|
||||
}
|
||||
|
||||
if (node->merged.gallium.ctx == ctx) {
|
||||
/* This mechanism allows passing references to the driver without
|
||||
* using atomics to increase the reference count.
|
||||
*
|
||||
* This private refcount can be decremented without atomics but only
|
||||
* one context (ctx above) can use this counter (so that it's only
|
||||
* used by 1 thread).
|
||||
*
|
||||
* This number is atomically added to reference.count at
|
||||
* initialization. If it's never used, the same number is atomically
|
||||
* subtracted from reference.count before destruction. If this number
|
||||
* is decremented, we can pass one reference to the driver without
|
||||
* touching reference.count with atomics. At destruction we only
|
||||
* subtract the number of references we have not returned. This can
|
||||
* possibly turn a million atomic increments into 1 add and 1 subtract
|
||||
* atomic op over the whole lifetime of an app.
|
||||
*/
|
||||
int * const private_refcount = (int*)&node->merged.gallium.private_refcount[mode];
|
||||
assert(*private_refcount >= 0);
|
||||
|
||||
if (unlikely(*private_refcount == 0)) {
|
||||
/* pipe_vertex_state can be reused through util_vertex_state_cache,
|
||||
* and there can be many display lists over-incrementing this number,
|
||||
* causing it to overflow.
|
||||
*
|
||||
* Guess that the same state can never be used by N=500000 display
|
||||
* lists, so one display list can only increment it by
|
||||
* INT_MAX / N.
|
||||
*/
|
||||
const int add_refs = INT_MAX / 500000;
|
||||
p_atomic_add(&state->reference.count, add_refs);
|
||||
*private_refcount = add_refs;
|
||||
}
|
||||
|
||||
(*private_refcount)--;
|
||||
info.take_vertex_state_ownership = true;
|
||||
}
|
||||
|
||||
/* Fast path using a pre-built gallium vertex buffer state. */
|
||||
if (node->merged.mode || node->merged.num_draws > 1) {
|
||||
ctx->Driver.DrawGalliumVertexState(ctx, state, info,
|
||||
node->merged.start_counts,
|
||||
node->merged.mode,
|
||||
node->merged.num_draws,
|
||||
enabled & VERT_ATTRIB_EDGEFLAG);
|
||||
} else if (node->merged.num_draws) {
|
||||
ctx->Driver.DrawGalliumVertexState(ctx, state, info,
|
||||
&node->merged.start_count,
|
||||
NULL, 1,
|
||||
enabled & VERT_ATTRIB_EDGEFLAG);
|
||||
}
|
||||
|
||||
if (copy_to_current)
|
||||
playback_copy_to_current(ctx, node);
|
||||
return DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the buffer and save copied verts.
|
||||
* This is called from the display list code when executing
|
||||
|
@ -202,6 +310,9 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data, bool copy_to_c
|
|||
return;
|
||||
}
|
||||
|
||||
if (vbo_save_playback_vertex_list_gallium(ctx, node, copy_to_current) == DONE)
|
||||
return;
|
||||
|
||||
bind_vertex_list(ctx, node);
|
||||
|
||||
/* Need that at least one time. */
|
||||
|
|
Loading…
Reference in New Issue