mesa: use pipe_vertex_state in vbo and st/mesa for lower display list overhead

Acked-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13050>
This commit is contained in:
Marek Olšák 2021-08-11 23:32:38 -04:00 committed by Marge Bot
parent f32cefdb87
commit e78d7fe7d5
12 changed files with 287 additions and 18 deletions

View File

@ -60,12 +60,17 @@ struct gl_texture_image;
struct gl_texture_object;
struct gl_memory_info;
struct gl_transform_feedback_object;
struct gl_vertex_array_object;
struct ati_fragment_shader;
struct util_queue_monitoring;
struct _mesa_prim;
struct _mesa_index_buffer;
struct pipe_draw_info;
struct pipe_draw_start_count_bias;
struct pipe_vertex_state;
struct pipe_draw_vertex_state_info;
struct pipe_vertex_buffer;
struct pipe_vertex_element;
/* GL_ARB_vertex_buffer_object */
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
@ -641,8 +646,21 @@ struct dd_function_table {
void (*DrawTransformFeedback)(struct gl_context *ctx, GLenum mode,
unsigned num_instances, unsigned stream,
struct gl_transform_feedback_object *tfb_vertcount);
void (*DrawGalliumVertexState)(struct gl_context *ctx,
struct pipe_vertex_state *state,
struct pipe_draw_vertex_state_info info,
const struct pipe_draw_start_count_bias *draws,
const uint8_t *mode,
unsigned num_draws,
bool per_vertex_edgeflags);
/*@}*/
struct pipe_vertex_state *
(*CreateGalliumVertexState)(struct gl_context *ctx,
const struct gl_vertex_array_object *vao,
struct gl_buffer_object *indexbuf,
uint32_t enabled_attribs);
/**
* \name State-changing functions.

View File

@ -73,7 +73,7 @@
#include "vbo/vbo_util.h"
#include "vbo/vbo_save.h"
#include "util/format_r11g11b10f.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#define USE_BITMAP_ATLAS 1
@ -797,8 +797,15 @@ void mesa_print_display_list(GLuint list);
static void
vbo_destroy_vertex_list(struct gl_context *ctx, struct vbo_save_vertex_list *node)
{
for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm)
_mesa_reference_vao(ctx, &node->VAO[vpm], NULL);
for (gl_vertex_processing_mode mode = VP_MODE_FF; mode < VP_MODE_MAX; ++mode) {
_mesa_reference_vao(ctx, &node->VAO[mode], NULL);
if (node->merged.gallium.private_refcount[mode]) {
assert(node->merged.gallium.private_refcount[mode] > 0);
p_atomic_add(&node->merged.gallium.state[mode]->reference.count,
-node->merged.gallium.private_refcount[mode]);
}
pipe_vertex_state_reference(&node->merged.gallium.state[mode], NULL);
}
if (node->merged.mode) {
free(node->merged.mode);

View File

@ -140,31 +140,33 @@ static void check_program_state( struct st_context *st )
st->dirty |= dirty;
}
static void check_attrib_edgeflag(struct st_context *st)
/**
 * Recompute the edge-flag state cached in \p st and flag whatever derived
 * state depends on it.
 *
 * \param st                    state tracker context to update
 * \param per_vertex_edgeflags  whether the upcoming draw supplies edge flags
 *                              as per-vertex data
 */
void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags)
{
GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
struct gl_program *vp = st->ctx->VertexProgram._Current;
edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
st->ctx->Polygon.BackMode != GL_FILL;
vertdata_edgeflags = edgeflags_enabled &&
_mesa_draw_edge_flag_array_enabled(st->ctx);
/* Edge flags are only considered when at least one polygon face mode is
 * not GL_FILL.
 */
bool edgeflags_enabled = st->ctx->Polygon.FrontMode != GL_FILL ||
st->ctx->Polygon.BackMode != GL_FILL;
bool vertdata_edgeflags = edgeflags_enabled && per_vertex_edgeflags;
/* A change in whether edge flags come from vertex data dirties the current
 * vertex program, if there is one.
 */
if (vertdata_edgeflags != st->vertdata_edgeflags) {
st->vertdata_edgeflags = vertdata_edgeflags;
struct gl_program *vp = st->ctx->VertexProgram._Current;
if (vp)
st->dirty |= ST_NEW_VERTEX_PROGRAM(st, st_program(vp));
}
edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
!st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
/* Without per-vertex data, the constant current edge flag applies; when it
 * is zero the rasterizer state has to be refreshed.
 */
bool edgeflag_culls_prims = edgeflags_enabled && !vertdata_edgeflags &&
!st->ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0];
if (edgeflag_culls_prims != st->edgeflag_culls_prims) {
st->edgeflag_culls_prims = edgeflag_culls_prims;
st->dirty |= ST_NEW_RASTERIZER;
}
}
/**
 * Refresh the edge-flag state using the edge flag array enable reported by
 * _mesa_draw_edge_flag_array_enabled() for this context.
 */
static void check_attrib_edgeflag(struct st_context *st)
{
   const bool per_vertex_edgeflags =
      _mesa_draw_edge_flag_array_enabled(st->ctx);

   st_update_edgeflags(st, per_vertex_edgeflags);
}
/***********************************************************************
* Update all derived state:

View File

@ -58,6 +58,7 @@ enum st_pipeline {
void st_init_atoms( struct st_context *st );
void st_destroy_atoms( struct st_context *st );
void st_validate_state( struct st_context *st, enum st_pipeline pipeline );
void st_update_edgeflags(struct st_context *st, bool per_vertex_edgeflags);
void
st_setup_arrays(struct st_context *st,
@ -74,6 +75,12 @@ st_setup_current_user(struct st_context *st,
struct cso_velems_state *velements,
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers);
struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
const struct gl_vertex_array_object *vao,
struct gl_buffer_object *indexbuf,
uint32_t enabled_attribs);
/* Define ST_NEW_xxx_INDEX */
enum {
#define ST_STATE(FLAG, st_update) FLAG##_INDEX,

View File

@ -322,3 +322,40 @@ st_update_array(struct st_context *st)
vbuffer);
st->last_num_vbuffers = num_vbuffers;
}
/**
 * Build a pipe_vertex_state for \p vao so that it can later be drawn with
 * pipe_context::draw_vertex_state.
 *
 * \param ctx              GL context
 * \param vao              vertex array object describing the attributes
 * \param indexbuf         optional index buffer object, may be NULL
 * \param enabled_attribs  mask of attributes to include in the state
 *
 * \return the new vertex state, or NULL when the arrays do not resolve to
 *         exactly one non-user vertex buffer (or when the driver returns
 *         NULL from create_vertex_state).
 */
struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
                               const struct gl_vertex_array_object *vao,
                               struct gl_buffer_object *indexbuf,
                               uint32_t enabled_attribs)
{
   struct st_context *st = st_context(ctx);
   const GLbitfield inputs_read = enabled_attribs;
   const GLbitfield dual_slot_inputs = 0; /* always zero */
   struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
   unsigned num_vbuffers = 0;
   struct cso_velems_state velements;
   bool uses_user_vertex_buffers;
   struct pipe_vertex_state *state = NULL;

   setup_arrays(st, vao, dual_slot_inputs, inputs_read, 0, inputs_read, 0,
                &velements, vbuffer, &num_vbuffers, &uses_user_vertex_buffers);

   if (num_vbuffers != 1 || uses_user_vertex_buffers) {
      assert(!"this should never happen with display lists");
      /* Fall through with state == NULL so that the buffers returned by
       * setup_arrays are still released below when asserts are disabled.
       */
   } else {
      velements.count = util_bitcount(inputs_read);

      struct pipe_screen *screen = st->screen;
      state = screen->create_vertex_state(screen, &vbuffer[0],
                                          velements.velems, velements.count,
                                          indexbuf ?
                                          st_buffer_object(indexbuf)->buffer :
                                          NULL,
                                          enabled_attribs);
   }

   /* Drop the local vertex buffer references on every exit path. */
   for (unsigned i = 0; i < num_vbuffers; i++)
      pipe_vertex_buffer_unreference(&vbuffer[i]);

   return state;
}

View File

@ -285,7 +285,7 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode )
if (newMode == GL_RENDER) {
/* restore normal VBO draw function */
st_init_draw_functions(&ctx->Driver);
st_init_draw_functions(st->screen, &ctx->Driver);
}
else if (newMode == GL_SELECT) {
if (!st->selection_stage)

View File

@ -960,7 +960,7 @@ st_init_driver_functions(struct pipe_screen *screen,
{
_mesa_init_sampler_object_functions(functions);
st_init_draw_functions(functions);
st_init_draw_functions(screen, functions);
st_init_blit_functions(functions);
st_init_bufferobject_functions(screen, functions);
st_init_clear_functions(functions);

View File

@ -309,14 +309,73 @@ st_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
cso_draw_vbo(st->cso_context, &info, 0, &indirect, draw);
}
/**
 * Draw using a pre-built pipe_vertex_state.
 *
 * \param ctx                   GL context
 * \param state                 vertex state to draw from
 * \param info                  draw info; its mode is overridden per run when
 *                              \p mode is given
 * \param draws                 array of \p num_draws start/count/bias entries
 * \param mode                  optional per-draw primitive modes, or NULL
 *                              when all draws use info.mode
 * \param num_draws             number of entries in \p draws (and \p mode)
 * \param per_vertex_edgeflags  whether edge flags come from vertex data
 */
static void
st_draw_gallium_vertex_state(struct gl_context *ctx,
                             struct pipe_vertex_state *state,
                             struct pipe_draw_vertex_state_info info,
                             const struct pipe_draw_start_count_bias *draws,
                             const uint8_t *mode,
                             unsigned num_draws,
                             bool per_vertex_edgeflags)
{
   struct st_context *st = st_context(ctx);
   const bool saved_vertdata_edgeflags = st->vertdata_edgeflags;

   /* No other state is flagged to make st_validate_state update edge flags,
    * so they have to be updated here.
    */
   st_update_edgeflags(st, per_vertex_edgeflags);

   prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK_NO_VARRAYS,
                ST_PIPELINE_RENDER_NO_VARRAYS);

   struct pipe_context *pipe = st->pipe;
   uint32_t velem_mask = ctx->VertexProgram._Current->info.inputs_read;

   if (mode) {
      /* Split the draws into runs of consecutive entries sharing one mode
       * and submit each run separately.
       */
      unsigned run_start = 0;

      for (unsigned idx = 0; idx <= num_draws; idx++) {
         const bool at_end = idx == num_draws;

         if (!at_end && mode[idx] == mode[run_start])
            continue;

         /* Every run but the last needs its own reference so that
          * take_vertex_state_ownership can be used with all draws.
          */
         if (!at_end && info.take_vertex_state_ownership)
            p_atomic_inc(&state->reference.count);

         info.mode = mode[run_start];
         pipe->draw_vertex_state(pipe, state, velem_mask, info,
                                 &draws[run_start], idx - run_start);
         run_start = idx;
      }
   } else {
      pipe->draw_vertex_state(pipe, state, velem_mask, info, draws, num_draws);
   }

   /* If per-vertex edge flags now differ from what they were on entry, flag
    * ST_NEW_VERTEX_ARRAYS, which will also completely revalidate edge flags
    * in st_validate_state.
    */
   if (saved_vertdata_edgeflags != st->vertdata_edgeflags)
      st->dirty |= ST_NEW_VERTEX_ARRAYS;
}
/**
 * Install the state tracker's draw entry points into \p functions.
 *
 * \param screen     pipe screen queried for PIPE_CAP_DRAW_VERTEX_STATE
 * \param functions  driver function table to fill in
 */
void
st_init_draw_functions(struct dd_function_table *functions)
st_init_draw_functions(struct pipe_screen *screen,
struct dd_function_table *functions)
{
functions->Draw = NULL;
functions->DrawGallium = st_draw_gallium;
functions->DrawGalliumMultiMode = st_draw_gallium_multimode;
functions->DrawIndirect = st_indirect_draw_vbo;
functions->DrawTransformFeedback = st_draw_transform_feedback;
/* The vertex-state hooks are installed only when the screen reports
 * PIPE_CAP_DRAW_VERTEX_STATE; otherwise this function leaves both
 * pointers untouched.
 */
if (screen->get_param(screen, PIPE_CAP_DRAW_VERTEX_STATE)) {
functions->DrawGalliumVertexState = st_draw_gallium_vertex_state;
functions->CreateGalliumVertexState = st_create_gallium_vertex_state;
}
}

View File

@ -41,7 +41,8 @@ struct _mesa_prim;
struct gl_context;
struct st_context;
void st_init_draw_functions(struct dd_function_table *functions);
void st_init_draw_functions(struct pipe_screen *screen,
struct dd_function_table *functions);
void st_destroy_draw( struct st_context *st );

View File

@ -64,6 +64,14 @@ struct vbo_save_vertex_list {
struct pipe_draw_start_count_bias start_count;
};
unsigned num_draws;
/* Data used by the DrawGalliumVertexState playback path. */
struct {
/* Context recorded when the vertex states were created; playback only
 * touches private_refcount when it runs on this same context.
 */
struct gl_context *ctx;
/* Pre-built vertex state, one per vertex processing mode. */
struct pipe_vertex_state *state[VP_MODE_MAX];
/* References already added to state[i]->reference.count that have not
 * yet been handed to the driver.
 */
int private_refcount[VP_MODE_MAX];
/* Attribute mask each state[] entry was created with. */
GLbitfield enabled_attribs[VP_MODE_MAX];
/* Draw info copied and adjusted for each playback. */
struct pipe_draw_vertex_state_info info;
} gallium;
} merged;
/* Cold: used during construction or to handle edge cases */

View File

@ -900,6 +900,25 @@ end:
_mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
}
/* Prepare for DrawGalliumVertexState */
if (node->merged.num_draws && ctx->Driver.DrawGalliumVertexState) {
for (unsigned i = 0; i < VP_MODE_MAX; i++) {
uint32_t enabled_attribs = _vbo_get_vao_filter(i) &
node->VAO[i]->_EnabledWithMapMode;
node->merged.gallium.state[i] =
ctx->Driver.CreateGalliumVertexState(ctx, node->VAO[i],
node->cold->ib.obj,
enabled_attribs);
node->merged.gallium.private_refcount[i] = 0;
node->merged.gallium.enabled_attribs[i] = enabled_attribs;
}
node->merged.gallium.ctx = ctx;
node->merged.gallium.info.mode = node->merged.info.mode;
node->merged.gallium.info.take_vertex_state_ownership = false;
assert(node->merged.info.index_size == 4);
}
/* Deal with GL_COMPILE_AND_EXECUTE:
*/

View File

@ -180,6 +180,114 @@ vbo_save_playback_vertex_list_loopback(struct gl_context *ctx, void *data)
loopback_vertex_list(ctx, node);
}
/* Result of attempting the gallium vertex-state playback path. */
enum vbo_save_status {
/* The vertex list was fully handled; the caller returns immediately. */
DONE,
/* The fast path does not apply; the caller continues with its regular
 * playback code.
 */
USE_SLOW_PATH,
};
static enum vbo_save_status
vbo_save_playback_vertex_list_gallium(struct gl_context *ctx,
const struct vbo_save_vertex_list *node,
bool copy_to_current)
{
/* Don't use this if selection or feedback mode is enabled. st/mesa can't
* handle it.
*/
if (!ctx->Driver.DrawGalliumVertexState || ctx->RenderMode != GL_RENDER)
return USE_SLOW_PATH;
const gl_vertex_processing_mode mode = ctx->VertexProgram._VPMode;
/* This sets which vertex arrays are enabled, which determines
* which attribs have stride = 0 and whether edge flags are enabled.
*/
const GLbitfield enabled = node->merged.gallium.enabled_attribs[mode];
ctx->Array._DrawVAOEnabledAttribs = enabled;
_mesa_set_varying_vp_inputs(ctx, enabled);
if (ctx->NewState)
_mesa_update_state(ctx);
/* Use the slow path when there are vertex inputs without vertex
* elements. This happens with zero-stride attribs and non-fixed-func
* shaders.
*
* Dual-slot inputs are also unsupported because the higher slot is
* always missing in vertex elements.
*
* TODO: Add support for zero-stride attribs.
*/
struct gl_program *vp = ctx->VertexProgram._Current;
if (vp->info.inputs_read & ~enabled || vp->DualSlotInputs)
return USE_SLOW_PATH;
struct pipe_vertex_state *state = node->merged.gallium.state[mode];
struct pipe_draw_vertex_state_info info = node->merged.gallium.info;
/* Return precomputed GL errors such as invalid shaders. */
if (!ctx->ValidPrimMask) {
_mesa_error(ctx, ctx->DrawGLError, "glCallList");
return DONE;
}
if (node->merged.gallium.ctx == ctx) {
/* This mechanism allows passing references to the driver without
* using atomics to increase the reference count.
*
* This private refcount can be decremented without atomics but only
* one context (ctx above) can use this counter (so that it's only
* used by 1 thread).
*
* This number is atomically added to reference.count at
* initialization. If it's never used, the same number is atomically
* subtracted from reference.count before destruction. If this number
* is decremented, we can pass one reference to the driver without
* touching reference.count with atomics. At destruction we only
* subtract the number of references we have not returned. This can
* possibly turn a million atomic increments into 1 add and 1 subtract
* atomic op over the whole lifetime of an app.
*/
int * const private_refcount = (int*)&node->merged.gallium.private_refcount[mode];
assert(*private_refcount >= 0);
if (unlikely(*private_refcount == 0)) {
/* pipe_vertex_state can be reused through util_vertex_state_cache,
* and there can be many display lists over-incrementing this number,
* causing it to overflow.
*
* Guess that the same state can never be used by N=500000 display
* lists, so one display list can only increment it by
* INT_MAX / N.
*/
const int add_refs = INT_MAX / 500000;
p_atomic_add(&state->reference.count, add_refs);
*private_refcount = add_refs;
}
(*private_refcount)--;
info.take_vertex_state_ownership = true;
}
/* Fast path using a pre-built gallium vertex buffer state. */
if (node->merged.mode || node->merged.num_draws > 1) {
ctx->Driver.DrawGalliumVertexState(ctx, state, info,
node->merged.start_counts,
node->merged.mode,
node->merged.num_draws,
enabled & VERT_ATTRIB_EDGEFLAG);
} else if (node->merged.num_draws) {
ctx->Driver.DrawGalliumVertexState(ctx, state, info,
&node->merged.start_count,
NULL, 1,
enabled & VERT_ATTRIB_EDGEFLAG);
}
if (copy_to_current)
playback_copy_to_current(ctx, node);
return DONE;
}
/**
* Execute the buffer and save copied verts.
* This is called from the display list code when executing
@ -202,6 +310,9 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data, bool copy_to_c
return;
}
if (vbo_save_playback_vertex_list_gallium(ctx, node, copy_to_current) == DONE)
return;
bind_vertex_list(ctx, node);
/* Need that at least one time. */