d3d12: Add a transition flag indicating that state accumulation is needed

Most call sites for transitions will only apply transitions to one or two
resources, and don't need to use the bo set, where each call is guaranteed
to insert the bo, only to walk the set immediately afterwards. Instead, they
can just append the barriers to the dynarray directly and skip the bo set.

Draws and dispatches still use the append approach, to accumulate the full
set of state needed for each subresource for the case where a single
[sub]resource is bound to the pipeline in multiple places.

Reviewed-by: Bill Kristiansen <billkris@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17688>
This commit is contained in:
Jesse Natalie 2022-07-21 10:15:25 -07:00 committed by Marge Bot
parent fa570bda08
commit 74a811382f
3 changed files with 36 additions and 19 deletions

View File

@ -315,6 +315,7 @@ d3d12_flush_cmdlist_and_wait(struct d3d12_context *ctx);
enum d3d12_transition_flags {
D3D12_TRANSITION_FLAG_NONE = 0,
D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1,
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2,
};
void

View File

@ -73,7 +73,7 @@ fill_cbv_descriptors(struct d3d12_context *ctx,
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {};
if (buffer && buffer->buffer) {
struct d3d12_resource *res = d3d12_resource(buffer->buffer);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
cbv_desc.BufferLocation = d3d12_resource_gpu_virtual_address(res) + buffer->buffer_offset;
cbv_desc.SizeInBytes = MIN2(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16,
align(buffer->buffer_size, 256));
@ -128,7 +128,7 @@ fill_srv_descriptors(struct d3d12_context *ctx,
if (view->base.texture->target == PIPE_BUFFER) {
d3d12_transition_resource_state(ctx, d3d12_resource(view->base.texture),
state,
D3D12_TRANSITION_FLAG_NONE);
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
} else {
d3d12_transition_subresources_state(ctx, d3d12_resource(view->base.texture),
view->base.u.tex.first_level, view->mip_levels,
@ -136,7 +136,7 @@ fill_srv_descriptors(struct d3d12_context *ctx,
d3d12_get_format_start_plane(view->base.format),
d3d12_get_format_num_planes(view->base.format),
state,
D3D12_TRANSITION_FLAG_NONE);
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
}
} else {
descs[desc_idx] = screen->null_srvs[shader->srv_bindings[i].dimension].cpu_handle;
@ -173,7 +173,7 @@ fill_ssbo_descriptors(struct d3d12_context *ctx,
struct d3d12_resource *res = d3d12_resource(view->buffer);
uint64_t res_offset = 0;
d3d12_res = d3d12_resource_underlying(res, &res_offset);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
uav_desc.Buffer.FirstElement = (view->buffer_offset + res_offset) / 4;
uav_desc.Buffer.NumElements = DIV_ROUND_UP(view->buffer_size, 4);
d3d12_batch_reference_resource(batch, res, true);
@ -318,7 +318,7 @@ fill_image_descriptors(struct d3d12_context *ctx,
if (!batch->pending_memory_barrier) {
if (res->base.b.target == PIPE_BUFFER) {
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
} else {
unsigned transition_first_layer = view->u.tex.first_layer;
unsigned transition_array_size = array_size;
@ -331,7 +331,7 @@ fill_image_descriptors(struct d3d12_context *ctx,
transition_first_layer, transition_array_size,
0, 1,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
D3D12_TRANSITION_FLAG_NONE);
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
}
}
d3d12_batch_reference_resource(batch, res, true);
@ -705,7 +705,7 @@ transition_surface_subresources_state(struct d3d12_context *ctx,
d3d12_get_format_start_plane(psurf->format),
d3d12_get_format_num_planes(psurf->format),
state,
D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS);
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
}
static bool
@ -1106,7 +1106,7 @@ d3d12_draw_vbo(struct pipe_context *pctx,
for (unsigned i = 0; i < ctx->num_vbs; ++i) {
if (ctx->vbs[i].buffer.resource) {
struct d3d12_resource *res = d3d12_resource(ctx->vbs[i].buffer.resource);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS)
d3d12_batch_reference_resource(batch, res, false);
}
@ -1120,7 +1120,7 @@ d3d12_draw_vbo(struct pipe_context *pctx,
ibv.BufferLocation = d3d12_resource_gpu_virtual_address(res) + index_offset;
ibv.SizeInBytes = res->base.b.width0 - index_offset;
ibv.Format = ib_format(dinfo->index_size);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
if (ctx->cmdlist_dirty & D3D12_DIRTY_INDEX_BUFFER ||
memcmp(&ctx->ibv, &ibv, sizeof(D3D12_INDEX_BUFFER_VIEW)) != 0) {
ctx->ibv = ibv;
@ -1170,8 +1170,8 @@ d3d12_draw_vbo(struct pipe_context *pctx,
d3d12_batch_reference_resource(batch, fill_buffer, true);
}
d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_NONE);
d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
}
if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT)
ctx->cmdlist->SOSetTargets(0, 4, so_buffer_views);
@ -1202,7 +1202,7 @@ d3d12_draw_vbo(struct pipe_context *pctx,
indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset);
indirect_arg_offset = indirect->offset + buf_offset;
d3d12_transition_resource_state(ctx, indirect_buf,
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_NONE);
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
d3d12_batch_reference_resource(batch, indirect_buf, false);
}
if (indirect->indirect_draw_count) {
@ -1211,7 +1211,7 @@ d3d12_draw_vbo(struct pipe_context *pctx,
indirect_count_buf = d3d12_resource_underlying(count_buf, &count_offset);
indirect_count_offset = indirect->indirect_draw_count_offset + count_offset;
d3d12_transition_resource_state(ctx, count_buf,
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_NONE);
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
d3d12_batch_reference_resource(batch, count_buf, false);
}
assert(!indirect->count_from_stream_output);
@ -1366,7 +1366,7 @@ d3d12_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset);
indirect_arg_offset = indirect_offset + buf_offset;
d3d12_transition_resource_state(ctx, indirect_buf,
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_NONE);
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
d3d12_batch_reference_resource(batch, indirect_buf, false);
}

View File

@ -454,8 +454,16 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
d3d12_invalidate_context_bindings(ctx, res);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo);
d3d12_set_desired_resource_state(&state_entry->desired, state);
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
d3d12_set_desired_resource_state(&state_entry->desired, state);
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
} else if (state_entry->batch_end.homogenous) {
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
} else {
for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) {
append_barrier(ctx, res->bo, state_entry, state, i, false);
}
}
}
void
@ -472,8 +480,12 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo);
bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources;
if (is_whole_resource) {
bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
if (is_whole_resource && is_accumulate) {
d3d12_set_desired_resource_state(&state_entry->desired, state);
} else if (is_whole_resource && state_entry->batch_end.homogenous) {
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
} else {
for (uint32_t l = 0; l < num_levels; l++) {
const uint32_t level = start_level + l;
@ -484,13 +496,17 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
uint32_t subres_id =
level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size);
assert(subres_id < state_entry->desired.num_subresources);
d3d12_set_desired_subresource_state(&state_entry->desired, subres_id, state);
if (is_accumulate)
d3d12_set_desired_subresource_state(&state_entry->desired, subres_id, state);
else
append_barrier(ctx, res->bo, state_entry, state, subres_id, false);
}
}
}
}
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
if (is_accumulate)
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
}
void