From 74a811382ff0d414673a32dd8b90b65737357386 Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Thu, 21 Jul 2022 10:15:25 -0700 Subject: [PATCH] d3d12: Add a transition flag indicating that state accumulation is needed Most call sites for transitions will only apply transitions to one or two resources, and don't need to use the bo set, where each call is guaranteed to insert the bo, only to walk the set immediately afterwards. Instead, they can just append the barriers to the dynarray directly and skip the bo set. Draws and dispatches still use the bo set approach, to accumulate the full set of state needed for each subresource for the case where a single [sub]resource is bound to the pipeline in multiple places. Reviewed-by: Bill Kristiansen Part-of: --- src/gallium/drivers/d3d12/d3d12_context.h | 1 + src/gallium/drivers/d3d12/d3d12_draw.cpp | 28 +++++++++---------- .../drivers/d3d12/d3d12_resource_state.cpp | 26 +++++++++++++---- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h index 825d08e4e03..2df17a846c1 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.h +++ b/src/gallium/drivers/d3d12/d3d12_context.h @@ -315,6 +315,7 @@ d3d12_flush_cmdlist_and_wait(struct d3d12_context *ctx); enum d3d12_transition_flags { D3D12_TRANSITION_FLAG_NONE = 0, D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1, + D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2, }; void diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp index b7fbeb9c050..1f030c23d6e 100644 --- a/src/gallium/drivers/d3d12/d3d12_draw.cpp +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -73,7 +73,7 @@ fill_cbv_descriptors(struct d3d12_context *ctx, D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {}; if (buffer && buffer->buffer) { struct d3d12_resource *res = d3d12_resource(buffer->buffer); - d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, 
D3D12_TRANSITION_FLAG_NONE); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); cbv_desc.BufferLocation = d3d12_resource_gpu_virtual_address(res) + buffer->buffer_offset; cbv_desc.SizeInBytes = MIN2(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, align(buffer->buffer_size, 256)); @@ -128,7 +128,7 @@ fill_srv_descriptors(struct d3d12_context *ctx, if (view->base.texture->target == PIPE_BUFFER) { d3d12_transition_resource_state(ctx, d3d12_resource(view->base.texture), state, - D3D12_TRANSITION_FLAG_NONE); + D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); } else { d3d12_transition_subresources_state(ctx, d3d12_resource(view->base.texture), view->base.u.tex.first_level, view->mip_levels, @@ -136,7 +136,7 @@ fill_srv_descriptors(struct d3d12_context *ctx, d3d12_get_format_start_plane(view->base.format), d3d12_get_format_num_planes(view->base.format), state, - D3D12_TRANSITION_FLAG_NONE); + D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); } } else { descs[desc_idx] = screen->null_srvs[shader->srv_bindings[i].dimension].cpu_handle; @@ -173,7 +173,7 @@ fill_ssbo_descriptors(struct d3d12_context *ctx, struct d3d12_resource *res = d3d12_resource(view->buffer); uint64_t res_offset = 0; d3d12_res = d3d12_resource_underlying(res, &res_offset); - d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_NONE); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); uav_desc.Buffer.FirstElement = (view->buffer_offset + res_offset) / 4; uav_desc.Buffer.NumElements = DIV_ROUND_UP(view->buffer_size, 4); d3d12_batch_reference_resource(batch, res, true); @@ -318,7 +318,7 @@ fill_image_descriptors(struct d3d12_context *ctx, if (!batch->pending_memory_barrier) { if (res->base.b.target == PIPE_BUFFER) { - d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 
D3D12_TRANSITION_FLAG_NONE); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); } else { unsigned transition_first_layer = view->u.tex.first_layer; unsigned transition_array_size = array_size; @@ -331,7 +331,7 @@ fill_image_descriptors(struct d3d12_context *ctx, transition_first_layer, transition_array_size, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, - D3D12_TRANSITION_FLAG_NONE); + D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); } } d3d12_batch_reference_resource(batch, res, true); @@ -705,7 +705,7 @@ transition_surface_subresources_state(struct d3d12_context *ctx, d3d12_get_format_start_plane(psurf->format), d3d12_get_format_num_planes(psurf->format), state, - D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS); + D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); } static bool @@ -1106,7 +1106,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, for (unsigned i = 0; i < ctx->num_vbs; ++i) { if (ctx->vbs[i].buffer.resource) { struct d3d12_resource *res = d3d12_resource(ctx->vbs[i].buffer.resource); - d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_NONE); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS) d3d12_batch_reference_resource(batch, res, false); } @@ -1120,7 +1120,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, ibv.BufferLocation = d3d12_resource_gpu_virtual_address(res) + index_offset; ibv.SizeInBytes = res->base.b.width0 - index_offset; ibv.Format = ib_format(dinfo->index_size); - d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_TRANSITION_FLAG_NONE); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); if (ctx->cmdlist_dirty & D3D12_DIRTY_INDEX_BUFFER || memcmp(&ctx->ibv, &ibv, sizeof(D3D12_INDEX_BUFFER_VIEW)) != 
0) { ctx->ibv = ibv; @@ -1170,8 +1170,8 @@ d3d12_draw_vbo(struct pipe_context *pctx, d3d12_batch_reference_resource(batch, fill_buffer, true); } - d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_NONE); - d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_NONE); + d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); + d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); } if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT) ctx->cmdlist->SOSetTargets(0, 4, so_buffer_views); @@ -1202,7 +1202,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset); indirect_arg_offset = indirect->offset + buf_offset; d3d12_transition_resource_state(ctx, indirect_buf, - D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_NONE); + D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); d3d12_batch_reference_resource(batch, indirect_buf, false); } if (indirect->indirect_draw_count) { @@ -1211,7 +1211,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, indirect_count_buf = d3d12_resource_underlying(count_buf, &count_offset); indirect_count_offset = indirect->indirect_draw_count_offset + count_offset; d3d12_transition_resource_state(ctx, count_buf, - D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_NONE); + D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); d3d12_batch_reference_resource(batch, count_buf, false); } assert(!indirect->count_from_stream_output); @@ -1366,7 +1366,7 @@ d3d12_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset); indirect_arg_offset = indirect_offset + buf_offset; 
d3d12_transition_resource_state(ctx, indirect_buf, - D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_NONE); + D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE); d3d12_batch_reference_resource(batch, indirect_buf, false); } diff --git a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp index c70df8a75d3..17b22422ac5 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource_state.cpp +++ b/src/gallium/drivers/d3d12/d3d12_resource_state.cpp @@ -454,8 +454,16 @@ d3d12_transition_resource_state(struct d3d12_context *ctx, d3d12_invalidate_context_bindings(ctx, res); d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo); - d3d12_set_desired_resource_state(&state_entry->desired, state); - _mesa_set_add(ctx->pending_barriers_bos, res->bo); + if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) { + d3d12_set_desired_resource_state(&state_entry->desired, state); + _mesa_set_add(ctx->pending_barriers_bos, res->bo); + } else if (state_entry->batch_end.homogenous) { + append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false); + } else { + for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) { + append_barrier(ctx, res->bo, state_entry, state, i, false); + } + } } void @@ -472,8 +480,12 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo); bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources; - if (is_whole_resource) { + bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0; + + if (is_whole_resource && is_accumulate) { d3d12_set_desired_resource_state(&state_entry->desired, state); + } else if (is_whole_resource && state_entry->batch_end.homogenous) { + append_barrier(ctx, res->bo, state_entry, 
state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false); } else { for (uint32_t l = 0; l < num_levels; l++) { const uint32_t level = start_level + l; @@ -484,13 +496,17 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, uint32_t subres_id = level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size); assert(subres_id < state_entry->desired.num_subresources); - d3d12_set_desired_subresource_state(&state_entry->desired, subres_id, state); + if (is_accumulate) + d3d12_set_desired_subresource_state(&state_entry->desired, subres_id, state); + else + append_barrier(ctx, res->bo, state_entry, state, subres_id, false); } } } } - _mesa_set_add(ctx->pending_barriers_bos, res->bo); + if (is_accumulate) + _mesa_set_add(ctx->pending_barriers_bos, res->bo); } void