diff --git a/meson.build b/meson.build index a887f5c235b..d8ac6e87e06 100644 --- a/meson.build +++ b/meson.build @@ -235,6 +235,7 @@ with_gallium_virgl = gallium_drivers.contains('virgl') with_gallium_swr = gallium_drivers.contains('swr') with_gallium_lima = gallium_drivers.contains('lima') with_gallium_zink = gallium_drivers.contains('zink') +with_gallium_d3d12 = gallium_drivers.contains('d3d12') with_gallium = gallium_drivers.length() != 0 diff --git a/meson_options.txt b/meson_options.txt index 04a0c2bcb95..7db6907857a 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -68,7 +68,7 @@ option( choices : [ 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', 'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl', - 'swr', 'panfrost', 'iris', 'lima', 'zink' + 'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12' ], description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built' ) diff --git a/src/gallium/drivers/d3d12/d3d12_batch.cpp b/src/gallium/drivers/d3d12/d3d12_batch.cpp new file mode 100644 index 00000000000..a1b5aa0ff4c --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_batch.cpp @@ -0,0 +1,256 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_batch.h" +#include "d3d12_context.h" +#include "d3d12_fence.h" +#include "d3d12_query.h" +#include "d3d12_resource.h" +#include "d3d12_screen.h" +#include "d3d12_surface.h" + +#include "util/hash_table.h" +#include "util/set.h" +#include "util/u_inlines.h" + +bool +d3d12_init_batch(struct d3d12_context *ctx, struct d3d12_batch *batch) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + + batch->bos = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + batch->sampler_views = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + batch->surfaces = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + batch->objects = _mesa_set_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + if (!batch->bos || !batch->sampler_views || !batch->surfaces || !batch->objects) + return false; + + util_dynarray_init(&batch->zombie_samplers, NULL); + + if (FAILED(screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + __uuidof(batch->cmdalloc), + (void **)&batch->cmdalloc))) + return false; + + + batch->sampler_heap = + d3d12_descriptor_heap_new(screen->dev, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + 128); + + batch->view_heap = + d3d12_descriptor_heap_new(screen->dev, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + 1024); + + if (!batch->sampler_heap || !batch->view_heap) /* both heaps are needed; fail if either one is missing */ + return false; + 
return true; +} + +static void +delete_bo(set_entry *entry) +{ + struct d3d12_bo *bo = (struct d3d12_bo *)entry->key; + d3d12_bo_unreference(bo); +} + +static void +delete_sampler_view(set_entry *entry) +{ + struct pipe_sampler_view *pres = (struct pipe_sampler_view *)entry->key; + pipe_sampler_view_reference(&pres, NULL); +} + +static void +delete_surface(set_entry *entry) +{ + struct pipe_surface *surf = (struct pipe_surface *)entry->key; + pipe_surface_reference(&surf, NULL); +} + +static void +delete_object(set_entry *entry) +{ + ID3D12Object *object = (ID3D12Object *)entry->key; + object->Release(); +} + +bool +d3d12_reset_batch(struct d3d12_context *ctx, struct d3d12_batch *batch, uint64_t timeout_ns) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + + // batch hasn't been submitted before + if (!batch->fence && !batch->has_errors) + return true; + + if (batch->fence) { + if (!d3d12_fence_finish(batch->fence, timeout_ns)) + return false; + d3d12_fence_reference(&batch->fence, NULL); + } + + _mesa_set_clear(batch->bos, delete_bo); + _mesa_set_clear(batch->sampler_views, delete_sampler_view); + _mesa_set_clear(batch->surfaces, delete_surface); + _mesa_set_clear(batch->objects, delete_object); + + util_dynarray_foreach(&batch->zombie_samplers, d3d12_descriptor_handle, handle) + d3d12_descriptor_handle_free(handle); + util_dynarray_clear(&batch->zombie_samplers); + + d3d12_descriptor_heap_clear(batch->view_heap); + d3d12_descriptor_heap_clear(batch->sampler_heap); + + if (FAILED(batch->cmdalloc->Reset())) { + debug_printf("D3D12: resetting ID3D12CommandAllocator failed\n"); + return false; + } + batch->has_errors = false; + return true; +} + +void +d3d12_destroy_batch(struct d3d12_context *ctx, struct d3d12_batch *batch) +{ + d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE); + batch->cmdalloc->Release(); + d3d12_descriptor_heap_free(batch->sampler_heap); + d3d12_descriptor_heap_free(batch->view_heap); + _mesa_set_destroy(batch->bos, NULL); 
+ _mesa_set_destroy(batch->sampler_views, NULL); + _mesa_set_destroy(batch->surfaces, NULL); + _mesa_set_destroy(batch->objects, NULL); + util_dynarray_fini(&batch->zombie_samplers); +} + +void +d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + ID3D12DescriptorHeap* heaps[2] = { d3d12_descriptor_heap_get(batch->view_heap), + d3d12_descriptor_heap_get(batch->sampler_heap) }; + + d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE); + + /* Create or reset global command list */ + if (ctx->cmdlist) { + if (FAILED(ctx->cmdlist->Reset(batch->cmdalloc, NULL))) { + debug_printf("D3D12: resetting ID3D12GraphicsCommandList failed\n"); + batch->has_errors = true; + return; + } + } else { + if (FAILED(screen->dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + batch->cmdalloc, NULL, + __uuidof(ctx->cmdlist), + (void **)&ctx->cmdlist))) { + debug_printf("D3D12: creating ID3D12GraphicsCommandList failed\n"); + batch->has_errors = true; + return; + } + } + + ctx->cmdlist->SetDescriptorHeaps(2, heaps); + ctx->cmdlist_dirty = ~0; + for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) + ctx->shader_dirty[i] = ~0; + + if (!ctx->queries_disabled) + d3d12_resume_queries(ctx); +} + +void +d3d12_end_batch(struct d3d12_context *ctx, struct d3d12_batch *batch) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + + if (!ctx->queries_disabled) + d3d12_suspend_queries(ctx); + + if (FAILED(ctx->cmdlist->Close())) { + debug_printf("D3D12: closing ID3D12GraphicsCommandList failed\n"); + batch->has_errors = true; + return; + } + + ID3D12CommandList* cmdlists[] = { ctx->cmdlist }; + screen->cmdqueue->ExecuteCommandLists(1, cmdlists); + batch->fence = d3d12_create_fence(screen, ctx); +} + +bool +d3d12_batch_has_references(struct d3d12_batch *batch, + struct d3d12_bo *bo) +{ + return (_mesa_set_search(batch->bos, bo) != NULL); +} + +void +d3d12_batch_reference_resource(struct d3d12_batch 
*batch, + struct d3d12_resource *res) +{ + if (!d3d12_batch_has_references(batch, res->bo)) { + _mesa_set_add(batch->bos, res->bo); + d3d12_bo_reference(res->bo); + } +} + +void +d3d12_batch_reference_sampler_view(struct d3d12_batch *batch, + struct d3d12_sampler_view *sv) +{ + struct set_entry *entry = _mesa_set_search(batch->sampler_views, sv); + if (!entry) { + entry = _mesa_set_add(batch->sampler_views, sv); + pipe_reference(NULL, &sv->base.reference); + } +} + +void +d3d12_batch_reference_surface_texture(struct d3d12_batch *batch, + struct d3d12_surface *surf) +{ + d3d12_batch_reference_resource(batch, d3d12_resource(surf->base.texture)); +} + +void +d3d12_batch_reference_object(struct d3d12_batch *batch, + ID3D12Object *object) +{ + struct set_entry *entry = _mesa_set_search(batch->objects, object); + if (!entry) { + entry = _mesa_set_add(batch->objects, object); + object->AddRef(); + } +} diff --git a/src/gallium/drivers/d3d12/d3d12_batch.h b/src/gallium/drivers/d3d12/d3d12_batch.h new file mode 100644 index 00000000000..0f90a4bd893 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_batch.h @@ -0,0 +1,88 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_BATCH_H +#define D3D12_BATCH_H + +#include "util/u_dynarray.h" +#include + +#define D3D12_IGNORE_SDK_LAYERS +#include + +struct d3d12_bo; +struct d3d12_descriptor_heap; +struct d3d12_fence; + +struct d3d12_batch { + struct d3d12_fence *fence; + + struct set *bos; + struct set *sampler_views; + struct set *surfaces; + struct set *objects; + + struct util_dynarray zombie_samplers; + + ID3D12CommandAllocator *cmdalloc; + struct d3d12_descriptor_heap *sampler_heap; + struct d3d12_descriptor_heap *view_heap; + bool has_errors; +}; + +bool +d3d12_init_batch(struct d3d12_context *ctx, struct d3d12_batch *batch); + +void +d3d12_destroy_batch(struct d3d12_context *ctx, struct d3d12_batch *batch); + +void +d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch); + +void +d3d12_end_batch(struct d3d12_context *ctx, struct d3d12_batch *batch); + +bool +d3d12_reset_batch(struct d3d12_context *ctx, struct d3d12_batch *batch, uint64_t timeout_ns); + +bool +d3d12_batch_has_references(struct d3d12_batch *batch, + struct d3d12_bo *bo); + +void +d3d12_batch_reference_resource(struct d3d12_batch *batch, + struct d3d12_resource *res); + +void +d3d12_batch_reference_sampler_view(struct d3d12_batch *batch, + struct d3d12_sampler_view *sv); + +void +d3d12_batch_reference_surface_texture(struct d3d12_batch *batch, + struct d3d12_surface *surf); + +void +d3d12_batch_reference_object(struct d3d12_batch *batch, + ID3D12Object *object); + +#endif diff --git 
a/src/gallium/drivers/d3d12/d3d12_blit.cpp b/src/gallium/drivers/d3d12/d3d12_blit.cpp new file mode 100644 index 00000000000..302248787fa --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_blit.cpp @@ -0,0 +1,975 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_context.h" +#include "d3d12_compiler.h" +#include "d3d12_debug.h" +#include "d3d12_format.h" +#include "d3d12_resource.h" +#include "d3d12_screen.h" + +#include "util/u_blitter.h" +#include "util/format/u_format.h" + +#include "nir_to_dxil.h" +#include "nir_builder.h" + +static void +copy_buffer_region_no_barriers(struct d3d12_context *ctx, + struct d3d12_resource *dst, + uint64_t dst_offset, + struct d3d12_resource *src, + uint64_t src_offset, + uint64_t size) +{ + uint64_t dst_off, src_off; + ID3D12Resource *dst_buf = d3d12_resource_underlying(dst, &dst_off); + ID3D12Resource *src_buf = d3d12_resource_underlying(src, &src_off); + + ctx->cmdlist->CopyBufferRegion(dst_buf, dst_offset + dst_off, + src_buf, src_offset + src_off, + size); +} + +static bool +is_resolve(const struct pipe_blit_info *info) +{ + return info->src.resource->nr_samples > 1 && + info->dst.resource->nr_samples <= 1; +} + +static bool +resolve_supported(const struct pipe_blit_info *info) +{ + assert(is_resolve(info)); + + // check for unsupported operations + if (util_format_is_depth_or_stencil(info->src.format) && + info->mask != PIPE_MASK_Z) { + return false; + } else { + if (util_format_get_mask(info->dst.format) != info->mask || + util_format_get_mask(info->src.format) != info->mask) + return false; + } + + if (info->filter != PIPE_TEX_FILTER_NEAREST || + info->scissor_enable || + info->num_window_rectangles > 0 || + info->alpha_blend) + return false; + + // formats need to match + struct d3d12_resource *src = d3d12_resource(info->src.resource); + struct d3d12_resource *dst = d3d12_resource(info->dst.resource); + if (src->dxgi_format != dst->dxgi_format) + return false; + + if (util_format_is_pure_integer(src->base.format)) + return false; + + // sizes needs to match + if (info->src.box.width != info->dst.box.width || + info->src.box.height != info->dst.box.height) + return false; + + // can only resolve full subresource + if (info->src.box.width != 
u_minify(info->src.resource->width0, + info->src.level) || + info->src.box.height != u_minify(info->src.resource->height0, + info->src.level) || + info->dst.box.width != u_minify(info->dst.resource->width0, + info->dst.level) || + info->dst.box.height != u_minify(info->dst.resource->height0, + info->dst.level)) + return false; + + return true; +} + +static void +blit_resolve(struct d3d12_context *ctx, const struct pipe_blit_info *info) +{ + struct d3d12_batch *batch = d3d12_current_batch(ctx); + struct d3d12_resource *src = d3d12_resource(info->src.resource); + struct d3d12_resource *dst = d3d12_resource(info->dst.resource); + + d3d12_transition_resource_state(ctx, src, + D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + d3d12_transition_resource_state(ctx, dst, + D3D12_RESOURCE_STATE_RESOLVE_DEST); + + d3d12_apply_resource_states(ctx); + + d3d12_batch_reference_resource(batch, src); + d3d12_batch_reference_resource(batch, dst); + + DXGI_FORMAT dxgi_format = d3d12_get_resource_srv_format(src->base.format, src->base.target); + + assert(src->dxgi_format == dst->dxgi_format); + ctx->cmdlist->ResolveSubresource( + d3d12_resource_resource(dst), info->dst.level, + d3d12_resource_resource(src), info->src.level, + dxgi_format); +} + +static bool +formats_are_copy_compatible(enum pipe_format src, enum pipe_format dst) +{ + if (src == dst) + return true; + + /* We can skip the stencil copy */ + if (util_format_get_depth_only(src) == dst || + util_format_get_depth_only(dst) == src) + return true; + + return false; +} + +static bool +box_fits(const struct pipe_box *box, const struct pipe_resource *res, int level) +{ + unsigned lwidth = u_minify(res->width0, level); + unsigned lheight= u_minify(res->height0, level); + unsigned ldepth = res->target == PIPE_TEXTURE_3D ? 
u_minify(res->depth0, level) : + res->array_size; + + unsigned wb = box->x; + unsigned we = box->x + box->width; + + unsigned hb = box->y; + unsigned he = box->y + box->height; + + unsigned db = box->z; + unsigned de = box->z + box->depth; + + return (wb <= lwidth && we <= lwidth && + hb <= lheight && he <= lheight && + db <= ldepth && de <= ldepth); +} + +static bool +direct_copy_supported(struct d3d12_screen *screen, + const struct pipe_blit_info *info, + bool have_predication) +{ + if (info->scissor_enable || info->alpha_blend || + (have_predication && info->render_condition_enable) || + MAX2(info->src.resource->nr_samples, 1) != MAX2(info->dst.resource->nr_samples, 1)) { + return false; + } + + if (!formats_are_copy_compatible(info->src.format, info->dst.format)) + return false; + + if (util_format_is_depth_or_stencil(info->src.format) && !(info->mask & PIPE_MASK_ZS)) { + return false; + } + + if (!util_format_is_depth_or_stencil(info->src.format)) { + if (util_format_get_mask(info->dst.format) != info->mask || + util_format_get_mask(info->src.format) != info->mask) + return false; + } + + if (abs(info->src.box.height) != info->dst.box.height) { + return false; + } + + if (info->src.box.height != info->dst.box.height && + (!util_format_is_depth_or_stencil(info->src.format) || + screen->opts2.ProgrammableSamplePositionsTier == + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED)) { + return false; + } + + if (!box_fits(&info->dst.box, info->dst.resource, info->dst.level)) { + return false; + } + if (!box_fits(&info->src.box, info->src.resource, info->src.level)) { + return false; + } + + if (info->src.box.width != info->dst.box.width) { + return false; + } + + if (info->src.box.depth != info->dst.box.depth) { + return false; + } + + if ((screen->opts2.ProgrammableSamplePositionsTier == + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED && + (info->src.resource->bind & PIPE_BIND_DEPTH_STENCIL || + info->dst.resource->bind & PIPE_BIND_DEPTH_STENCIL)) || 
+ info->src.resource->nr_samples > 1) { + + if (info->dst.box.x != 0 || + info->dst.box.y != 0 || + info->dst.box.z != 0) + return false; + + if (info->src.box.x != 0 || + info->src.box.y != 0 || + info->src.box.z != 0 || + info->src.box.width != u_minify(info->src.resource->width0, + info->src.level) || + info->src.box.height != u_minify(info->src.resource->height0, + info->src.level) || + info->src.box.depth != u_minify(info->src.resource->depth0, + info->src.level)) + return false; + } + + return true; +} + +inline static unsigned +get_subresource_id(enum pipe_texture_target target, unsigned subres, unsigned stride, + unsigned z, unsigned *updated_z) +{ + if (d3d12_subresource_id_uses_layer(target)) { + subres += stride * z; + if (updated_z) + *updated_z = 0; + } + return subres; +} + +static void +copy_subregion_no_barriers(struct d3d12_context *ctx, + struct d3d12_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct d3d12_resource *src, + unsigned src_level, + const struct pipe_box *psrc_box, + unsigned mask) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + D3D12_TEXTURE_COPY_LOCATION src_loc, dst_loc; + unsigned src_z = psrc_box->z; + + int src_subres_stride = src->base.last_level + 1; + int dst_subres_stride = dst->base.last_level + 1; + + int src_array_size = src->base.array_size; + int dst_array_size = dst->base.array_size; + + if (dst->base.target == PIPE_TEXTURE_CUBE) + dst_array_size *= 6; + + if (src->base.target == PIPE_TEXTURE_CUBE) + src_array_size *= 6; + + int stencil_src_res_offset = 1; + int stencil_dst_res_offset = 1; + + int src_nres = 1; + int dst_nres = 1; + + if (dst->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + dst->base.format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + dst->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + stencil_dst_res_offset = dst_subres_stride * dst_array_size; + src_nres = 2; + } + + if (src->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + 
src->base.format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + src->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { /* test the source format here, not dst */ + stencil_src_res_offset = src_subres_stride * src_array_size; + dst_nres = 2; + } + + static_assert(PIPE_MASK_S == 0x20 && PIPE_MASK_Z == 0x10, "unexpected ZS format mask"); + int nsubres = min(src_nres, dst_nres); + unsigned subresource_copy_mask = nsubres > 1 ? mask >> 4 : 1; + + for (int subres = 0; subres < nsubres; ++subres) { + + if (!(subresource_copy_mask & (1 << subres))) + continue; + + src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_loc.SubresourceIndex = get_subresource_id(src->base.target, src_level, src_subres_stride, src_z, &src_z) + + subres * stencil_src_res_offset; + src_loc.pResource = d3d12_resource_resource(src); + + dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_loc.SubresourceIndex = get_subresource_id(dst->base.target, dst_level, dst_subres_stride, dstz, &dstz) + + subres * stencil_dst_res_offset; + dst_loc.pResource = d3d12_resource_resource(dst); + + if (psrc_box->x == 0 && psrc_box->y == 0 && psrc_box->z == 0 && + psrc_box->width == u_minify(src->base.width0, src_level) && + psrc_box->height == u_minify(src->base.height0, src_level) && + psrc_box->depth == u_minify(src->base.depth0, src_level)) { + + assert((dstx == 0 && dsty == 0 && dstz == 0) || + screen->opts2.ProgrammableSamplePositionsTier != + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED || + (!util_format_is_depth_or_stencil(dst->base.format) && + !util_format_is_depth_or_stencil(src->base.format) && + dst->base.nr_samples <= 1 && + src->base.nr_samples <= 1)); + + ctx->cmdlist->CopyTextureRegion(&dst_loc, dstx, dsty, dstz, + &src_loc, NULL); + + } else { + D3D12_BOX src_box; + src_box.left = psrc_box->x; + src_box.right = MIN2(psrc_box->x + psrc_box->width, u_minify(src->base.width0, src_level)); + src_box.top = psrc_box->y; + src_box.bottom = MIN2(psrc_box->y + psrc_box->height, u_minify(src->base.height0, src_level)); + 
src_box.front = src_z; + src_box.back = src_z + psrc_box->depth; + + assert((screen->opts2.ProgrammableSamplePositionsTier != + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED || + (!util_format_is_depth_or_stencil(dst->base.format) && + !util_format_is_depth_or_stencil(src->base.format))) && + dst->base.nr_samples <= 1 && + src->base.nr_samples <= 1); + + ctx->cmdlist->CopyTextureRegion(&dst_loc, dstx, dsty, dstz, + &src_loc, &src_box); + } + } +} + +static void +copy_resource_y_flipped_no_barriers(struct d3d12_context *ctx, + struct d3d12_resource *dst, + unsigned dst_level, + const struct pipe_box *pdst_box, + struct d3d12_resource *src, + unsigned src_level, + const struct pipe_box *psrc_box, + unsigned mask) +{ + if (D3D12_DEBUG_BLIT & d3d12_debug) { + debug_printf("D3D12 BLIT as COPY: from %s@%d %dx%dx%d + %dx%dx%d\n", + util_format_name(src->base.format), src_level, + psrc_box->x, psrc_box->y, psrc_box->z, + psrc_box->width, psrc_box->height, psrc_box->depth); + debug_printf(" to %s@%d %dx%dx%d\n", + util_format_name(dst->base.format), dst_level, + pdst_box->x, pdst_box->y, pdst_box->z); + } + + struct pipe_box src_box = *psrc_box; + int src_inc = psrc_box->height > 0 ? 1 : -1; + int dst_inc = pdst_box->height > 0 ? 
1 : -1; + src_box.height = 1; + int rows_to_copy = abs(psrc_box->height); + + if (psrc_box->height < 0) + --src_box.y; + + for (int y = 0, dest_y = pdst_box->y; y < rows_to_copy; + ++y, src_box.y += src_inc, dest_y += dst_inc) { + copy_subregion_no_barriers(ctx, dst, dst_level, + pdst_box->x, dest_y, pdst_box->z, + src, src_level, &src_box, mask); + } +} + +void +d3d12_direct_copy(struct d3d12_context *ctx, + struct d3d12_resource *dst, + unsigned dst_level, + const struct pipe_box *pdst_box, + struct d3d12_resource *src, + unsigned src_level, + const struct pipe_box *psrc_box, + unsigned mask) +{ + struct d3d12_batch *batch = d3d12_current_batch(ctx); + + unsigned src_subres = get_subresource_id(src->base.target, src_level, src->base.last_level + 1, + psrc_box->z, nullptr); + unsigned dst_subres = get_subresource_id(dst->base.target, dst_level, dst->base.last_level + 1, + pdst_box->z, nullptr); + + if (D3D12_DEBUG_BLIT & d3d12_debug) + debug_printf("BLIT: Direct copy from subres %d to subres %d\n", + src_subres, dst_subres); + + + d3d12_transition_subresources_state(ctx, src, src_subres, 1, 0, 1, 0, + d3d12_get_format_num_planes(src->base.format), + D3D12_RESOURCE_STATE_COPY_SOURCE); + + d3d12_transition_subresources_state(ctx, dst, dst_subres, 1, 0, 1, 0, + d3d12_get_format_num_planes(dst->base.format), + D3D12_RESOURCE_STATE_COPY_DEST); + + d3d12_apply_resource_states(ctx); + + d3d12_batch_reference_resource(batch, src); + d3d12_batch_reference_resource(batch, dst); + + if (src->base.target == PIPE_BUFFER) { + copy_buffer_region_no_barriers(ctx, dst, pdst_box->x, + src, psrc_box->x, psrc_box->width); + } else if (psrc_box->height == pdst_box->height) { + /* No flipping, we can forward this directly to resource_copy_region */ + copy_subregion_no_barriers(ctx, dst, dst_level, + pdst_box->x, pdst_box->y, pdst_box->z, + src, src_level, psrc_box, mask); + } else { + assert(psrc_box->height == -pdst_box->height); + copy_resource_y_flipped_no_barriers(ctx, dst, 
dst_level, pdst_box, + src, src_level, psrc_box, mask); + } +} + +static bool +is_same_resource(const struct pipe_blit_info *info) +{ + return d3d12_resource_resource(d3d12_resource(info->src.resource)) == + d3d12_resource_resource(d3d12_resource(info->dst.resource)) && + info->src.level == info->dst.level; +} + +static struct pipe_resource * +create_staging_resource(struct d3d12_context *ctx, + struct d3d12_resource *src, + unsigned src_level, + const struct pipe_box *src_box, + struct pipe_box *dst_box, + unsigned mask) + +{ + struct pipe_resource templ = {{0}}; + struct pipe_resource *staging_res; + struct pipe_box copy_src; + + u_box_3d(MIN2(src_box->x, src_box->x + src_box->width), + MIN2(src_box->y, src_box->y + src_box->height), + MIN2(src_box->z, src_box->z + src_box->depth), + abs(src_box->width), abs(src_box->height), abs(src_box->depth), + &copy_src); /* source box rebuilt with min origin and abs() extents */ + + templ.format = src->base.format; + templ.width0 = copy_src.width; + templ.height0 = copy_src.height; + templ.depth0 = copy_src.depth; + templ.array_size = 1; + templ.nr_samples = 1; + templ.nr_storage_samples = 1; + templ.usage = PIPE_USAGE_STAGING; + templ.bind = util_format_is_depth_or_stencil(templ.format) ? 
PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET; + templ.target = src->base.target; + + staging_res = ctx->base.screen->resource_create(ctx->base.screen, &templ); + + dst_box->x = 0; + dst_box->y = 0; + dst_box->z = 0; + dst_box->width = copy_src.width; + dst_box->height = copy_src.height; + dst_box->depth = copy_src.depth; + + d3d12_direct_copy(ctx, d3d12_resource(staging_res), 0, dst_box, + src, src_level, &copy_src, mask); + + if (src_box->width < 0) { + dst_box->x = dst_box->width; + dst_box->width = src_box->width; + } + + if (src_box->height < 0) { + dst_box->y = dst_box->height; + dst_box->height = src_box->height; + } + + if (src_box->depth < 0) { + dst_box->z = dst_box->depth; + dst_box->depth = src_box->depth; + } + return staging_res; +} + +static void +blit_same_resource(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + struct pipe_blit_info dst_info = *info; + + dst_info.src.level = 0; + dst_info.src.resource = create_staging_resource(ctx, d3d12_resource(info->src.resource), + info->src.level, + &info->src.box, + &dst_info.src.box, PIPE_MASK_RGBAZS); + ctx->base.blit(&ctx->base, &dst_info); + pipe_resource_reference(&dst_info.src.resource, NULL); +} + +static void +util_blit_save_state(struct d3d12_context *ctx) +{ + util_blitter_save_blend(ctx->blitter, ctx->gfx_pipeline_state.blend); + util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->gfx_pipeline_state.zsa); + util_blitter_save_vertex_elements(ctx->blitter, ctx->gfx_pipeline_state.ves); + util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); + util_blitter_save_rasterizer(ctx->blitter, ctx->gfx_pipeline_state.rast); + util_blitter_save_fragment_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_FRAGMENT]); + util_blitter_save_vertex_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_VERTEX]); + util_blitter_save_geometry_shader(ctx->blitter, ctx->gfx_stages[PIPE_SHADER_GEOMETRY]); + + util_blitter_save_framebuffer(ctx->blitter, &ctx->fb); + 
util_blitter_save_viewport(ctx->blitter, ctx->viewport_states); + util_blitter_save_scissor(ctx->blitter, ctx->scissor_states); + util_blitter_save_fragment_sampler_states(ctx->blitter, + ctx->num_samplers[PIPE_SHADER_FRAGMENT], + (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]); + util_blitter_save_fragment_sampler_views(ctx->blitter, + ctx->num_sampler_views[PIPE_SHADER_FRAGMENT], + ctx->sampler_views[PIPE_SHADER_FRAGMENT]); + util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->cbufs[PIPE_SHADER_FRAGMENT]); + util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vbs); + util_blitter_save_sample_mask(ctx->blitter, ctx->gfx_pipeline_state.sample_mask); + util_blitter_save_so_targets(ctx->blitter, ctx->gfx_pipeline_state.num_so_targets, ctx->so_targets); +} + +static void +util_blit(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + util_blit_save_state(ctx); + + util_blitter_blit(ctx->blitter, info); +} + +static bool +resolve_stencil_supported(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + assert(is_resolve(info)); + + if (!util_format_is_depth_or_stencil(info->src.format) || + !(info->mask & PIPE_MASK_S)) + return false; + + if (info->mask & PIPE_MASK_Z) { + struct pipe_blit_info new_info = *info; + new_info.mask = PIPE_MASK_Z; + if (!resolve_supported(&new_info) && + !util_blitter_is_blit_supported(ctx->blitter, &new_info)) + return false; + } + + struct pipe_blit_info new_info = *info; + new_info.dst.format = PIPE_FORMAT_R8_UINT; + return util_blitter_is_blit_supported(ctx->blitter, &new_info); +} + +static struct pipe_resource * +create_tmp_resource(struct pipe_screen *screen, + const struct pipe_blit_info *info) +{ + struct pipe_resource tpl = { 0 }; + tpl.width0 = info->dst.box.width; + tpl.height0 = info->dst.box.height; + tpl.depth0 = info->dst.box.depth; + tpl.array_size = 1; + tpl.format = PIPE_FORMAT_R8_UINT; + tpl.target = info->dst.resource->target; + tpl.nr_samples = 
info->dst.resource->nr_samples; + tpl.nr_storage_samples = info->dst.resource->nr_storage_samples; + tpl.usage = PIPE_USAGE_STREAM; + tpl.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + return screen->resource_create(screen, &tpl); +} + +static void * +get_stencil_resolve_vs(struct d3d12_context *ctx) +{ + if (ctx->stencil_resolve_vs) + return ctx->stencil_resolve_vs; + + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, + dxil_get_nir_compiler_options()); + b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs"); + + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "pos"); + + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + + nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); + + struct pipe_shader_state state = { 0 }; + state.type = PIPE_SHADER_IR_NIR; + state.ir.nir = b.shader; + ctx->stencil_resolve_vs = ctx->base.create_vs_state(&ctx->base, &state); + + return ctx->stencil_resolve_vs; +} + +static void * +get_stencil_resolve_fs(struct d3d12_context *ctx) +{ + if (ctx->stencil_resolve_fs) + return ctx->stencil_resolve_fs; + + nir_builder b; + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, + dxil_get_nir_compiler_options()); + + nir_variable *stencil_out = nir_variable_create(b.shader, + nir_var_shader_out, + glsl_uint_type(), + "stencil_out"); + stencil_out->data.location = FRAG_RESULT_COLOR; + + const struct glsl_type *sampler_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_UINT); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "stencil_tex"); + sampler->data.binding = 0; + sampler->data.explicit_binding = true; + + nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa; + + nir_variable *pos_in = nir_variable_create(b.shader, 
nir_var_shader_in, + glsl_vec4_type(), "pos"); + pos_in->data.location = VARYING_SLOT_POS; // VARYING_SLOT_VAR0? + nir_ssa_def *pos = nir_load_var(&b, pos_in); + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, nir_f2i32(&b, pos), 0x3)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); /* just use first sample */ + tex->src[2].src_type = nir_tex_src_texture_deref; + tex->src[2].src = nir_src_for_ssa(tex_deref); + tex->dest_type = nir_type_uint; + tex->is_array = false; + tex->coord_components = 2; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_store_var(&b, stencil_out, nir_channel(&b, &tex->dest.ssa, 1), 0x1); + + struct pipe_shader_state state = { 0 }; + state.type = PIPE_SHADER_IR_NIR; + state.ir.nir = b.shader; + ctx->stencil_resolve_fs = ctx->base.create_fs_state(&ctx->base, &state); + + return ctx->stencil_resolve_fs; +} + +static void * +get_sampler_state(struct d3d12_context *ctx) +{ + if (ctx->sampler_state) + return ctx->sampler_state; + + struct pipe_sampler_state state; + memset(&state, 0, sizeof(state)); + state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + state.normalized_coords = 1; + + return ctx->sampler_state = ctx->base.create_sampler_state(&ctx->base, &state); +} + +static struct pipe_resource * +resolve_stencil_to_temp(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + struct pipe_context *pctx = &ctx->base; + struct pipe_resource *tmp = create_tmp_resource(pctx->screen, info); + if (!tmp) { + debug_printf("D3D12: failed to create stencil-resolve temp-resource\n"); + return NULL; + } + assert(tmp->nr_samples < 2); + + /* resolve stencil into tmp 
*/ + struct pipe_surface dst_tmpl; + util_blitter_default_dst_texture(&dst_tmpl, tmp, 0, 0); + dst_tmpl.format = tmp->format; + struct pipe_surface *dst_surf = pctx->create_surface(pctx, tmp, &dst_tmpl); + if (!dst_surf) { + debug_printf("D3D12: failed to create stencil-resolve dst-surface\n"); + return NULL; + } + + struct pipe_sampler_view src_templ, *src_view; + util_blitter_default_src_texture(ctx->blitter, &src_templ, + info->src.resource, info->src.level); + src_templ.format = util_format_stencil_only(info->src.format); + src_view = pctx->create_sampler_view(pctx, info->src.resource, &src_templ); + + void *sampler_state = get_sampler_state(ctx); + + util_blit_save_state(ctx); + pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 1, &src_view); + pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state); + util_blitter_custom_shader(ctx->blitter, dst_surf, + get_stencil_resolve_vs(ctx), + get_stencil_resolve_fs(ctx)); + util_blitter_restore_textures(ctx->blitter); + pipe_surface_reference(&dst_surf, NULL); + pipe_sampler_view_reference(&src_view, NULL); + return tmp; +} + +static void +blit_resolve_stencil(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + assert(info->mask & PIPE_MASK_S); + + if (D3D12_DEBUG_BLIT & d3d12_debug) + debug_printf("D3D12 BLIT: blit_resolve_stencil\n"); + + if (info->mask & PIPE_MASK_Z) { + /* resolve depth into dst */ + struct pipe_blit_info new_info = *info; + new_info.mask = PIPE_MASK_Z; + + if (resolve_supported(&new_info)) + blit_resolve(ctx, &new_info); + else + util_blit(ctx, &new_info); + } + + struct pipe_resource *tmp = resolve_stencil_to_temp(ctx, info); + + + /* copy resolved stencil into dst */ + struct d3d12_resource *dst = d3d12_resource(info->dst.resource); + d3d12_transition_subresources_state(ctx, d3d12_resource(tmp), + 0, 1, 0, 1, 0, 1, + D3D12_RESOURCE_STATE_COPY_SOURCE); + d3d12_transition_subresources_state(ctx, dst, + 0, 1, 0, 1, 1, 1, + D3D12_RESOURCE_STATE_COPY_DEST); 
+ d3d12_apply_resource_states(ctx); + + struct d3d12_batch *batch = d3d12_current_batch(ctx); + d3d12_batch_reference_resource(batch, d3d12_resource(tmp)); + d3d12_batch_reference_resource(batch, dst); + + D3D12_BOX src_box; + src_box.left = src_box.top = src_box.front = 0; + src_box.right = tmp->width0; + src_box.bottom = tmp->height0; + src_box.back = tmp->depth0; + + D3D12_TEXTURE_COPY_LOCATION src_loc; + src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_loc.SubresourceIndex = 0; + src_loc.pResource = d3d12_resource_resource(d3d12_resource(tmp)); + + D3D12_TEXTURE_COPY_LOCATION dst_loc; + dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_loc.SubresourceIndex = 1; + dst_loc.pResource = d3d12_resource_resource(dst); + + ctx->cmdlist->CopyTextureRegion(&dst_loc, info->dst.box.x, + info->dst.box.y, info->dst.box.z, + &src_loc, &src_box); + + pipe_resource_reference(&tmp, NULL); +} + +static bool +replicate_stencil_supported(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + if (!util_format_is_depth_or_stencil(info->src.format) || + !(info->mask & PIPE_MASK_S)) + return false; + + if (info->mask & PIPE_MASK_Z) { + struct pipe_blit_info new_info = *info; + new_info.mask = PIPE_MASK_Z; + if (!util_blitter_is_blit_supported(ctx->blitter, &new_info)) + return false; + } + + return true; +} + +static void +blit_replicate_stencil(struct d3d12_context *ctx, + const struct pipe_blit_info *info) +{ + assert(info->mask & PIPE_MASK_S); + + if (D3D12_DEBUG_BLIT & d3d12_debug) + debug_printf("D3D12 BLIT: blit_replicate_stencil\n"); + + if (info->mask & PIPE_MASK_Z) { + /* resolve depth into dst */ + struct pipe_blit_info new_info = *info; + new_info.mask = PIPE_MASK_Z; + util_blit(ctx, &new_info); + } + + util_blit_save_state(ctx); + util_blitter_stencil_fallback(ctx->blitter, info->dst.resource, + info->dst.level, + &info->dst.box, + info->src.resource, + info->src.level, + &info->src.box, + info->scissor_enable ? 
&info->scissor : NULL); +} + +void +d3d12_blit(struct pipe_context *pctx, + const struct pipe_blit_info *info) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + if (!info->render_condition_enable && ctx->current_predication) { + if (D3D12_DEBUG_BLIT & d3d12_debug) + debug_printf("D3D12 BLIT: Disable predication\n"); + ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO); + } + + if (D3D12_DEBUG_BLIT & d3d12_debug) { + debug_printf("D3D12 BLIT: from %s@%d msaa:%d %dx%dx%d + %dx%dx%d\n", + util_format_name(info->src.format), info->src.level, + info->src.resource->nr_samples, + info->src.box.x, info->src.box.y, info->src.box.z, + info->src.box.width, info->src.box.height, info->src.box.depth); + debug_printf(" to %s@%d msaa:%d %dx%dx%d + %dx%dx%d ", + util_format_name(info->dst.format), info->dst.level, + info->dst.resource->nr_samples, + info->dst.box.x, info->dst.box.y, info->dst.box.z, + info->dst.box.width, info->dst.box.height, info->dst.box.depth); + debug_printf("| flags %s%s%s\n", + info->render_condition_enable ? "cond " : "", + info->scissor_enable ? "scissor " : "", + info->alpha_blend ? 
"blend" : ""); + } + + if (is_same_resource(info)) + blit_same_resource(ctx, info); + else if (is_resolve(info)) { + if (resolve_supported(info)) + blit_resolve(ctx, info); + else if (util_blitter_is_blit_supported(ctx->blitter, info)) + util_blit(ctx, info); + else if (resolve_stencil_supported(ctx, info)) + blit_resolve_stencil(ctx, info); + else + debug_printf("D3D12: resolve unsupported %s -> %s\n", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + } else if (direct_copy_supported(d3d12_screen(pctx->screen), info, + ctx->current_predication != nullptr)) + d3d12_direct_copy(ctx, d3d12_resource(info->dst.resource), + info->dst.level, &info->dst.box, + d3d12_resource(info->src.resource), + info->src.level, &info->src.box, info->mask); + else if (util_blitter_is_blit_supported(ctx->blitter, info)) + util_blit(ctx, info); + else if (replicate_stencil_supported(ctx, info)) + blit_replicate_stencil(ctx, info); + else + debug_printf("D3D12: blit unsupported %s -> %s\n", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + + if (!info->render_condition_enable && ctx->current_predication) { + ctx->cmdlist->SetPredication( + d3d12_resource_resource(ctx->current_predication), 0, D3D12_PREDICATION_OP_EQUAL_ZERO); + if (D3D12_DEBUG_BLIT & d3d12_debug) + debug_printf("D3D12 BLIT: Re-enable predication\n"); + } + +} + +static void +d3d12_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *pdst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *psrc, + unsigned src_level, + const struct pipe_box *psrc_box) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_resource *dst = d3d12_resource(pdst); + struct d3d12_resource *src = d3d12_resource(psrc); + struct pipe_resource *staging_res = NULL; + const struct pipe_box *src_box = psrc_box; + struct pipe_box staging_box, dst_box; + + if 
(D3D12_DEBUG_BLIT & d3d12_debug) { + debug_printf("D3D12 COPY: from %s@%d msaa:%d mips:%d %dx%dx%d + %dx%dx%d\n", + util_format_name(psrc->format), src_level, psrc->nr_samples, + psrc->last_level, + psrc_box->x, psrc_box->y, psrc_box->z, + psrc_box->width, psrc_box->height, psrc_box->depth); + debug_printf(" to %s@%d msaa:%d mips:%d %dx%dx%d\n", + util_format_name(pdst->format), dst_level, psrc->nr_samples, + psrc->last_level, dstx, dsty, dstz); + } + + /* Use an intermediate resource if copying from/to the same subresource */ + if (d3d12_resource_resource(dst) == d3d12_resource_resource(src) && dst_level == src_level) { + staging_res = create_staging_resource(ctx, src, src_level, psrc_box, &staging_box, PIPE_MASK_RGBAZS); + src = d3d12_resource(staging_res); + src_level = 0; + src_box = &staging_box; + } + + dst_box.x = dstx; + dst_box.y = dsty; + dst_box.z = dstz; + dst_box.width = psrc_box->width; + dst_box.height = psrc_box->height; + + d3d12_direct_copy(ctx, dst, dst_level, &dst_box, + src, src_level, src_box, PIPE_MASK_RGBAZS); + + if (staging_res) + pipe_resource_reference(&staging_res, NULL); +} + +void +d3d12_context_blit_init(struct pipe_context *ctx) +{ + ctx->resource_copy_region = d3d12_resource_copy_region; + ctx->blit = d3d12_blit; +} diff --git a/src/gallium/drivers/d3d12/d3d12_blit.h b/src/gallium/drivers/d3d12/d3d12_blit.h new file mode 100644 index 00000000000..f1ddc64b69d --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_blit.h @@ -0,0 +1,43 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + 
* The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_BLIT_H +#define D3D12_BLIT_H + +struct d3d12_context; +struct d3d12_resource; +struct pipe_box; + +void +d3d12_context_blit_init(struct pipe_context *ctx); + +void +d3d12_direct_copy(struct d3d12_context *ctx, + struct d3d12_resource *dst, + unsigned dst_level, + const struct pipe_box *pdst_box, + struct d3d12_resource *src, + unsigned src_level, + const struct pipe_box *psrc_box, unsigned mask); + +#endif // D3D12_BLIT_H diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp new file mode 100644 index 00000000000..758ff94c8f2 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.cpp @@ -0,0 +1,333 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions 
/* Downcast a generic pb_manager to the d3d12 buffer manager wrapping it. */
static inline struct d3d12_bufmgr *
d3d12_bufmgr(struct pb_manager *mgr)
{
   assert(mgr);

   struct d3d12_bufmgr *d3d12_mgr = (struct d3d12_bufmgr *)mgr;
   return d3d12_mgr;
}
+ 2 : 1; + + return new TransitionableResourceState(res, + total_subresources, + SupportsSimultaneousAccess(desc)); +} + +struct d3d12_bo * +d3d12_bo_wrap_res(ID3D12Resource *res, enum pipe_format format) +{ + struct d3d12_bo *bo; + + bo = CALLOC_STRUCT(d3d12_bo); + if (!bo) + return NULL; + + bo->refcount = 1; + bo->res = res; + bo->trans_state = create_trans_state(res, format); + + return bo; +} + +struct d3d12_bo * +d3d12_bo_new(ID3D12Device *dev, uint64_t size, uint64_t alignment) +{ + ID3D12Resource *res; + + D3D12_RESOURCE_DESC res_desc; + res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + res_desc.Format = DXGI_FORMAT_UNKNOWN; + res_desc.Alignment = alignment; + res_desc.Width = size; + res_desc.Height = 1; + res_desc.DepthOrArraySize = 1; + res_desc.MipLevels = 1; + res_desc.SampleDesc.Count = 1; + res_desc.SampleDesc.Quality = 0; + res_desc.Flags = D3D12_RESOURCE_FLAG_NONE; + res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + D3D12_HEAP_PROPERTIES heap_pris = dev->GetCustomHeapProperties(0, D3D12_HEAP_TYPE_UPLOAD); + HRESULT hres = dev->CreateCommittedResource(&heap_pris, + D3D12_HEAP_FLAG_NONE, + &res_desc, + D3D12_RESOURCE_STATE_COMMON, + NULL, + __uuidof(ID3D12Resource), + (void **)&res); + + if (FAILED(hres)) + return NULL; + + return d3d12_bo_wrap_res(res, PIPE_FORMAT_NONE); +} + +struct d3d12_bo * +d3d12_bo_wrap_buffer(struct pb_buffer *buf) +{ + struct d3d12_bo *bo; + + bo = CALLOC_STRUCT(d3d12_bo); + if (!bo) + return NULL; + + bo->refcount = 1; + bo->buffer = buf; + bo->trans_state = NULL; /* State from base BO will be used */ + + return bo; +} + +void +d3d12_bo_unreference(struct d3d12_bo *bo) +{ + if (bo == NULL) + return; + + assert(p_atomic_read(&bo->refcount) > 0); + + if (p_atomic_dec_zero(&bo->refcount)) { + if (bo->buffer) { + pb_reference(&bo->buffer, NULL); + } else { + delete bo->trans_state; + bo->res->Release(); + } + FREE(bo); + } +} + +void * +d3d12_bo_map(struct d3d12_bo *bo, D3D12_RANGE *range) +{ + struct d3d12_bo 
*base_bo; + D3D12_RANGE offset_range = {0, 0}; + uint64_t offset; + void *ptr; + + base_bo = d3d12_bo_get_base(bo, &offset); + + if (!range || offset == 0) { + /* Nothing to do */ + } else if (range->Begin >= range->End) { + offset_range.Begin = offset; + offset_range.End = offset + d3d12_bo_get_size(bo); + range = &offset_range; + } else { + offset_range.Begin = range->Begin + offset; + offset_range.End = range->End + offset; + range = &offset_range; + } + + if (FAILED(base_bo->res->Map(0, range, &ptr))) + return NULL; + + return (uint8_t *)ptr + (range ? range->Begin : 0); +} + +void +d3d12_bo_unmap(struct d3d12_bo *bo, D3D12_RANGE *range) +{ + struct d3d12_bo *base_bo; + D3D12_RANGE offset_range = {0, 0}; + uint64_t offset; + + base_bo = d3d12_bo_get_base(bo, &offset); + + if (!range || bo == base_bo) + { + /* Nothing to do */ + } else if (range->Begin >= range->End) { + offset_range.Begin = offset; + offset_range.End = offset + base_bo->res->GetDesc().Width; + } else { + offset_range.Begin = range->Begin + offset; + offset_range.End = range->End + offset; + } + + base_bo->res->Unmap(0, range); +} + +static void +d3d12_buffer_destroy(struct pb_buffer *pbuf) +{ + struct d3d12_buffer *buf = d3d12_buffer(pbuf); + + d3d12_bo_unmap(buf->bo, &buf->range); + d3d12_bo_unreference(buf->bo); + FREE(buf); +} + +static void * +d3d12_buffer_map(struct pb_buffer *pbuf, + enum pb_usage_flags flags, + void *flush_ctx) +{ + return d3d12_buffer(pbuf)->map; +} + +static void +d3d12_buffer_unmap(struct pb_buffer *pbuf) +{ +} + +static void +d3d12_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + pb_size *offset) +{ + *base_buf = buf; + *offset = 0; +} + +static enum pipe_error +d3d12_buffer_validate(struct pb_buffer *pbuf, + struct pb_validate *vl, + enum pb_usage_flags flags ) +{ + /* Always pinned */ + return PIPE_OK; +} + +static void +d3d12_buffer_fence(struct pb_buffer *pbuf, + struct pipe_fence_handle *fence ) +{ +} + +const struct pb_vtbl 
d3d12_buffer_vtbl = { + d3d12_buffer_destroy, + d3d12_buffer_map, + d3d12_buffer_unmap, + d3d12_buffer_validate, + d3d12_buffer_fence, + d3d12_buffer_get_base_buffer +}; + +static struct pb_buffer * +d3d12_bufmgr_create_buffer(struct pb_manager *pmgr, + pb_size size, + const struct pb_desc *pb_desc) +{ + struct d3d12_bufmgr *mgr = d3d12_bufmgr(pmgr); + struct d3d12_buffer *buf; + + buf = CALLOC_STRUCT(d3d12_buffer); + if (!buf) + return NULL; + + // Align the buffer to D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT + // in case it is to be used as a CBV. + size = align64(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + + pipe_reference_init(&buf->base.reference, 1); + buf->base.alignment = pb_desc->alignment; + buf->base.usage = pb_desc->usage; + buf->base.vtbl = &d3d12_buffer_vtbl; + buf->base.size = size; + buf->range.Begin = 0; + buf->range.End = size; + + buf->bo = d3d12_bo_new(mgr->dev, size, pb_desc->alignment); + if (!buf->bo) { + FREE(buf); + return NULL; + } + + buf->map = d3d12_bo_map(buf->bo, &buf->range); + if (!buf->map) { + d3d12_bo_unreference(buf->bo); + FREE(buf); + return NULL; + } + + return &buf->base; +} + +static void +d3d12_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + +static void +d3d12_bufmgr_destroy(struct pb_manager *_mgr) +{ + struct d3d12_bufmgr *mgr = d3d12_bufmgr(_mgr); + FREE(mgr); +} + +struct pb_manager * +d3d12_bufmgr_create(struct d3d12_screen *screen) +{ + struct d3d12_bufmgr *mgr; + + mgr = CALLOC_STRUCT(d3d12_bufmgr); + if (!mgr) + return NULL; + + mgr->base.destroy = d3d12_bufmgr_destroy; + mgr->base.create_buffer = d3d12_bufmgr_create_buffer; + mgr->base.flush = d3d12_bufmgr_flush; + + mgr->dev = screen->dev; + + return &mgr->base; +} diff --git a/src/gallium/drivers/d3d12/d3d12_bufmgr.h b/src/gallium/drivers/d3d12/d3d12_bufmgr.h new file mode 100644 index 00000000000..4aee2c22d18 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_bufmgr.h @@ -0,0 +1,121 @@ +/* + * Copyright © Microsoft Corporation + * 
+ * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_BUFMGR_H +#define D3D12_BUFMGR_H + +#include "pipebuffer/pb_buffer.h" +#include "util/u_atomic.h" + +#include + +struct d3d12_bufmgr; +struct d3d12_screen; +struct pb_manager; +struct TransitionableResourceState; + +struct d3d12_bo { + int refcount; + ID3D12Resource *res; + struct pb_buffer *buffer; + struct TransitionableResourceState *trans_state; +}; + +struct d3d12_buffer { + struct pb_buffer base; + + struct d3d12_bo *bo; + D3D12_RANGE range; + void *map; +}; + +static inline struct d3d12_buffer * +d3d12_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct d3d12_buffer *)buf; +} + +static inline struct d3d12_bo * +d3d12_bo_get_base(struct d3d12_bo *bo, uint64_t *offset) +{ + if (bo->buffer) { + struct pb_buffer *base_buffer; + pb_get_base_buffer(bo->buffer, &base_buffer, offset); + return d3d12_buffer(base_buffer)->bo; + } else { + *offset = 0; + return bo; + } +} + +static inline uint64_t +d3d12_bo_get_size(struct d3d12_bo *bo) +{ + if (bo->buffer) + return bo->buffer->size; + else + return bo->res->GetDesc().Width; +} + +static inline bool +d3d12_bo_is_suballocated(struct d3d12_bo *bo) +{ + struct d3d12_bo *base_bo; + uint64_t offset; + + if (!bo->buffer) + return false; + + base_bo = d3d12_bo_get_base(bo, &offset); + return d3d12_bo_get_size(base_bo) != d3d12_bo_get_size(bo); +} + +struct d3d12_bo * +d3d12_bo_new(ID3D12Device *dev, uint64_t size, uint64_t alignment); + +struct d3d12_bo * +d3d12_bo_wrap_res(ID3D12Resource *res, enum pipe_format format); + +struct d3d12_bo * +d3d12_bo_wrap_buffer(struct pb_buffer *buf); + +static inline void +d3d12_bo_reference(struct d3d12_bo *bo) +{ + p_atomic_inc(&bo->refcount); +} + +void +d3d12_bo_unreference(struct d3d12_bo *bo); + +void * +d3d12_bo_map(struct d3d12_bo *bo, D3D12_RANGE *range); + +void +d3d12_bo_unmap(struct d3d12_bo *bo, D3D12_RANGE *range); + +struct pb_manager * +d3d12_bufmgr_create(struct d3d12_screen *screen); + +#endif diff --git 
a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp new file mode 100644 index 00000000000..e0e1138ed5e --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp @@ -0,0 +1,1396 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_compiler.h" +#include "d3d12_context.h" +#include "d3d12_debug.h" +#include "d3d12_screen.h" +#include "d3d12_nir_passes.h" +#include "nir_to_dxil.h" + +#include "pipe/p_state.h" + +#include "nir.h" +#include "nir/nir_draw_helpers.h" +#include "nir/tgsi_to_nir.h" +#include "compiler/nir/nir_builder.h" +#include "tgsi/tgsi_from_mesa.h" +#include "tgsi/tgsi_ureg.h" + +#include "util/u_memory.h" +#include "util/u_prim.h" +#include "util/u_simple_shaders.h" + +#include +#include +#include + +extern "C" { +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_point_sprite.h" +} + +using Microsoft::WRL::ComPtr; + +struct d3d12_validation_tools +{ + d3d12_validation_tools(); + + bool validate_and_sign(struct blob *dxil); + + void disassemble(struct blob *dxil); + + void load_dxil_dll(); + + struct HModule { + HModule(); + ~HModule(); + + bool load(LPCSTR file_name); + operator HMODULE () const; + private: + HMODULE module; + }; + + HModule dxil_module; + HModule dxc_compiler_module; + ComPtr compiler; + ComPtr validator; + ComPtr library; +}; + +struct d3d12_validation_tools *d3d12_validator_create() +{ + return new d3d12_validation_tools(); +} + +void d3d12_validator_destroy(struct d3d12_validation_tools *validator) +{ + delete validator; +} + + +const void * +d3d12_get_compiler_options(struct pipe_screen *screen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + assert(ir == PIPE_SHADER_IR_NIR); + return dxil_get_nir_compiler_options(); +} + +static uint32_t +resource_dimension(enum glsl_sampler_dim dim) +{ + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return RESOURCE_DIMENSION_TEXTURE1D; + case GLSL_SAMPLER_DIM_2D: + return RESOURCE_DIMENSION_TEXTURE2D; + case GLSL_SAMPLER_DIM_3D: + return RESOURCE_DIMENSION_TEXTURE3D; + case GLSL_SAMPLER_DIM_CUBE: + return RESOURCE_DIMENSION_TEXTURECUBE; + default: + return RESOURCE_DIMENSION_UNKNOWN; + } +} + +static struct d3d12_shader * +compile_nir(struct d3d12_context *ctx, struct 
d3d12_shader_selector *sel, + struct d3d12_shader_key *key, struct nir_shader *nir) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + struct d3d12_shader *shader = rzalloc(sel, d3d12_shader); + shader->key = *key; + shader->nir = nir; + sel->current = shader; + + NIR_PASS_V(nir, nir_lower_samplers); + NIR_PASS_V(nir, d3d12_create_bare_samplers); + + if (key->samples_int_textures) + NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex, + key->tex_wrap_states, key->swizzle_state, + screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS)); + + if (key->vs.needs_format_emulation) + d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion); + + uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos; + uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms; + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16); + shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 && + nir->info.num_ubos > num_ubos_before_lower_to_ubo; + + if (key->last_vertex_processing_stage) { + if (key->invert_depth) + NIR_PASS_V(nir, d3d12_nir_invert_depth); + NIR_PASS_V(nir, nir_lower_clip_halfz); + NIR_PASS_V(nir, d3d12_lower_yflip); + } + NIR_PASS_V(nir, nir_lower_packed_ubo_loads); + NIR_PASS_V(nir, d3d12_lower_load_first_vertex); + NIR_PASS_V(nir, d3d12_lower_state_vars, shader); + NIR_PASS_V(nir, d3d12_lower_bool_input); + + struct nir_to_dxil_options opts = {}; + opts.interpolate_at_vertex = screen->have_load_at_vertex; + opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported; + opts.ubo_binding_offset = shader->has_default_ubo0 ? 
0 : 1; + opts.provoking_vertex = key->fs.provoking_vertex; + + struct blob tmp; + if (!nir_to_dxil(nir, &opts, &tmp)) { + debug_printf("D3D12: nir_to_dxil failed\n"); + return NULL; + } + + // Non-ubo variables + nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { + auto type = glsl_without_array(var->type); + if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) { + unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1; + for (unsigned i = 0; i < count; ++i) { + shader->srv_bindings[shader->num_srv_bindings].index = var->data.binding + i; + shader->srv_bindings[shader->num_srv_bindings].binding = var->data.binding; + shader->srv_bindings[shader->num_srv_bindings].dimension = resource_dimension(glsl_get_sampler_dim(type)); + shader->num_srv_bindings++; + } + } + } + + // Ubo variables + if(nir->info.num_ubos) { + // Ignore state_vars ubo as it is bound as root constants + unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 
1 : 0); + for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) { + shader->cb_bindings[shader->num_cb_bindings++].binding = i; + } + } + ctx->validation_tools->validate_and_sign(&tmp); + + if (d3d12_debug & D3D12_DEBUG_DISASS) { + ctx->validation_tools->disassemble(&tmp); + } + + blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length); + + if (d3d12_debug & D3D12_DEBUG_DXIL) { + char buf[256]; + static int i; + snprintf(buf, sizeof(buf), "dump%02d.dxil", i++); + FILE *fp = fopen(buf, "wb"); + fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp); + fclose(fp); + fprintf(stderr, "wrote '%s'...\n", buf); + } + return shader; +} + +struct d3d12_selection_context { + struct d3d12_context *ctx; + const struct pipe_draw_info *dinfo; + bool needs_point_sprite_lowering; + bool needs_vertex_reordering; + unsigned provoking_vertex; + bool alternate_tri; + unsigned fill_mode_lowered; + unsigned cull_mode_lowered; + bool manual_depth_range; + unsigned missing_dual_src_outputs; + unsigned frag_result_color_lowering; +}; + +static unsigned +missing_dual_src_outputs(struct d3d12_context *ctx) +{ + if (!ctx->gfx_pipeline_state.blend->is_dual_src) + return 0; + + struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + nir_shader *s = fs->initial; + + unsigned indices_seen = 0; + nir_foreach_function(function, s) { + if (function->impl) { + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_deref) + continue; + + nir_variable *var = nir_intrinsic_get_var(intr, 0); + if (var->data.mode != nir_var_shader_out || + (var->data.location != FRAG_RESULT_COLOR && + var->data.location != FRAG_RESULT_DATA0)) + continue; + + indices_seen |= 1u << var->data.index; + if ((indices_seen & 3) == 3) + return 0; + } + } + } + } + + return 3 & 
~indices_seen; +} + +static unsigned +frag_result_color_lowering(struct d3d12_context *ctx) +{ + struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + assert(fs); + + if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) + return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0; + + return 0; +} + +static bool +manual_depth_range(struct d3d12_context *ctx) +{ + if (!d3d12_need_zero_one_depth_range(ctx)) + return false; + + /** + * If we can't use the D3D12 zero-one depth-range, we might have to apply + * depth-range ourselves. + * + * Because we only need to override the depth-range to zero-one range in + * the case where we write frag-depth, we only need to apply manual + * depth-range to gl_FragCoord.z. + * + * No extra care is needed to be taken in the case where gl_FragDepth is + * written conditionally, because the GLSL 4.60 spec states: + * + * If a shader statically assigns a value to gl_FragDepth, and there + * is an execution path through the shader that does not set + * gl_FragDepth, then the value of the fragment’s depth may be + * undefined for executions of the shader that take that path. That + * is, if the set of linked fragment shaders statically contain a + * write to gl_FragDepth, then it is responsible for always writing + * it. 
+ */ + + struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + return fs && fs->initial->info.inputs_read & VARYING_BIT_POS; +} + +static bool +needs_edge_flag_fix(enum pipe_prim_type mode) +{ + return (mode == PIPE_PRIM_QUADS || + mode == PIPE_PRIM_QUAD_STRIP || + mode == PIPE_PRIM_POLYGON); +} + +static unsigned +fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo) +{ + struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; + + if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL && + !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) || + ctx->gfx_pipeline_state.rast == NULL || + (dinfo->mode != PIPE_PRIM_TRIANGLES && + dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP)) + return PIPE_POLYGON_MODE_FILL; + + /* D3D12 supports line mode (wireframe) but doesn't support edge flags */ + if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE && + ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) || + (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE && + ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) && + (vs->initial->info.outputs_written & VARYING_BIT_EDGE || + needs_edge_flag_fix(ctx->initial_api_prim))) + return PIPE_POLYGON_MODE_LINE; + + if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT) + return PIPE_POLYGON_MODE_POINT; + + return PIPE_POLYGON_MODE_FILL; +} + +static bool +needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo) +{ + struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; + struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; + + if (gs != NULL && !gs->is_gs_variant) { + /* There is an user GS; Check if it outputs points with PSIZE */ + return (gs->initial->info.gs.output_primitive == GL_POINTS && + gs->initial->info.outputs_written & VARYING_BIT_PSIZ); + } else { + /* No user GS; check if we are drawing wide 
points */ + return ((dinfo->mode == PIPE_PRIM_POINTS || + fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) && + (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 || + ctx->gfx_pipeline_state.rast->base.offset_point || + (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex && + vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) && + (vs->initial->info.outputs_written & VARYING_BIT_POS)); + } +} + +static unsigned +cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode) +{ + if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL && + !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) || + ctx->gfx_pipeline_state.rast == NULL || + ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE) + return PIPE_FACE_NONE; + + return ctx->gfx_pipeline_state.rast->base.cull_face; +} + +static unsigned +get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate) +{ + struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX]; + struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; + struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_gs_variant ? gs : vs; + + /* Make sure GL prims match Gallium prims */ + STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS); + STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES); + STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP); + + enum pipe_prim_type mode; + switch (last_vertex_stage->stage) { + case PIPE_SHADER_GEOMETRY: + mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive; + break; + case PIPE_SHADER_VERTEX: + mode = sel_ctx->dinfo ? 
sel_ctx->dinfo->mode : PIPE_PRIM_TRIANGLES; + break; + default: + unreachable("Tesselation shaders are not supported"); + } + + bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast && + sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first; + *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) && + (!gs || gs->is_gs_variant || + gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min); + return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1; +} + +static bool +has_flat_varyings(struct d3d12_context *ctx) +{ + struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + + if (!fs || !fs->current) + return false; + + nir_foreach_variable_with_modes(input, fs->current->nir, + nir_var_shader_in) { + if (input->data.interpolation == INTERP_MODE_FLAT) + return true; + } + + return false; +} + +static bool +needs_vertex_reordering(struct d3d12_selection_context *sel_ctx) +{ + struct d3d12_context *ctx = sel_ctx->ctx; + bool flat = has_flat_varyings(ctx); + bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0; + + if (fill_mode_lowered(ctx, sel_ctx->dinfo) != PIPE_POLYGON_MODE_FILL) + return false; + + /* TODO add support for line primitives */ + + /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex. + If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */ + if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex || + sel_ctx->alternate_tri)) + return true; + + /* When transform feedback is enabled and the output is alternating (triangle strip or triangle + strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This + only works when there is no flat shading involved. In that scenario, we don't care about + the provoking vertex. 
*/ + if (xfb && !flat && sel_ctx->alternate_tri) { + sel_ctx->provoking_vertex = 0; + return true; + } + + return false; +} + +static nir_variable * +create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info, + unsigned slot, nir_variable_mode mode) +{ + nir_variable *var; + char tmp[100]; + + snprintf(tmp, ARRAY_SIZE(tmp), + mode == nir_var_shader_in ? "in_%d" : "out_%d", + info->vars[slot].driver_location); + var = nir_variable_create(nir, mode, info->vars[slot].type, tmp); + var->data.location = slot; + var->data.driver_location = info->vars[slot].driver_location; + var->data.interpolation = info->vars[slot].interpolation; + + return var; +} + +static void +fill_varyings(struct d3d12_varying_info *info, nir_shader *s, + nir_variable_mode modes, uint64_t mask) +{ + nir_foreach_variable_with_modes(var, s, modes) { + unsigned slot = var->data.location; + uint64_t slot_bit = BITFIELD64_BIT(slot); + + if (!(mask & slot_bit)) + continue; + info->vars[slot].driver_location = var->data.driver_location; + info->vars[slot].type = var->type; + info->vars[slot].interpolation = var->data.interpolation; + info->mask |= slot_bit; + } +} + +static void +fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs) +{ + if (!fs || !fs->current) + return; + + nir_foreach_variable_with_modes(input, fs->current->nir, + nir_var_shader_in) { + if (input->data.interpolation == INTERP_MODE_FLAT) + key->flat_varyings |= BITFIELD64_BIT(input->data.location); + } +} + +static void +validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx) +{ + struct d3d12_context *ctx = sel_ctx->ctx; + d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; + d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + struct d3d12_gs_variant_key key = {0}; + bool variant_needed = false; + + d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; + + /* Nothing to do if there is a user geometry shader bound */ + if (gs != NULL 
&& !gs->is_gs_variant) + return; + + /* Fill the geometry shader variant key */ + if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) { + key.fill_mode = sel_ctx->fill_mode_lowered; + key.cull_mode = sel_ctx->cull_mode_lowered; + key.has_front_face = (fs->initial->info.system_values_read & SYSTEM_BIT_FRONT_FACE) ? 1 : 0; + if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face) + key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0); + key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim); + fill_flat_varyings(&key, fs); + if (key.flat_varyings != 0) + key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first; + variant_needed = true; + } else if (sel_ctx->needs_point_sprite_lowering) { + key.passthrough = true; + variant_needed = true; + } else if (sel_ctx->needs_vertex_reordering) { + /* TODO support cases where flat shading (pv != 0) and xfb are enabled */ + key.provoking_vertex = sel_ctx->provoking_vertex; + key.alternate_tri = sel_ctx->alternate_tri; + variant_needed = true; + } + + if (variant_needed) { + fill_varyings(&key.varyings, vs->initial, nir_var_shader_out, + vs->initial->info.outputs_written); + } + + /* Check if the currently bound geometry shader variant is correct */ + if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0) + return; + + /* Find/create the proper variant and bind it */ + gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL; + ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs; +} + +static bool +d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have) +{ + assert(expect->stage == have->stage); + assert(expect); + assert(have); + + /* Because we only add varyings we check that a shader has at least the expected in- + * and outputs. 
*/ + if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs, + sizeof(struct d3d12_varying_info)) || + memcmp(&expect->required_varying_outputs, &have->required_varying_outputs, + sizeof(struct d3d12_varying_info)) || + (expect->next_varying_inputs != have->next_varying_inputs) || + (expect->prev_varying_outputs != have->prev_varying_outputs)) + return false; + + if (expect->stage == PIPE_SHADER_GEOMETRY) { + if (expect->gs.writes_psize) { + if (!have->gs.writes_psize || + expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out || + expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable || + expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left || + expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex) + return false; + } else if (have->gs.writes_psize) { + return false; + } + if (expect->gs.primitive_id != have->gs.primitive_id || + expect->gs.triangle_strip != have->gs.triangle_strip) + return false; + } else if (expect->stage == PIPE_SHADER_FRAGMENT) { + if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering || + expect->fs.manual_depth_range != have->fs.manual_depth_range || + expect->fs.polygon_stipple != have->fs.polygon_stipple || + expect->fs.cast_to_uint != have->fs.cast_to_uint || + expect->fs.cast_to_int != have->fs.cast_to_int) + return false; + } + + if (expect->tex_saturate_s != have->tex_saturate_s || + expect->tex_saturate_r != have->tex_saturate_r || + expect->tex_saturate_t != have->tex_saturate_t) + return false; + + if (expect->samples_int_textures != have->samples_int_textures) + return false; + + if (expect->n_texture_states != have->n_texture_states) + return false; + + if (memcmp(expect->tex_wrap_states, have->tex_wrap_states, + expect->n_texture_states * sizeof(dxil_wrap_sampler_state))) + return false; + + if (memcmp(expect->swizzle_state, have->swizzle_state, + expect->n_texture_states * sizeof(dxil_texture_swizzle_state))) + return false; + + 
if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs, + expect->n_texture_states * sizeof(enum compare_func))) + return false; + + if (expect->invert_depth != have->invert_depth) + return false; + + if (expect->stage == PIPE_SHADER_VERTEX) { + if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation) + return false; + + if (expect->vs.needs_format_emulation) { + if (memcmp(expect->vs.format_conversion, have->vs.format_conversion, + PIPE_MAX_ATTRIBS * sizeof (enum pipe_format))) + return false; + } + } + + if (expect->fs.provoking_vertex != have->fs.provoking_vertex) + return false; + + return true; +} + +static void +d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx, + d3d12_shader_key *key, d3d12_shader_selector *sel, + d3d12_shader_selector *prev, d3d12_shader_selector *next) +{ + pipe_shader_type stage = sel->stage; + + uint64_t system_generated_in_values = + VARYING_BIT_PNTC | + VARYING_BIT_PRIMITIVE_ID; + + uint64_t system_out_values = + VARYING_BIT_CLIP_DIST0 | + VARYING_BIT_CLIP_DIST1; + + memset(key, 0, sizeof(d3d12_shader_key)); + key->stage = stage; + + if (prev) { + /* We require as inputs what the previous stage has written, + * except certain system values */ + if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY) + system_out_values |= VARYING_BIT_POS; + if (stage == PIPE_SHADER_FRAGMENT) + system_out_values |= VARYING_BIT_PSIZ; + uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values; + fill_varyings(&key->required_varying_inputs, prev->current->nir, + nir_var_shader_out, mask); + key->prev_varying_outputs = prev->current->nir->info.outputs_written; + + /* Set the provoking vertex based on the previous shader output. 
Only set the + * key value if the driver actually supports changing the provoking vertex though */ + if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast && + !sel_ctx->needs_vertex_reordering && + d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex) + key->fs.provoking_vertex = sel_ctx->provoking_vertex; + } + + /* We require as outputs what the next stage reads, + * except certain system values */ + if (next) { + if (!next->is_gs_variant) { + if (stage == PIPE_SHADER_VERTEX) + system_generated_in_values |= VARYING_BIT_POS; + uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values; + fill_varyings(&key->required_varying_outputs, next->current->nir, + nir_var_shader_in, mask); + } + key->next_varying_inputs = next->current->nir->info.inputs_read; + } + + if (stage == PIPE_SHADER_GEOMETRY || + (stage == PIPE_SHADER_VERTEX && (!next || next->stage != PIPE_SHADER_GEOMETRY))) { + key->last_vertex_processing_stage = 1; + key->invert_depth = sel_ctx->ctx->reverse_depth_range; + if (sel_ctx->ctx->pstipple.enabled) + key->next_varying_inputs |= VARYING_BIT_POS; + } + + if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) { + struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base; + if (sel_ctx->needs_point_sprite_lowering) { + key->gs.writes_psize = 1; + key->gs.point_size_per_vertex = rast->point_size_per_vertex; + key->gs.sprite_coord_enable = rast->sprite_coord_enable; + key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT); + if (sel_ctx->ctx->flip_y < 0) + key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left; + key->gs.aa_point = rast->point_smooth; + key->gs.stream_output_factor = 6; + } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) { + key->gs.stream_output_factor = 2; + } else if (sel_ctx->needs_vertex_reordering && !sel->is_gs_variant) { + key->gs.triangle_strip = 1; + } + + if 
(sel->is_gs_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) + key->gs.primitive_id = 1; + } else if (stage == PIPE_SHADER_FRAGMENT) { + key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs; + key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering; + key->fs.manual_depth_range = sel_ctx->manual_depth_range; + key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled; + if (sel_ctx->ctx->gfx_pipeline_state.blend && + sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable && + !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) { + key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format); + key->fs.cast_to_int = !key->fs.cast_to_uint; + } + } + + if (sel->samples_int_textures) { + key->samples_int_textures = sel->samples_int_textures; + key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage]; + /* Copy only states with integer textures */ + for(int i = 0; i < key->n_texture_states; ++i) { + auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i]; + if (wrap_state.is_int_sampler) { + memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state)); + key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i]; + } + } + } + + for (int i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) { + if (sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST) + continue; + + if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP) + key->tex_saturate_r |= 1 << i; + if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP) + key->tex_saturate_s |= 1 << i; + if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP) + key->tex_saturate_t |= 1 << i; + } + + if (sel->compare_with_lod_bias_grad) { + key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage]; + memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage], + key->n_texture_states * sizeof(enum compare_func)); + memcpy(key->swizzle_state, 
sel_ctx->ctx->tex_swizzle_state[stage], + key->n_texture_states * sizeof(dxil_texture_swizzle_state)); + } + + if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) { + key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation; + if (key->vs.needs_format_emulation) { + memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion, + sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)); + } + } + + if (stage == PIPE_SHADER_FRAGMENT && + sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] && + sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant && + sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) { + key->fs.remap_front_facing = 1; + } +} + +static void +select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel, + d3d12_shader_selector *prev, d3d12_shader_selector *next) +{ + struct d3d12_context *ctx = sel_ctx->ctx; + d3d12_shader_key key; + nir_shader *new_nir_variant; + unsigned pstipple_binding = UINT32_MAX; + + d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next); + + /* Check for an existing variant */ + for (d3d12_shader *variant = sel->first; variant; + variant = variant->next_variant) { + + if (d3d12_compare_shader_keys(&key, &variant->key)) { + sel->current = variant; + return; + } + } + + /* Clone the NIR shader */ + new_nir_variant = nir_shader_clone(sel, sel->initial); + + /* Apply any needed lowering passes */ + if (key.gs.writes_psize) { + NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite, + !key.gs.sprite_origin_upper_left, + key.gs.point_size_per_vertex, + key.gs.sprite_coord_enable, + key.next_varying_inputs); + + nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); + nir_shader_gather_info(new_nir_variant, impl); + } + + if (key.gs.primitive_id) { + NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id); + + nir_function_impl *impl = 
nir_shader_get_entrypoint(new_nir_variant); + nir_shader_gather_info(new_nir_variant, impl); + } + + if (key.gs.triangle_strip) + NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip); + + if (key.fs.polygon_stipple) { + NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs, + &pstipple_binding, 0, false); + + nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); + nir_shader_gather_info(new_nir_variant, impl); + } + + if (key.fs.remap_front_facing) { + d3d12_forward_front_face(new_nir_variant); + + nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); + nir_shader_gather_info(new_nir_variant, impl); + } + + if (key.fs.missing_dual_src_outputs) { + NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target, + key.fs.missing_dual_src_outputs); + } else if (key.fs.frag_result_color_lowering) { + NIR_PASS_V(new_nir_variant, d3d12_lower_frag_result, + key.fs.frag_result_color_lowering); + } + + if (key.fs.manual_depth_range) + NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range); + + if (sel->compare_with_lod_bias_grad) + NIR_PASS_V(new_nir_variant, d3d12_lower_sample_tex_compare, key.n_texture_states, + key.sampler_compare_funcs, key.swizzle_state); + + if (key.fs.cast_to_uint) + NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false); + if (key.fs.cast_to_int) + NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true); + + { + struct nir_lower_tex_options tex_options = { }; + tex_options.lower_txp = ~0u; /* No equivalent for textureProj */ + tex_options.lower_rect = true; + tex_options.lower_rect_offset = true; + tex_options.saturate_s = key.tex_saturate_s; + tex_options.saturate_r = key.tex_saturate_r; + tex_options.saturate_t = key.tex_saturate_t; + + NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options); + } + + /* Add the needed in and outputs, and re-sort */ + uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read; + + if (prev) { + while (mask) { + int slot = u_bit_scan64(&mask); + 
create_varying_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in); + } + d3d12_reassign_driver_locations(new_nir_variant, nir_var_shader_in, + key.prev_varying_outputs); + } + + mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written; + + if (next) { + while (mask) { + int slot = u_bit_scan64(&mask); + create_varying_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out); + } + d3d12_reassign_driver_locations(new_nir_variant, nir_var_shader_out, + key.next_varying_inputs); + } + + d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant); + assert(new_variant); + + /* keep track of polygon stipple texture binding */ + new_variant->pstipple_binding = pstipple_binding; + + /* prepend the new shader in the selector chain and pick it */ + new_variant->next_variant = sel->first; + sel->current = sel->first = new_variant; +} + +static d3d12_shader_selector * +get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current) +{ + /* No TESS_CTRL or TESS_EVAL yet */ + + switch (current) { + case PIPE_SHADER_VERTEX: + return NULL; + case PIPE_SHADER_FRAGMENT: + if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) + return ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; + /* fallthrough */ + case PIPE_SHADER_GEOMETRY: + return ctx->gfx_stages[PIPE_SHADER_VERTEX]; + default: + unreachable("shader type not supported"); + } +} + +static d3d12_shader_selector * +get_next_shader(struct d3d12_context *ctx, pipe_shader_type current) +{ + /* No TESS_CTRL or TESS_EVAL yet */ + + switch (current) { + case PIPE_SHADER_VERTEX: + if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) + return ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; + /* fallthrough */ + case PIPE_SHADER_GEOMETRY: + return ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + case PIPE_SHADER_FRAGMENT: + return NULL; + default: + unreachable("shader type not supported"); + } +} + +enum tex_scan_flags { + TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0, + 
TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1, + TEX_SCAN_ALL_FLAGS = (1 << 2) - 1 +}; + +static unsigned +scan_texture_use(nir_shader *nir) +{ + unsigned result = 0; + nir_foreach_function(func, nir) { + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_tex) { + auto tex = nir_instr_as_tex(instr); + switch (tex->op) { + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + if (tex->is_shadow) + result |= TEX_CMP_WITH_LOD_BIAS_GRAD; + /* fallthrough */ + case nir_texop_tex: + if (tex->dest_type & (nir_type_int | nir_type_uint)) + result |= TEX_SAMPLE_INTEGER_TEXTURE; + default: + ; + } + } + if (TEX_SCAN_ALL_FLAGS == result) + return result; + } + } + } + return result; +} + +static uint64_t +update_so_info(struct pipe_stream_output_info *so_info, + uint64_t outputs_written) +{ + uint64_t so_outputs = 0; + uint8_t reverse_map[64] = {0}; + unsigned slot = 0; + + while (outputs_written) + reverse_map[slot++] = u_bit_scan64(&outputs_written); + + for (unsigned i = 0; i < so_info->num_outputs; i++) { + struct pipe_stream_output *output = &so_info->output[i]; + + /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ + output->register_index = reverse_map[output->register_index]; + + so_outputs |= 1ull << output->register_index; + } + + return so_outputs; +} + +struct d3d12_shader_selector * +d3d12_create_shader(struct d3d12_context *ctx, + pipe_shader_type stage, + const struct pipe_shader_state *shader) +{ + struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector); + sel->stage = stage; + + struct nir_shader *nir = NULL; + + if (shader->type == PIPE_SHADER_IR_NIR) { + nir = (nir_shader *)shader->ir.nir; + } else { + assert(shader->type == PIPE_SHADER_IR_TGSI); + nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false); + } + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + unsigned tex_scan_result = scan_texture_use(nir); + sel->samples_int_textures = 
(tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0; + sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0; + + memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info)); + update_so_info(&sel->so_info, nir->info.outputs_written); + + assert(nir != NULL); + d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage); + d3d12_shader_selector *next = get_next_shader(ctx, sel->stage); + + uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ? + 0 : VARYING_BIT_PRIMITIVE_ID; + + uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ? + (1ull << FRAG_RESULT_STENCIL) : + VARYING_BIT_PRIMITIVE_ID; + + d3d12_fix_io_uint_type(nir, in_mask, out_mask); + + if (nir->info.stage != MESA_SHADER_VERTEX) + nir->info.inputs_read = + d3d12_reassign_driver_locations(nir, nir_var_shader_in, + prev ? prev->current->nir->info.outputs_written : 0); + else + nir->info.inputs_read = d3d12_sort_by_driver_location(nir, nir_var_shader_in); + + if (nir->info.stage != MESA_SHADER_FRAGMENT) { + nir->info.outputs_written = + d3d12_reassign_driver_locations(nir, nir_var_shader_out, + next ? next->current->nir->info.inputs_read : 0); + } else { + NIR_PASS_V(nir, nir_lower_fragcoord_wtrans); + d3d12_sort_ps_outputs(nir); + } + + /* Integer cube maps are not supported in DirectX because sampling is not supported + * on integer textures and TextureLoad is not supported for cube maps, so we have to + * lower integer cube maps to be handled like 2D textures arrays*/ + NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array); + + /* Keep this initial shader as the blue print for possible variants */ + sel->initial = nir; + + /* + * We must compile some shader here, because if the previous or a next shaders exists later + * when the shaders are bound, then the key evaluation in the shader selector will access + * the current variant of these prev and next shader, and we can only assign + * a current variant when it has been successfully compiled. 
+ * + * For shaders that require lowering because certain instructions are not available + * and their emulation is state depended (like sampling an integer texture that must be + * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD), + * we must go through the shader selector here to create a compilable variant. + * For shaders that are not depended on the state this is just compiling the original + * shader. + * + * TODO: get rid of having to compiling the shader here if it can be forseen that it will + * be thrown away (i.e. it depends on states that are likely to change before the shader is + * used for the first time) + */ + struct d3d12_selection_context sel_ctx = {0}; + sel_ctx.ctx = ctx; + select_shader_variant(&sel_ctx, sel, prev, next); + + if (!sel->current) { + ralloc_free(sel); + return NULL; + } + + return sel; +} + +void +d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo) +{ + static unsigned order[] = {PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT}; + struct d3d12_selection_context sel_ctx; + + sel_ctx.ctx = ctx; + sel_ctx.dinfo = dinfo; + sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo); + sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo); + sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered); + sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri); + sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx); + sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx); + sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx); + sel_ctx.manual_depth_range = manual_depth_range(ctx); + + validate_geometry_shader_variant(&sel_ctx); + + for (int i = 0; i < ARRAY_SIZE(order); ++i) { + auto sel = ctx->gfx_stages[order[i]]; + if (!sel) + continue; + + d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage); + d3d12_shader_selector *next 
= get_next_shader(ctx, sel->stage); + + select_shader_variant(&sel_ctx, sel, prev, next); + } +} + +void +d3d12_shader_free(struct d3d12_shader_selector *sel) +{ + auto shader = sel->first; + while (shader) { + free(shader->bytecode); + shader = shader->next_variant; + } + ralloc_free(sel->initial); + ralloc_free(sel); +} + +// Used to get path to self +extern "C" extern IMAGE_DOS_HEADER __ImageBase; + +void d3d12_validation_tools::load_dxil_dll() +{ + if (!dxil_module.load("dxil.dll")) { + char selfPath[MAX_PATH] = ""; + uint32_t pathSize = GetModuleFileNameA((HINSTANCE)&__ImageBase, selfPath, sizeof(selfPath)); + if (pathSize == 0 || pathSize == sizeof(selfPath)) { + debug_printf("D3D12: Unable to get path to self"); + return; + } + + auto lastSlash = strrchr(selfPath, '\\'); + if (!lastSlash) { + debug_printf("D3D12: Unable to get path to self"); + return; + } + + *(lastSlash + 1) = '\0'; + if (strcat_s(selfPath, "dxil.dll") != 0) { + debug_printf("D3D12: Unable to get path to dxil.dll next to self"); + return; + } + + dxil_module.load(selfPath); + } +} + +d3d12_validation_tools::d3d12_validation_tools() +{ + load_dxil_dll(); + DxcCreateInstanceProc dxil_create_func = (DxcCreateInstanceProc)GetProcAddress(dxil_module, "DxcCreateInstance"); + assert(dxil_create_func); + + HRESULT hr = dxil_create_func(CLSID_DxcValidator, IID_PPV_ARGS(&validator)); + if (FAILED(hr)) { + debug_printf("D3D12: Unable to create validator\n"); + } + + DxcCreateInstanceProc compiler_create_func = nullptr; + if(dxc_compiler_module.load("dxcompiler.dll")) + compiler_create_func = (DxcCreateInstanceProc)GetProcAddress(dxc_compiler_module, "DxcCreateInstance"); + + if (compiler_create_func) { + hr = compiler_create_func(CLSID_DxcLibrary, IID_PPV_ARGS(&library)); + if (FAILED(hr)) { + debug_printf("D3D12: Unable to create library instance: %x\n", hr); + } + + if (d3d12_debug & D3D12_DEBUG_DISASS) { + hr = compiler_create_func(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler)); + if (FAILED(hr)) { + 
debug_printf("D3D12: Unable to create compiler instance\n"); + } + } + } else if (d3d12_debug & D3D12_DEBUG_DISASS) { + debug_printf("D3D12: Disassembly requested but compiler couldn't be loaded\n"); + } +} + +d3d12_validation_tools::HModule::HModule(): + module(0) +{ +} + +d3d12_validation_tools::HModule::~HModule() +{ + if (module) + ::FreeLibrary(module); +} + +inline +d3d12_validation_tools::HModule::operator HMODULE () const +{ + return module; +} + +bool +d3d12_validation_tools::HModule::load(LPCSTR file_name) +{ + module = ::LoadLibrary(file_name); + return module != nullptr; +} + + +class ShaderBlob : public IDxcBlob { +public: + ShaderBlob(blob* data) : m_data(data) {} + + LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return m_data->data; } + + SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return m_data->size; } + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; } + + ULONG STDMETHODCALLTYPE AddRef() override { return 1; } + + ULONG STDMETHODCALLTYPE Release() override { return 0; } + + blob* m_data; +}; + +bool d3d12_validation_tools::validate_and_sign(struct blob *dxil) +{ + ShaderBlob source(dxil); + + ComPtr result; + if (!validator) + return false; + + validator->Validate(&source, DxcValidatorFlags_InPlaceEdit, &result); + HRESULT validationStatus; + result->GetStatus(&validationStatus); + if (FAILED(validationStatus) && library) { + ComPtr printBlob, printBlobUtf8; + result->GetErrorBuffer(&printBlob); + library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf()); + + char *errorString; + if (printBlobUtf8) { + errorString = reinterpret_cast(printBlobUtf8->GetBufferPointer()); + } + + errorString[printBlobUtf8->GetBufferSize() - 1] = 0; + debug_printf("== VALIDATION ERROR =============================================\n%s\n" + "== END ==========================================================\n", + errorString); + + return false; + } + return true; + +} + +void 
d3d12_validation_tools::disassemble(struct blob *dxil) +{ + /* Print the DXIL disassembly of the blob to stderr. Both the compiler (to + * disassemble) and the library (to convert to UTF-8) are required; either + * one may have failed to be created independently of the other. */ + if (!compiler || !library) { + fprintf(stderr, "D3D12: No Disassembler\n"); + return; + } + ShaderBlob source(dxil); + IDxcBlobEncoding* pDisassembly = nullptr; + + if (FAILED(compiler->Disassemble(&source, &pDisassembly)) || !pDisassembly) { + fprintf(stderr, "D3D12: Disassembler failed\n"); + return; + } + + ComPtr<IDxcBlobEncoding> dissassably(pDisassembly); + ComPtr<IDxcBlobEncoding> blobUtf8; + library->GetBlobAsUtf8(pDisassembly, blobUtf8.GetAddressOf()); + /* An empty blob has no last byte to terminate, so treat it as a failure. */ + if (!blobUtf8 || blobUtf8->GetBufferSize() == 0) { + fprintf(stderr, "D3D12: Unable to get utf8 encoding\n"); + return; + } + + char *disassembly = reinterpret_cast<char *>(blobUtf8->GetBufferPointer()); + disassembly[blobUtf8->GetBufferSize() - 1] = 0; + + fprintf(stderr, "== BEGIN SHADER ============================================\n" + "%s\n" + "== END SHADER ==============================================\n", + disassembly); +} + +/* Sort io values so that first come normal varyings, + * then system values, and then system generated values. + */ +static void insert_sorted(struct exec_list *var_list, nir_variable *new_var) +{ + nir_foreach_variable_in_list(var, var_list) { + if (var->data.driver_location > new_var->data.driver_location || + (var->data.driver_location == new_var->data.driver_location && + var->data.location > new_var->data.location)) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + exec_list_push_tail(var_list, &new_var->node); +} + +/* Order varyings according to driver location */ +uint64_t +d3d12_sort_by_driver_location(nir_shader *s, nir_variable_mode modes) +{ + uint64_t result = 0; + struct exec_list new_list; + exec_list_make_empty(&new_list); + + nir_foreach_variable_with_modes_safe(var, s, modes) { + exec_node_remove(&var->node); + insert_sorted(&new_list, var); + result |= 1ull << var->data.location; + } + exec_list_append(&s->variables, &new_list); + return result; +} + +/* Sort PS outputs so that color outputs come first */ +void +d3d12_sort_ps_outputs(nir_shader *s) +{ + struct exec_list 
new_list; + exec_list_make_empty(&new_list); + + nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) { + exec_node_remove(&var->node); + /* We use the driver_location here to avoid introducing a new + * struct or member variable here. The true, updated driver location + * will be written below, after sorting */ + switch (var->data.location) { + case FRAG_RESULT_DEPTH: + var->data.driver_location = 1; + break; + case FRAG_RESULT_STENCIL: + var->data.driver_location = 2; + break; + case FRAG_RESULT_SAMPLE_MASK: + var->data.driver_location = 3; + break; + default: + var->data.driver_location = 0; + } + insert_sorted(&new_list, var); + } + exec_list_append(&s->variables, &new_list); + + unsigned driver_loc = 0; + nir_foreach_variable_with_modes(var, s, nir_var_shader_out) { + var->data.driver_location = driver_loc++; + } +} + +/* Order between stage values so that normal varyings come first, + * then sysvalues and then system generated values. + */ +uint64_t +d3d12_reassign_driver_locations(nir_shader *s, nir_variable_mode modes, + uint64_t other_stage_mask) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + + uint64_t result = 0; + nir_foreach_variable_with_modes_safe(var, s, modes) { + exec_node_remove(&var->node); + /* We use the driver_location here to avoid introducing a new + * struct or member variable here. 
The true, updated driver location + * will be written below, after sorting */ + var->data.driver_location = nir_var_to_dxil_sysvalue_type(var, other_stage_mask); + insert_sorted(&new_list, var); + } + exec_list_append(&s->variables, &new_list); + + unsigned driver_loc = 0; + nir_foreach_variable_with_modes(var, s, modes) { + result |= 1ull << var->data.location; + var->data.driver_location = driver_loc++; + } + return result; +} diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.h b/src/gallium/drivers/d3d12/d3d12_compiler.h new file mode 100644 index 00000000000..d8382012e9b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_compiler.h @@ -0,0 +1,208 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_COMPILER_H +#define D3D12_COMPILER_H + +#include "d3d12_context.h" + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "compiler/shader_info.h" +#include "program/prog_statevars.h" + +#include "nir.h" + +struct pipe_screen; + +#ifdef __cplusplus +extern "C" { +#endif + +struct d3d12_validation_tools *d3d12_validator_create(); + +void d3d12_validator_destroy(struct d3d12_validation_tools *validator); + +const void * +d3d12_get_compiler_options(struct pipe_screen *screen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader); + +struct d3d12_varying_info { + struct { + const struct glsl_type *type; + unsigned interpolation:3; // INTERP_MODE_COUNT = 5 + unsigned driver_location:6; // VARYING_SLOT_MAX = 64 + } vars[VARYING_SLOT_MAX]; + uint64_t mask; +}; + +struct d3d12_shader_key { + enum pipe_shader_type stage; + + struct d3d12_varying_info required_varying_inputs; + struct d3d12_varying_info required_varying_outputs; + uint64_t next_varying_inputs; + uint64_t prev_varying_outputs; + unsigned last_vertex_processing_stage : 1; + unsigned invert_depth : 1; + unsigned samples_int_textures : 1; + unsigned tex_saturate_s : PIPE_MAX_SAMPLERS; + unsigned tex_saturate_r : PIPE_MAX_SAMPLERS; + unsigned tex_saturate_t : PIPE_MAX_SAMPLERS; + + struct { + unsigned needs_format_emulation:1; + enum pipe_format format_conversion[PIPE_MAX_ATTRIBS]; + } vs; + + struct { + unsigned sprite_coord_enable:24; + unsigned sprite_origin_upper_left:1; + unsigned point_pos_stream_out:1; + unsigned writes_psize:1; + unsigned point_size_per_vertex:1; + unsigned aa_point:1; + unsigned stream_output_factor:3; + unsigned primitive_id:1; + unsigned triangle_strip:1; + } gs; + + struct { + unsigned missing_dual_src_outputs : 2; + unsigned frag_result_color_lowering : 4; + unsigned cast_to_uint : 1; + unsigned cast_to_int : 1; + unsigned provoking_vertex : 2; + unsigned manual_depth_range : 1; + unsigned polygon_stipple : 1; + unsigned remap_front_facing : 
1; + } fs; + + int n_texture_states; + dxil_wrap_sampler_state tex_wrap_states[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + dxil_texture_swizzle_state swizzle_state[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + enum compare_func sampler_compare_funcs[PIPE_MAX_SHADER_SAMPLER_VIEWS]; +}; + +struct d3d12_shader { + void *bytecode; + size_t bytecode_length; + + nir_shader *nir; + + struct { + unsigned binding; + } cb_bindings[PIPE_MAX_CONSTANT_BUFFERS]; + size_t num_cb_bindings; + + struct { + enum d3d12_state_var var; + unsigned offset; + } state_vars[D3D12_MAX_STATE_VARS]; + unsigned num_state_vars; + size_t state_vars_size; + bool state_vars_used; + + struct { + int index; + int binding; + uint32_t dimension; + } srv_bindings[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + size_t num_srv_bindings; + + bool has_default_ubo0; + unsigned pstipple_binding; + + struct d3d12_shader_key key; + struct d3d12_shader *next_variant; +}; + +struct d3d12_gs_variant_key +{ + unsigned passthrough:1; + unsigned provoking_vertex:3; + unsigned alternate_tri:1; + unsigned fill_mode:2; + unsigned cull_mode:2; + unsigned has_front_face:1; + unsigned front_ccw:1; + unsigned edge_flag_fix:1; + unsigned flatshade_first:1; + uint64_t flat_varyings; + struct d3d12_varying_info varyings; +}; + +struct d3d12_shader_selector { + enum pipe_shader_type stage; + nir_shader *initial; + struct d3d12_shader *first; + struct d3d12_shader *current; + + struct pipe_stream_output_info so_info; + + unsigned samples_int_textures:1; + unsigned compare_with_lod_bias_grad:1; + + bool is_gs_variant; + struct d3d12_gs_variant_key gs_key; +}; + + +struct d3d12_shader_selector * +d3d12_create_shader(struct d3d12_context *ctx, + enum pipe_shader_type stage, + const struct pipe_shader_state *shader); + +void +d3d12_shader_free(struct d3d12_shader_selector *shader); + +void +d3d12_select_shader_variants(struct d3d12_context *ctx, + const struct pipe_draw_info *dinfo); + +void +d3d12_gs_variant_cache_init(struct d3d12_context *ctx); + +void 
+d3d12_gs_variant_cache_destroy(struct d3d12_context *ctx); + +struct d3d12_shader_selector * +d3d12_get_gs_variant(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key); + +uint64_t +d3d12_reassign_driver_locations(nir_shader *s, nir_variable_mode modes, + uint64_t other_stage_mask); + +uint64_t +d3d12_sort_by_driver_location(nir_shader *s, nir_variable_mode modes); + +void +d3d12_sort_ps_outputs(nir_shader *s); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp new file mode 100644 index 00000000000..aa7c8edb639 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_context.cpp @@ -0,0 +1,2052 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_blit.h" +#include "d3d12_context.h" +#include "d3d12_compiler.h" +#include "d3d12_debug.h" +#include "d3d12_fence.h" +#include "d3d12_format.h" +#include "d3d12_query.h" +#include "d3d12_resource.h" +#include "d3d12_root_signature.h" +#include "d3d12_screen.h" +#include "d3d12_surface.h" + +#include "util/u_blitter.h" +#include "util/u_dual_blend.h" +#include "util/u_framebuffer.h" +#include "util/u_helpers.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "util/u_pstipple.h" +#include "nir_to_dxil.h" + +#include "D3D12ResourceState.h" + +extern "C" { +#include "indices/u_primconvert.h" +} + +#include + +static void +d3d12_context_destroy(struct pipe_context *pctx) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + d3d12_validator_destroy(ctx->validation_tools); + + if (ctx->timestamp_query) + pctx->destroy_query(pctx, ctx->timestamp_query); + + util_blitter_destroy(ctx->blitter); + d3d12_end_batch(ctx, d3d12_current_batch(ctx)); + for (int i = 0; i < ARRAY_SIZE(ctx->batches); ++i) + d3d12_destroy_batch(ctx, &ctx->batches[i]); + ctx->cmdlist->Release(); + ctx->cmdqueue_fence->Release(); + d3d12_descriptor_pool_free(ctx->rtv_pool); + d3d12_descriptor_pool_free(ctx->dsv_pool); + d3d12_descriptor_pool_free(ctx->sampler_pool); + d3d12_descriptor_pool_free(ctx->view_pool); + util_primconvert_destroy(ctx->primconvert); + slab_destroy_child(&ctx->transfer_pool); + d3d12_gs_variant_cache_destroy(ctx); + d3d12_gfx_pipeline_state_cache_destroy(ctx); + d3d12_root_signature_cache_destroy(ctx); + + u_suballocator_destroy(ctx->query_allocator); + + if (pctx->stream_uploader) + u_upload_destroy(pctx->stream_uploader); + if (pctx->const_uploader) + u_upload_destroy(pctx->const_uploader); + + delete ctx->resource_state_manager; + + FREE(ctx); +} + +static void * +d3d12_create_vertex_elements_state(struct pipe_context *pctx, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + 
struct d3d12_vertex_elements_state *cso = CALLOC_STRUCT(d3d12_vertex_elements_state); + if (!cso) + return NULL; + + for (unsigned i = 0; i < num_elements; ++i) { + cso->elements[i].SemanticName = dxil_vs_attr_index_to_name(i); + cso->elements[i].SemanticIndex = 0; + + enum pipe_format format_helper = d3d12_emulated_vtx_format(elements[i].src_format); + bool needs_emulation = format_helper != elements[i].src_format; + cso->needs_format_emulation |= needs_emulation; + cso->format_conversion[i] = needs_emulation ? elements[i].src_format : PIPE_FORMAT_NONE; + + cso->elements[i].Format = d3d12_get_format(format_helper); + assert(cso->elements[i].Format != DXGI_FORMAT_UNKNOWN); + cso->elements[i].InputSlot = elements[i].vertex_buffer_index; + cso->elements[i].AlignedByteOffset = elements[i].src_offset; + + if (elements[i].instance_divisor) { + cso->elements[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; + cso->elements[i].InstanceDataStepRate = elements[i].instance_divisor; + } else { + cso->elements[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + cso->elements[i].InstanceDataStepRate = 0; + } + } + + cso->num_elements = num_elements; + return cso; +} + +static void +d3d12_bind_vertex_elements_state(struct pipe_context *pctx, + void *ve) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + ctx->gfx_pipeline_state.ves = (struct d3d12_vertex_elements_state *)ve; + ctx->state_dirty |= D3D12_DIRTY_VERTEX_ELEMENTS; +} + +static void +d3d12_delete_vertex_elements_state(struct pipe_context *pctx, + void *ve) +{ + FREE(ve); +} + +static D3D12_BLEND +blend_factor_rgb(enum pipe_blendfactor factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: return D3D12_BLEND_ZERO; + case PIPE_BLENDFACTOR_ONE: return D3D12_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case 
PIPE_BLENDFACTOR_DST_COLOR: return D3D12_BLEND_DEST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + case PIPE_BLENDFACTOR_CONST_COLOR: return D3D12_BLEND_BLEND_FACTOR; + case PIPE_BLENDFACTOR_SRC1_COLOR: return D3D12_BLEND_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return D3D12_BLEND_INV_BLEND_FACTOR; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return D3D12_BLEND_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA; + case PIPE_BLENDFACTOR_CONST_ALPHA: return D3D12_BLEND_BLEND_FACTOR; /* Doesn't exist in D3D12 */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return D3D12_BLEND_INV_BLEND_FACTOR; /* Doesn't exist in D3D12 */ + } + unreachable("unexpected blend factor"); +} + +static D3D12_BLEND +blend_factor_alpha(enum pipe_blendfactor factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: return D3D12_BLEND_ZERO; + case PIPE_BLENDFACTOR_ONE: return D3D12_BLEND_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_CONST_ALPHA: return D3D12_BLEND_BLEND_FACTOR; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: 
return D3D12_BLEND_INV_DEST_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return D3D12_BLEND_INV_BLEND_FACTOR; + } + unreachable("unexpected blend factor"); +} + +static unsigned +need_blend_factor_rgb(enum pipe_blendfactor factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return D3D12_BLEND_FACTOR_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return D3D12_BLEND_FACTOR_ALPHA; + + default: + return D3D12_BLEND_FACTOR_NONE; + } +} + +static unsigned +need_blend_factor_alpha(enum pipe_blendfactor factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return D3D12_BLEND_FACTOR_ANY; + + default: + return D3D12_BLEND_FACTOR_NONE; + } +} + +static D3D12_BLEND_OP +blend_op(enum pipe_blend_func func) +{ + switch (func) { + case PIPE_BLEND_ADD: return D3D12_BLEND_OP_ADD; + case PIPE_BLEND_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT; + case PIPE_BLEND_MIN: return D3D12_BLEND_OP_MIN; + case PIPE_BLEND_MAX: return D3D12_BLEND_OP_MAX; + } + unreachable("unexpected blend function"); +} + +static D3D12_COMPARISON_FUNC +compare_op(enum pipe_compare_func op) +{ + switch (op) { + case PIPE_FUNC_NEVER: return D3D12_COMPARISON_FUNC_NEVER; + case PIPE_FUNC_LESS: return D3D12_COMPARISON_FUNC_LESS; + case PIPE_FUNC_EQUAL: return D3D12_COMPARISON_FUNC_EQUAL; + case PIPE_FUNC_LEQUAL: return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case PIPE_FUNC_GREATER: return D3D12_COMPARISON_FUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case PIPE_FUNC_GEQUAL: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case 
PIPE_FUNC_ALWAYS: return D3D12_COMPARISON_FUNC_ALWAYS; + } + unreachable("unexpected compare"); +} + +static D3D12_LOGIC_OP +logic_op(enum pipe_logicop func) +{ + switch (func) { + case PIPE_LOGICOP_CLEAR: return D3D12_LOGIC_OP_CLEAR; + case PIPE_LOGICOP_NOR: return D3D12_LOGIC_OP_NOR; + case PIPE_LOGICOP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED; + case PIPE_LOGICOP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE; + case PIPE_LOGICOP_INVERT: return D3D12_LOGIC_OP_INVERT; + case PIPE_LOGICOP_XOR: return D3D12_LOGIC_OP_XOR; + case PIPE_LOGICOP_NAND: return D3D12_LOGIC_OP_NAND; + case PIPE_LOGICOP_AND: return D3D12_LOGIC_OP_AND; + case PIPE_LOGICOP_EQUIV: return D3D12_LOGIC_OP_EQUIV; + case PIPE_LOGICOP_NOOP: return D3D12_LOGIC_OP_NOOP; + case PIPE_LOGICOP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED; + case PIPE_LOGICOP_COPY: return D3D12_LOGIC_OP_COPY; + case PIPE_LOGICOP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE; + case PIPE_LOGICOP_OR: return D3D12_LOGIC_OP_OR; + case PIPE_LOGICOP_SET: return D3D12_LOGIC_OP_SET; + } + unreachable("unexpected logicop function"); +} + +static UINT8 +color_write_mask(unsigned colormask) +{ + UINT8 mask = 0; + + if (colormask & PIPE_MASK_R) + mask |= D3D12_COLOR_WRITE_ENABLE_RED; + if (colormask & PIPE_MASK_G) + mask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + if (colormask & PIPE_MASK_B) + mask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + if (colormask & PIPE_MASK_A) + mask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + + return mask; +} + +static void * +d3d12_create_blend_state(struct pipe_context *pctx, + const struct pipe_blend_state *blend_state) +{ + struct d3d12_blend_state *state = CALLOC_STRUCT(d3d12_blend_state); + if (!state) + return NULL; + + if (blend_state->logicop_enable) { + state->desc.RenderTarget[0].LogicOpEnable = TRUE; + state->desc.RenderTarget[0].LogicOp = logic_op((pipe_logicop) blend_state->logicop_func); + } + + /* TODO Dithering */ + + 
state->desc.AlphaToCoverageEnable = blend_state->alpha_to_coverage; + + int num_targets = 1; + if (blend_state->independent_blend_enable) { + state->desc.IndependentBlendEnable = TRUE; + num_targets = PIPE_MAX_COLOR_BUFS; + } + + for (int i = 0; i < num_targets; ++i) { + const struct pipe_rt_blend_state *rt = blend_state->rt + i; + + if (rt->blend_enable) { + state->desc.RenderTarget[i].BlendEnable = TRUE; + state->desc.RenderTarget[i].SrcBlend = blend_factor_rgb((pipe_blendfactor) rt->rgb_src_factor); + state->desc.RenderTarget[i].DestBlend = blend_factor_rgb((pipe_blendfactor) rt->rgb_dst_factor); + state->desc.RenderTarget[i].BlendOp = blend_op((pipe_blend_func) rt->rgb_func); + state->desc.RenderTarget[i].SrcBlendAlpha = blend_factor_alpha((pipe_blendfactor) rt->alpha_src_factor); + state->desc.RenderTarget[i].DestBlendAlpha = blend_factor_alpha((pipe_blendfactor) rt->alpha_dst_factor); + state->desc.RenderTarget[i].BlendOpAlpha = blend_op((pipe_blend_func) rt->alpha_func); + + state->blend_factor_flags |= need_blend_factor_rgb((pipe_blendfactor) rt->rgb_src_factor); + state->blend_factor_flags |= need_blend_factor_rgb((pipe_blendfactor) rt->rgb_dst_factor); + state->blend_factor_flags |= need_blend_factor_alpha((pipe_blendfactor) rt->alpha_src_factor); + state->blend_factor_flags |= need_blend_factor_alpha((pipe_blendfactor) rt->alpha_dst_factor); + + if (state->blend_factor_flags == (D3D12_BLEND_FACTOR_COLOR | D3D12_BLEND_FACTOR_ALPHA) && + (d3d12_debug & D3D12_DEBUG_VERBOSE)) { + /* We can't set a blend factor for both constant color and constant alpha */ + debug_printf("D3D12: unsupported blend factors combination (const color and const alpha)\n"); + } + + if (util_blend_state_is_dual(blend_state, i)) + state->is_dual_src = true; + } + + state->desc.RenderTarget[i].RenderTargetWriteMask = color_write_mask(rt->colormask); + } + + return state; +} + +static void +d3d12_bind_blend_state(struct pipe_context *pctx, void *blend_state) +{ + struct d3d12_context 
*ctx = d3d12_context(pctx); + struct d3d12_blend_state *new_state = (struct d3d12_blend_state *) blend_state; + struct d3d12_blend_state *old_state = ctx->gfx_pipeline_state.blend; + + ctx->gfx_pipeline_state.blend = new_state; + ctx->state_dirty |= D3D12_DIRTY_BLEND; + if (new_state == NULL || old_state == NULL || + new_state->blend_factor_flags != old_state->blend_factor_flags) + ctx->state_dirty |= D3D12_DIRTY_BLEND_COLOR; +} + +static void +d3d12_delete_blend_state(struct pipe_context *pctx, void *blend_state) +{ + d3d12_gfx_pipeline_state_cache_invalidate(d3d12_context(pctx), blend_state); + FREE(blend_state); +} + +static D3D12_STENCIL_OP +stencil_op(enum pipe_stencil_op op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP; + case PIPE_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE; + case PIPE_STENCIL_OP_INCR: return D3D12_STENCIL_OP_INCR_SAT; + case PIPE_STENCIL_OP_DECR: return D3D12_STENCIL_OP_DECR_SAT; + case PIPE_STENCIL_OP_INCR_WRAP: return D3D12_STENCIL_OP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: return D3D12_STENCIL_OP_DECR; + case PIPE_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT; + } + unreachable("unexpected op"); +} + +static D3D12_DEPTH_STENCILOP_DESC +stencil_op_state(const struct pipe_stencil_state *src) +{ + D3D12_DEPTH_STENCILOP_DESC ret; + ret.StencilFailOp = stencil_op((pipe_stencil_op) src->fail_op); + ret.StencilPassOp = stencil_op((pipe_stencil_op) src->zpass_op); + ret.StencilDepthFailOp = stencil_op((pipe_stencil_op) src->zfail_op); + ret.StencilFunc = compare_op((pipe_compare_func) src->func); + return ret; +} + +static void * +d3d12_create_depth_stencil_alpha_state(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *depth_stencil_alpha) +{ + struct d3d12_depth_stencil_alpha_state *dsa = CALLOC_STRUCT(d3d12_depth_stencil_alpha_state); + if (!dsa) + return NULL; + + if (depth_stencil_alpha->depth.enabled) { + 
dsa->desc.DepthEnable = TRUE; + dsa->desc.DepthFunc = compare_op((pipe_compare_func) depth_stencil_alpha->depth.func); + } + + /* TODO Add support for GL_depth_bound_tests */ + #if 0 + if (depth_stencil_alpha->depth.bounds_test) { + dsa->desc.DepthBoundsTestEnable = TRUE; + dsa->min_depth_bounds = depth_stencil_alpha->depth.bounds_min; + dsa->max_depth_bounds = depth_stencil_alpha->depth.bounds_max; + } + #endif + + if (depth_stencil_alpha->stencil[0].enabled) { + dsa->desc.StencilEnable = TRUE; + dsa->desc.FrontFace = stencil_op_state(depth_stencil_alpha->stencil); + } + + if (depth_stencil_alpha->stencil[1].enabled) + dsa->desc.BackFace = stencil_op_state(depth_stencil_alpha->stencil + 1); + else + dsa->desc.BackFace = dsa->desc.FrontFace; + + dsa->desc.StencilReadMask = depth_stencil_alpha->stencil[0].valuemask; /* FIXME Back face mask */ + dsa->desc.StencilWriteMask = depth_stencil_alpha->stencil[0].writemask; /* FIXME Back face mask */ + dsa->desc.DepthWriteMask = (D3D12_DEPTH_WRITE_MASK) depth_stencil_alpha->depth.writemask; + + return dsa; +} + +static void +d3d12_bind_depth_stencil_alpha_state(struct pipe_context *pctx, + void *dsa) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + ctx->gfx_pipeline_state.zsa = (struct d3d12_depth_stencil_alpha_state *) dsa; + ctx->state_dirty |= D3D12_DIRTY_ZSA; +} + +static void +d3d12_delete_depth_stencil_alpha_state(struct pipe_context *pctx, + void *dsa_state) +{ + d3d12_gfx_pipeline_state_cache_invalidate(d3d12_context(pctx), dsa_state); + FREE(dsa_state); +} + +static D3D12_FILL_MODE +fill_mode(unsigned mode) +{ + switch (mode) { + case PIPE_POLYGON_MODE_FILL: + return D3D12_FILL_MODE_SOLID; + case PIPE_POLYGON_MODE_LINE: + return D3D12_FILL_MODE_WIREFRAME; + case PIPE_POLYGON_MODE_POINT: + return D3D12_FILL_MODE_SOLID; + + default: + unreachable("unsupported fill-mode"); + } +} + +static void * +d3d12_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *rs_state) +{ + struct 
d3d12_rasterizer_state *cso = CALLOC_STRUCT(d3d12_rasterizer_state); + if (!cso) + return NULL; + + cso->base = *rs_state; + + assert(rs_state->depth_clip_near == rs_state->depth_clip_far); + + switch (rs_state->cull_face) { + case PIPE_FACE_NONE: + if (rs_state->fill_front != rs_state->fill_back) { + cso->base.cull_face = PIPE_FACE_BACK; + cso->desc.CullMode = D3D12_CULL_MODE_BACK; + cso->desc.FillMode = fill_mode(rs_state->fill_front); + + /* create a modified CSO for the back-state, so we can draw with + * either. + */ + struct pipe_rasterizer_state templ = *rs_state; + templ.cull_face = PIPE_FACE_FRONT; + templ.fill_front = rs_state->fill_back; + cso->twoface_back = d3d12_create_rasterizer_state(pctx, &templ); + + if (!cso->twoface_back) { + FREE(cso); + return NULL; + } + } else { + cso->desc.CullMode = D3D12_CULL_MODE_NONE; + cso->desc.FillMode = fill_mode(rs_state->fill_front); + } + break; + + case PIPE_FACE_FRONT: + cso->desc.CullMode = D3D12_CULL_MODE_FRONT; + cso->desc.FillMode = fill_mode(rs_state->fill_back); + break; + + case PIPE_FACE_BACK: + cso->desc.CullMode = D3D12_CULL_MODE_BACK; + cso->desc.FillMode = fill_mode(rs_state->fill_front); + break; + + case PIPE_FACE_FRONT_AND_BACK: + /* this is wrong, and we shouldn't actually have to support this! 
*/ + cso->desc.CullMode = D3D12_CULL_MODE_NONE; + cso->desc.FillMode = D3D12_FILL_MODE_SOLID; + break; + + default: + unreachable("unsupported cull-mode"); + } + + cso->desc.FrontCounterClockwise = rs_state->front_ccw; + cso->desc.DepthClipEnable = rs_state->depth_clip_near; + cso->desc.MultisampleEnable = rs_state->multisample; + cso->desc.AntialiasedLineEnable = rs_state->line_smooth; + cso->desc.ForcedSampleCount = 0; // TODO + cso->desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; /* Not Implemented */ + + return cso; +} + +static void +d3d12_bind_rasterizer_state(struct pipe_context *pctx, void *rs_state) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + ctx->gfx_pipeline_state.rast = (struct d3d12_rasterizer_state *)rs_state; + ctx->state_dirty |= D3D12_DIRTY_RASTERIZER | D3D12_DIRTY_SCISSOR; +} + +static void +d3d12_delete_rasterizer_state(struct pipe_context *pctx, void *rs_state) +{ + d3d12_gfx_pipeline_state_cache_invalidate(d3d12_context(pctx), rs_state); + FREE(rs_state); +} + +static D3D12_TEXTURE_ADDRESS_MODE +sampler_address_mode(enum pipe_tex_wrap wrap, enum pipe_tex_filter filter) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: return filter == PIPE_TEX_FILTER_NEAREST ? 
+ D3D12_TEXTURE_ADDRESS_MODE_CLAMP : + D3D12_TEXTURE_ADDRESS_MODE_BORDER; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case PIPE_TEX_WRAP_MIRROR_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; /* not technically correct, but kinda works */ + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; /* FIXME: Doesn't exist in D3D12 */ + } + unreachable("unexpected wrap"); +} + +static D3D12_FILTER +get_filter(const struct pipe_sampler_state *state) +{ + static const D3D12_FILTER lut[16] = { + D3D12_FILTER_MIN_MAG_MIP_POINT, + D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR, + D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT, + D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR, + D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT, + D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, + D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, + D3D12_FILTER_MIN_MAG_MIP_LINEAR, + D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT, + D3D12_FILTER_COMPARISON_MIN_MAG_POINT_MIP_LINEAR, + D3D12_FILTER_COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT, + D3D12_FILTER_COMPARISON_MIN_POINT_MAG_MIP_LINEAR, + D3D12_FILTER_COMPARISON_MIN_LINEAR_MAG_MIP_POINT, + D3D12_FILTER_COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR, + D3D12_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT, + D3D12_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR, + }; + + static const D3D12_FILTER anisotropic_lut[2] = { + D3D12_FILTER_ANISOTROPIC, + D3D12_FILTER_COMPARISON_ANISOTROPIC, + }; + + if (state->max_anisotropy > 1) { + return anisotropic_lut[state->compare_mode]; + } else { + int idx = (state->mag_img_filter << 1) | + (state->min_img_filter << 2) | + (state->compare_mode << 3); + if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) + idx |= state->min_mip_filter; + return lut[idx]; + 
} +} + +/* Translate a gallium sampler state into a D3D12 sampler descriptor. + * Returns NULL when the state object cannot be allocated or no input state + * is given. */ +static void * +d3d12_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *state) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + struct d3d12_sampler_state *ss = CALLOC_STRUCT(d3d12_sampler_state); + D3D12_SAMPLER_DESC desc = {0}; + /* The allocation is what can fail here; also release it if no input state + * was supplied so the early return does not leak. */ + if (!ss || !state) { + FREE(ss); + return NULL; + } + + ss->filter = (pipe_tex_filter)state->min_img_filter; + ss->wrap_r = (pipe_tex_wrap)state->wrap_r; + ss->wrap_s = (pipe_tex_wrap)state->wrap_s; + ss->wrap_t = (pipe_tex_wrap)state->wrap_t; + ss->lod_bias = state->lod_bias; + ss->min_lod = state->min_lod; + ss->max_lod = state->max_lod; + memcpy(ss->border_color, state->border_color.f, sizeof(float) * 4); + ss->compare_func = (pipe_compare_func)state->compare_func; + + if (state->min_mip_filter < PIPE_TEX_MIPFILTER_NONE) { + desc.MinLOD = state->min_lod; + desc.MaxLOD = state->max_lod; + } else if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + desc.MinLOD = 0; + desc.MaxLOD = 0; + } else { + unreachable("unexpected mip filter"); + } + + if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + desc.ComparisonFunc = compare_op((pipe_compare_func) state->compare_func); + desc.Filter = D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT; + } else if (state->compare_mode == PIPE_TEX_COMPARE_NONE) { + desc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + desc.Filter = get_filter(state); + } else + unreachable("unexpected comparison mode"); + + desc.MaxAnisotropy = state->max_anisotropy; + + desc.AddressU = sampler_address_mode((pipe_tex_wrap) state->wrap_s, + (pipe_tex_filter) state->min_img_filter); + desc.AddressV = sampler_address_mode((pipe_tex_wrap) state->wrap_t, + (pipe_tex_filter) state->min_img_filter); + desc.AddressW = sampler_address_mode((pipe_tex_wrap) state->wrap_r, + (pipe_tex_filter) state->min_img_filter); + desc.MipLODBias = CLAMP(state->lod_bias, -16.0f, 15.99f); + memcpy(desc.BorderColor, state->border_color.f, sizeof(float) * 4); + + 
// TODO Normalized Coordinates? + d3d12_descriptor_pool_alloc_handle(ctx->sampler_pool, &ss->handle); + screen->dev->CreateSampler(&desc, ss->handle.cpu_handle); + + if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + desc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + struct pipe_sampler_state fake_state = *state; + fake_state.compare_mode = PIPE_TEX_COMPARE_NONE; + desc.Filter = get_filter(&fake_state); + + d3d12_descriptor_pool_alloc_handle(ctx->sampler_pool, + &ss->handle_without_shadow); + screen->dev->CreateSampler(&desc, + ss->handle_without_shadow.cpu_handle); + ss->is_shadow_sampler = true; + } + + return ss; +} + +static void +d3d12_bind_sampler_states(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start_slot, + unsigned num_samplers, + void **samplers) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + bool shader_state_dirty = false; + +#define STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(X) \ + static_assert((enum compare_func)PIPE_FUNC_##X == COMPARE_FUNC_##X, #X " needs switch case"); + + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(LESS); + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(GREATER); + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(LEQUAL); + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(GEQUAL); + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(NOTEQUAL); + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(NEVER); + STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC(ALWAYS); + +#undef STATIC_ASSERT_PIPE_EQUAL_COMP_FUNC + + for (unsigned i = 0; i < num_samplers; ++i) { + d3d12_sampler_state *sampler = (struct d3d12_sampler_state*) samplers[i]; + ctx->samplers[shader][start_slot + i] = sampler; + dxil_wrap_sampler_state &wrap = ctx->tex_wrap_states[shader][start_slot + i]; + if (sampler) { + shader_state_dirty |= wrap.wrap[0] != sampler->wrap_s || + wrap.wrap[1] != sampler->wrap_t || + wrap.wrap[2] != sampler->wrap_r; + shader_state_dirty |= !!memcmp(wrap.border_color, sampler->border_color, 4 * sizeof(float)); + + wrap.wrap[0] = sampler->wrap_s; + wrap.wrap[1] = sampler->wrap_t; + wrap.wrap[2] 
= sampler->wrap_r; + wrap.lod_bias = sampler->lod_bias; + wrap.min_lod = sampler->min_lod; + wrap.max_lod = sampler->max_lod; + memcpy(wrap.border_color, sampler->border_color, 4 * sizeof(float)); + ctx->tex_compare_func[shader][start_slot + i] = (enum compare_func)sampler->compare_func; + } else { + memset(&wrap, 0, sizeof (dxil_wrap_sampler_state)); + } + } + + ctx->num_samplers[shader] = start_slot + num_samplers; + ctx->shader_dirty[shader] |= D3D12_SHADER_DIRTY_SAMPLERS; + if (shader_state_dirty) + ctx->state_dirty |= D3D12_DIRTY_SHADER; +} + +static void +d3d12_delete_sampler_state(struct pipe_context *pctx, + void *ss) +{ + struct d3d12_batch *batch = d3d12_current_batch(d3d12_context(pctx)); + struct d3d12_sampler_state *state = (struct d3d12_sampler_state*) ss; + util_dynarray_append(&batch->zombie_samplers, d3d12_descriptor_handle, + state->handle); + if (state->is_shadow_sampler) + util_dynarray_append(&batch->zombie_samplers, d3d12_descriptor_handle, + state->handle_without_shadow); + FREE(ss); +} + +static D3D12_SRV_DIMENSION +view_dimension(enum pipe_texture_target target, unsigned samples) +{ + switch (target) { + case PIPE_BUFFER: return D3D12_SRV_DIMENSION_BUFFER; + case PIPE_TEXTURE_1D: return D3D12_SRV_DIMENSION_TEXTURE1D; + case PIPE_TEXTURE_1D_ARRAY: return D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + return samples > 1 ? D3D12_SRV_DIMENSION_TEXTURE2DMS : + D3D12_SRV_DIMENSION_TEXTURE2D; + case PIPE_TEXTURE_2D_ARRAY: + return samples > 1 ? 
D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + case PIPE_TEXTURE_CUBE: return D3D12_SRV_DIMENSION_TEXTURECUBE; + case PIPE_TEXTURE_CUBE_ARRAY: return D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + case PIPE_TEXTURE_3D: return D3D12_SRV_DIMENSION_TEXTURE3D; + default: + unreachable("unexpected target"); + } +} + +static D3D12_SHADER_COMPONENT_MAPPING +component_mapping(enum pipe_swizzle swizzle, D3D12_SHADER_COMPONENT_MAPPING id) +{ + switch (swizzle) { + case PIPE_SWIZZLE_X: return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0; + case PIPE_SWIZZLE_Y: return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1; + case PIPE_SWIZZLE_Z: return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2; + case PIPE_SWIZZLE_W: return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3; + case PIPE_SWIZZLE_0: return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0; + case PIPE_SWIZZLE_1: return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1; + case PIPE_SWIZZLE_NONE: return id; + default: + unreachable("unexpected swizzle"); + } +} + +static struct pipe_sampler_view * +d3d12_create_sampler_view(struct pipe_context *pctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + struct d3d12_resource *res = d3d12_resource(texture); + struct d3d12_sampler_view *sampler_view = CALLOC_STRUCT(d3d12_sampler_view); + + sampler_view->base = *state; + sampler_view->base.texture = NULL; + pipe_resource_reference(&sampler_view->base.texture, texture); + sampler_view->base.reference.count = 1; + sampler_view->base.context = pctx; + sampler_view->mip_levels = state->u.tex.last_level - state->u.tex.first_level + 1; + sampler_view->array_size = texture->array_size; + + D3D12_SHADER_RESOURCE_VIEW_DESC desc = {}; + struct d3d12_format_info format_info = d3d12_get_format_info(state->format, state->target); + pipe_swizzle swizzle[4] 
= { + format_info.swizzle[sampler_view->base.swizzle_r], + format_info.swizzle[sampler_view->base.swizzle_g], + format_info.swizzle[sampler_view->base.swizzle_b], + format_info.swizzle[sampler_view->base.swizzle_a] + }; + + sampler_view->swizzle_override_r = swizzle[0]; + sampler_view->swizzle_override_g = swizzle[1]; + sampler_view->swizzle_override_b = swizzle[2]; + sampler_view->swizzle_override_a = swizzle[3]; + + desc.Format = d3d12_get_resource_srv_format(state->format, state->target); + desc.ViewDimension = view_dimension(state->target, texture->nr_samples); + + /* Integer cube textures are not really supported, because TextureLoad doesn't exist + * for cube maps, and we sampling is not supported for integer textures, so we have to + * handle this SRV as if it were a 2D texture array */ + if ((desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || + desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) && + util_format_is_pure_integer(state->format)) { + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + } + + desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + component_mapping(swizzle[0], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0), + component_mapping(swizzle[1], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1), + component_mapping(swizzle[2], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2), + component_mapping(swizzle[3], D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3) + ); + + unsigned array_size = state->u.tex.last_layer - state->u.tex.first_layer + 1; + switch (desc.ViewDimension) { + case D3D12_SRV_DIMENSION_TEXTURE1D: + if (state->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 1D SRV from layer %d\n", + state->u.tex.first_layer); + + desc.Texture1D.MostDetailedMip = state->u.tex.first_level; + desc.Texture1D.MipLevels = sampler_view->mip_levels; + desc.Texture1D.ResourceMinLODClamp = 0.0f; + break; + case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: + 
desc.Texture1DArray.MostDetailedMip = state->u.tex.first_level; + desc.Texture1DArray.MipLevels = sampler_view->mip_levels; + desc.Texture1DArray.ResourceMinLODClamp = 0.0f; + desc.Texture1DArray.FirstArraySlice = state->u.tex.first_layer; + desc.Texture1DArray.ArraySize = array_size; + break; + case D3D12_SRV_DIMENSION_TEXTURE2D: + if (state->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 2D SRV from layer %d\n", + state->u.tex.first_layer); + + desc.Texture2D.MostDetailedMip = state->u.tex.first_level; + desc.Texture2D.MipLevels = sampler_view->mip_levels; + desc.Texture2D.PlaneSlice = format_info.plane_slice; + desc.Texture2D.ResourceMinLODClamp = 0.0f; + break; + case D3D12_SRV_DIMENSION_TEXTURE2DMS: + if (state->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 2DMS SRV from layer %d\n", + state->u.tex.first_layer); + break; + case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: + desc.Texture2DArray.MostDetailedMip = state->u.tex.first_level; + desc.Texture2DArray.MipLevels = sampler_view->mip_levels; + desc.Texture2DArray.ResourceMinLODClamp = 0.0f; + desc.Texture2DArray.FirstArraySlice = state->u.tex.first_layer; + desc.Texture2DArray.PlaneSlice = format_info.plane_slice; + desc.Texture2DArray.ArraySize = array_size; + break; + case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: + desc.Texture2DMSArray.FirstArraySlice = state->u.tex.first_layer; + desc.Texture2DMSArray.ArraySize = array_size; + break; + case D3D12_SRV_DIMENSION_TEXTURE3D: + if (state->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 3D SRV from layer %d\n", + state->u.tex.first_layer); + + desc.Texture3D.MostDetailedMip = state->u.tex.first_level; + desc.Texture3D.MipLevels = sampler_view->mip_levels; + desc.Texture3D.ResourceMinLODClamp = 0.0f; + break; + case D3D12_SRV_DIMENSION_TEXTURECUBE: + if (state->u.tex.first_layer > 0) + debug_printf("D3D12: can't create CUBE SRV from layer %d\n", + state->u.tex.first_layer); + + desc.TextureCube.MostDetailedMip = 
state->u.tex.first_level; + desc.TextureCube.MipLevels = sampler_view->mip_levels; + desc.TextureCube.ResourceMinLODClamp = 0.0f; + break; + case D3D12_SRV_DIMENSION_BUFFER: + desc.Buffer.FirstElement = 0; + desc.Buffer.StructureByteStride = 0; + desc.Buffer.NumElements = texture->width0 / util_format_get_blocksize(state->format); + break; + } + + d3d12_descriptor_pool_alloc_handle(ctx->view_pool, &sampler_view->handle); + screen->dev->CreateShaderResourceView(d3d12_resource_resource(res), &desc, + sampler_view->handle.cpu_handle); + + return &sampler_view->base; +} + +static void +d3d12_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader_type, + unsigned start_slot, + unsigned num_views, + struct pipe_sampler_view **views) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + assert(views); + unsigned shader_bit = (1 << shader_type); + ctx->has_int_samplers &= ~shader_bit; + + for (unsigned i = 0; i < num_views; ++i) { + pipe_sampler_view_reference( + &ctx->sampler_views[shader_type][start_slot + i], + views[i]); + + if (views[i]) { + dxil_wrap_sampler_state &wss = ctx->tex_wrap_states[shader_type][start_slot + i]; + dxil_texture_swizzle_state &swizzle_state = ctx->tex_swizzle_state[shader_type][i]; + if (util_format_is_pure_integer(views[i]->format)) { + ctx->has_int_samplers |= shader_bit; + wss.is_int_sampler = 1; + wss.last_level = views[i]->texture->last_level; + /* When we emulate a integer cube texture (array) by using a texture 2d Array + * the coordinates are evaluated to always reside withing the acceptable range + * because the 3d ray for picking the texel is always pointing at one cube face, + * hence we can skip the boundary condition handling when the texture operations are + * lowered to texel fetches later. 
*/ + wss.skip_boundary_conditions = views[i]->target == PIPE_TEXTURE_CUBE || + views[i]->target == PIPE_TEXTURE_CUBE_ARRAY; + } else { + wss.is_int_sampler = 0; + } + /* We need the swizzle state for compare texture lowering, because it + * encode the use of the shadow texture lookup result as either luminosity, + * intensity, or alpha. and we need the swizzle state for applying the + * boundary color correctly */ + struct d3d12_sampler_view *ss = d3d12_sampler_view(views[i]); + swizzle_state.swizzle_r = ss->swizzle_override_r; + swizzle_state.swizzle_g = ss->swizzle_override_g; + swizzle_state.swizzle_b = ss->swizzle_override_b; + swizzle_state.swizzle_a = ss->swizzle_override_a; + } + } + ctx->num_sampler_views[shader_type] = start_slot + num_views; + ctx->shader_dirty[shader_type] |= D3D12_SHADER_DIRTY_SAMPLER_VIEWS; +} + +static void +d3d12_destroy_sampler_view(struct pipe_context *pctx, + struct pipe_sampler_view *pview) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_sampler_view *view = d3d12_sampler_view(pview); + d3d12_descriptor_handle_free(&view->handle); + pipe_resource_reference(&view->base.texture, NULL); + FREE(view); +} + +static void +delete_shader(struct d3d12_context *ctx, enum pipe_shader_type stage, + struct d3d12_shader_selector *shader) +{ + d3d12_gfx_pipeline_state_cache_invalidate_shader(ctx, stage, shader); + + /* Make sure the pipeline state no longer reference the deleted shader */ + struct d3d12_shader *iter = shader->first; + while (iter) { + if (ctx->gfx_pipeline_state.stages[stage] == iter) { + ctx->gfx_pipeline_state.stages[stage] = NULL; + break; + } + iter = iter->next_variant; + } + + d3d12_shader_free(shader); +} + +static void +bind_stage(struct d3d12_context *ctx, enum pipe_shader_type stage, + struct d3d12_shader_selector *shader) +{ + assert(stage < D3D12_GFX_SHADER_STAGES); + ctx->gfx_stages[stage] = shader; +} + +static void * +d3d12_create_vs_state(struct pipe_context *pctx, + const struct 
pipe_shader_state *shader) +{ + return d3d12_create_shader(d3d12_context(pctx), PIPE_SHADER_VERTEX, shader); +} + +static void +d3d12_bind_vs_state(struct pipe_context *pctx, + void *vss) +{ + bind_stage(d3d12_context(pctx), PIPE_SHADER_VERTEX, + (struct d3d12_shader_selector *) vss); +} + +static void +d3d12_delete_vs_state(struct pipe_context *pctx, + void *vs) +{ + delete_shader(d3d12_context(pctx), PIPE_SHADER_VERTEX, + (struct d3d12_shader_selector *) vs); +} + +static void * +d3d12_create_fs_state(struct pipe_context *pctx, + const struct pipe_shader_state *shader) +{ + return d3d12_create_shader(d3d12_context(pctx), PIPE_SHADER_FRAGMENT, shader); +} + +static void +d3d12_bind_fs_state(struct pipe_context *pctx, + void *fss) +{ + bind_stage(d3d12_context(pctx), PIPE_SHADER_FRAGMENT, + (struct d3d12_shader_selector *) fss); +} + +static void +d3d12_delete_fs_state(struct pipe_context *pctx, + void *fs) +{ + delete_shader(d3d12_context(pctx), PIPE_SHADER_FRAGMENT, + (struct d3d12_shader_selector *) fs); +} + +static void * +d3d12_create_gs_state(struct pipe_context *pctx, + const struct pipe_shader_state *shader) +{ + return d3d12_create_shader(d3d12_context(pctx), PIPE_SHADER_GEOMETRY, shader); +} + +static void +d3d12_bind_gs_state(struct pipe_context *pctx, void *gss) +{ + bind_stage(d3d12_context(pctx), PIPE_SHADER_GEOMETRY, + (struct d3d12_shader_selector *) gss); +} + +static void +d3d12_delete_gs_state(struct pipe_context *pctx, void *gs) +{ + delete_shader(d3d12_context(pctx), PIPE_SHADER_GEOMETRY, + (struct d3d12_shader_selector *) gs); +} + +static bool +d3d12_init_polygon_stipple(struct pipe_context *pctx) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + ctx->pstipple.texture = util_pstipple_create_stipple_texture(pctx, NULL); + if (!ctx->pstipple.texture) + return false; + + ctx->pstipple.sampler_view = util_pstipple_create_sampler_view(pctx, ctx->pstipple.texture); + if (!ctx->pstipple.sampler_view) + return false; + + 
ctx->pstipple.sampler_cso = (struct d3d12_sampler_state *)util_pstipple_create_sampler(pctx); + if (!ctx->pstipple.sampler_cso) + return false; + + return true; +} + +static void +d3d12_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *ps) +{ + static bool initialized = false; + static const uint32_t zero[32] = {0}; + static uint32_t undef[32] = {0}; + struct d3d12_context *ctx = d3d12_context(pctx); + + if (!initialized) + memset(undef, UINT32_MAX, sizeof(undef)); + + if (!memcmp(ctx->pstipple.pattern, ps->stipple, sizeof(ps->stipple))) + return; + + memcpy(ctx->pstipple.pattern, ps->stipple, sizeof(ps->stipple)); + ctx->pstipple.enabled = !!memcmp(ps->stipple, undef, sizeof(ps->stipple)) && + !!memcmp(ps->stipple, zero, sizeof(ps->stipple)); + if (ctx->pstipple.enabled) + util_pstipple_update_stipple_texture(pctx, ctx->pstipple.texture, ps->stipple); +} + +static void +d3d12_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + util_set_vertex_buffers_count(ctx->vbs, &ctx->num_vbs, + buffers, start_slot, num_buffers); + + for (unsigned i = 0; i < ctx->num_vbs; ++i) { + const struct pipe_vertex_buffer* buf = ctx->vbs + i; + if (!buf->buffer.resource) + continue; + struct d3d12_resource *res = d3d12_resource(buf->buffer.resource); + ctx->vbvs[i].BufferLocation = d3d12_resource_gpu_virtual_address(res) + buf->buffer_offset; + ctx->vbvs[i].StrideInBytes = buf->stride; + ctx->vbvs[i].SizeInBytes = res->base.width0 - buf->buffer_offset; + } + ctx->state_dirty |= D3D12_DIRTY_VERTEX_BUFFERS; +} + +static void +d3d12_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *state) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + for (unsigned i = 0; i < num_viewports; ++i) { + if (state[i].scale[1] < 0) { + ctx->flip_y = 
1.0f; + ctx->viewports[start_slot + i].TopLeftY = state[i].translate[1] + state[i].scale[1]; + ctx->viewports[start_slot + i].Height = -state[i].scale[1] * 2; + } else { + ctx->flip_y = -1.0f; + ctx->viewports[start_slot + i].TopLeftY = state[i].translate[1] - state[i].scale[1]; + ctx->viewports[start_slot + i].Height = state[i].scale[1] * 2; + } + ctx->viewports[start_slot + i].TopLeftX = state[i].translate[0] - state[i].scale[0]; + ctx->viewports[start_slot + i].Width = state[i].scale[0] * 2; + + float near_depth = state[i].translate[2] - state[i].scale[2]; + float far_depth = state[i].translate[2] + state[i].scale[2]; + + ctx->reverse_depth_range = near_depth > far_depth; + if (ctx->reverse_depth_range) { + float tmp = near_depth; + near_depth = far_depth; + far_depth = tmp; + } + ctx->viewports[start_slot + i].MinDepth = near_depth; + ctx->viewports[start_slot + i].MaxDepth = far_depth; + ctx->viewport_states[start_slot + i] = state[i]; + } + ctx->num_viewports = start_slot + num_viewports; + ctx->state_dirty |= D3D12_DIRTY_VIEWPORT; +} + + +static void +d3d12_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, unsigned num_scissors, + const struct pipe_scissor_state *states) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + for (unsigned i = 0; i < num_scissors; i++) { + ctx->scissors[start_slot + i].left = states[i].minx; + ctx->scissors[start_slot + i].top = states[i].miny; + ctx->scissors[start_slot + i].right = states[i].maxx; + ctx->scissors[start_slot + i].bottom = states[i].maxy; + ctx->scissor_states[start_slot + i] = states[i]; + } + ctx->state_dirty |= D3D12_DIRTY_SCISSOR; +} + +static void +d3d12_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + if (buf) { + struct pipe_resource *buffer = buf->buffer; + unsigned offset = buf->buffer_offset; + if (buf->user_buffer) { + 
u_upload_data(pctx->const_uploader, 0, buf->buffer_size, + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + buf->user_buffer, &offset, &ctx->cbufs[shader][index].buffer); + + } else + pipe_resource_reference(&ctx->cbufs[shader][index].buffer, buffer); + + + ctx->cbufs[shader][index].buffer_offset = offset; + ctx->cbufs[shader][index].buffer_size = buf->buffer_size; + ctx->cbufs[shader][index].user_buffer = NULL; + + } else { + pipe_resource_reference(&ctx->cbufs[shader][index].buffer, NULL); + ctx->cbufs[shader][index].buffer_offset = 0; + ctx->cbufs[shader][index].buffer_size = 0; + ctx->cbufs[shader][index].user_buffer = NULL; + } + ctx->shader_dirty[shader] |= D3D12_SHADER_DIRTY_CONSTBUF; +} + +static void +d3d12_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *state) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + int samples = -1; + + util_copy_framebuffer_state(&d3d12_context(pctx)->fb, state); + + ctx->gfx_pipeline_state.num_cbufs = state->nr_cbufs; + ctx->gfx_pipeline_state.has_float_rtv = false; + for (int i = 0; i < state->nr_cbufs; ++i) { + if (state->cbufs[i]) { + if (util_format_is_float(state->cbufs[i]->format)) + ctx->gfx_pipeline_state.has_float_rtv = true; + ctx->gfx_pipeline_state.rtv_formats[i] = d3d12_get_format(state->cbufs[i]->format); + samples = MAX2(samples, (int)state->cbufs[i]->texture->nr_samples); + } else { + ctx->gfx_pipeline_state.rtv_formats[i] = DXGI_FORMAT_UNKNOWN; + } + } + + if (state->zsbuf) { + ctx->gfx_pipeline_state.dsv_format = d3d12_get_resource_rt_format(state->zsbuf->format); + samples = MAX2(samples, (int)ctx->fb.zsbuf->texture->nr_samples); + } else + ctx->gfx_pipeline_state.dsv_format = DXGI_FORMAT_UNKNOWN; + + if (samples < 0) + samples = state->samples; + + ctx->gfx_pipeline_state.samples = MAX2(samples, 1); + + ctx->state_dirty |= D3D12_DIRTY_FRAMEBUFFER; +} + +static void +d3d12_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *color) +{ + 
struct d3d12_context *ctx = d3d12_context(pctx); + memcpy(ctx->blend_factor, color->color, sizeof(float) * 4); + ctx->state_dirty |= D3D12_DIRTY_BLEND_COLOR; +} + +static void +d3d12_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + ctx->gfx_pipeline_state.sample_mask = sample_mask; + ctx->state_dirty |= D3D12_DIRTY_SAMPLE_MASK; +} + +static void +d3d12_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *ref) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + if ((ref->ref_value[0] != ref->ref_value[1]) && + (d3d12_debug & D3D12_DEBUG_VERBOSE)) + debug_printf("D3D12: Different values for front and back stencil reference are not supported\n"); + ctx->stencil_ref = *ref; + ctx->state_dirty |= D3D12_DIRTY_STENCIL_REF; +} + +static void +d3d12_set_clip_state(struct pipe_context *pctx, + const struct pipe_clip_state *pcs) +{ +} + +static struct pipe_stream_output_target * +d3d12_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *pres, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + struct d3d12_resource *res = d3d12_resource(pres); + struct d3d12_stream_output_target *cso = CALLOC_STRUCT(d3d12_stream_output_target); + + if (!cso) + return NULL; + + pipe_reference_init(&cso->base.reference, 1); + pipe_resource_reference(&cso->base.buffer, pres); + cso->base.buffer_offset = buffer_offset; + cso->base.buffer_size = buffer_size; + cso->base.context = pctx; + + util_range_add(pres, &res->valid_buffer_range, buffer_offset, + buffer_offset + buffer_size); + + return &cso->base; +} + +static void +d3d12_stream_output_target_destroy(struct pipe_context *ctx, + struct pipe_stream_output_target *state) +{ + pipe_resource_reference(&state->buffer, NULL); + + FREE(state); +} + +static void 
+fill_stream_output_buffer_view(D3D12_STREAM_OUTPUT_BUFFER_VIEW *view, + struct d3d12_stream_output_target *target) +{ + struct d3d12_resource *res = d3d12_resource(target->base.buffer); + struct d3d12_resource *fill_res = d3d12_resource(target->fill_buffer); + + view->SizeInBytes = target->base.buffer_size; + view->BufferLocation = d3d12_resource_gpu_virtual_address(res) + target->base.buffer_offset; + view->BufferFilledSizeLocation = d3d12_resource_gpu_virtual_address(fill_res) + target->fill_buffer_offset; +} + +static void +d3d12_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + assert(num_targets <= ARRAY_SIZE(ctx->so_targets)); + + d3d12_disable_fake_so_buffers(ctx); + + for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + struct d3d12_stream_output_target *target = + i < num_targets ? (struct d3d12_stream_output_target *)targets[i] : NULL; + + if (target) { + /* Sub-allocate a new fill buffer each time to avoid GPU/CPU synchronization */ + u_suballocator_alloc(ctx->so_allocator, sizeof(uint64_t), 4, + &target->fill_buffer_offset, &target->fill_buffer); + fill_stream_output_buffer_view(&ctx->so_buffer_views[i], target); + pipe_so_target_reference(&ctx->so_targets[i], targets[i]); + } else { + ctx->so_buffer_views[i].SizeInBytes = 0; + pipe_so_target_reference(&ctx->so_targets[i], NULL); + } + } + + ctx->gfx_pipeline_state.num_so_targets = num_targets; + ctx->state_dirty |= D3D12_DIRTY_STREAM_OUTPUT; +} + +bool +d3d12_enable_fake_so_buffers(struct d3d12_context *ctx, unsigned factor) +{ + if (ctx->fake_so_buffer_factor == factor) + return true; + + d3d12_disable_fake_so_buffers(ctx); + + for (int i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) { + struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)ctx->so_targets[i]; + struct d3d12_stream_output_target *fake_target; 
+ + fake_target = CALLOC_STRUCT(d3d12_stream_output_target); + if (!fake_target) + return false; + pipe_reference_init(&fake_target->base.reference, 1); + fake_target->base.context = &ctx->base; + + d3d12_resource_wait_idle(ctx, d3d12_resource(target->base.buffer)); + + /* Check if another target is using the same buffer */ + for (int j = i - 1; j >= 0; --j) { + if (ctx->so_targets[j] && ctx->so_targets[j]->buffer == target->base.buffer) { + struct d3d12_stream_output_target *prev_target = + (struct d3d12_stream_output_target *)ctx->fake_so_targets[j]; + pipe_resource_reference(&fake_target->base.buffer, prev_target->base.buffer); + pipe_resource_reference(&fake_target->fill_buffer, prev_target->fill_buffer); + fake_target->fill_buffer_offset = prev_target->fill_buffer_offset; + fake_target->cached_filled_size = prev_target->cached_filled_size; + break; + } + } + + /* Create new SO buffer 6x (2 triangles instead of 1 point) the original size if not */ + if (!fake_target->base.buffer) { + fake_target->base.buffer = pipe_buffer_create(ctx->base.screen, + PIPE_BIND_STREAM_OUTPUT, + PIPE_USAGE_STAGING, + target->base.buffer->width0 * factor); + u_suballocator_alloc(ctx->so_allocator, sizeof(uint64_t), 4, + &fake_target->fill_buffer_offset, &fake_target->fill_buffer); + pipe_buffer_read(&ctx->base, target->fill_buffer, + target->fill_buffer_offset, sizeof(uint64_t), + &fake_target->cached_filled_size); + } + + fake_target->base.buffer_offset = target->base.buffer_offset * factor; + fake_target->base.buffer_size = (target->base.buffer_size - fake_target->cached_filled_size) * factor; + ctx->fake_so_targets[i] = &fake_target->base; + fill_stream_output_buffer_view(&ctx->fake_so_buffer_views[i], fake_target); + } + + ctx->fake_so_buffer_factor = factor; + ctx->cmdlist_dirty |= D3D12_DIRTY_STREAM_OUTPUT; + + return true; +} + +bool +d3d12_disable_fake_so_buffers(struct d3d12_context *ctx) +{ + if (ctx->fake_so_buffer_factor == 0) + return true; + + 
d3d12_flush_cmdlist_and_wait(ctx); + + for (int i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) { + struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)ctx->so_targets[i]; + struct d3d12_stream_output_target *fake_target = (struct d3d12_stream_output_target *)ctx->fake_so_targets[i]; + uint64_t filled_size; + struct pipe_transfer *src_transfer, *dst_transfer; + uint8_t *src, *dst; + + if (fake_target == NULL) + continue; + + pipe_buffer_read(&ctx->base, fake_target->fill_buffer, + fake_target->fill_buffer_offset, sizeof(uint64_t), + &filled_size); + + src = (uint8_t *)pipe_buffer_map_range(&ctx->base, fake_target->base.buffer, + fake_target->base.buffer_offset, + fake_target->base.buffer_size, + PIPE_MAP_READ, &src_transfer); + dst = (uint8_t *)pipe_buffer_map_range(&ctx->base, target->base.buffer, + target->base.buffer_offset, + target->base.buffer_size, + PIPE_MAP_READ, &dst_transfer); + + /* Note: This will break once support for gl_SkipComponents is added */ + uint32_t stride = ctx->gfx_pipeline_state.so_info.stride[i] * 4; + uint64_t src_offset = 0, dst_offset = fake_target->cached_filled_size; + while (src_offset < filled_size) { + memcpy(dst + dst_offset, src + src_offset, stride); + src_offset += stride * ctx->fake_so_buffer_factor; + dst_offset += stride; + } + + pipe_buffer_unmap(&ctx->base, src_transfer); + pipe_buffer_unmap(&ctx->base, dst_transfer); + + pipe_so_target_reference(&ctx->fake_so_targets[i], NULL); + ctx->fake_so_buffer_views[i].SizeInBytes = 0; + + /* Make sure the buffer is not copied twice */ + for (int j = i + 1; j <= ctx->gfx_pipeline_state.num_so_targets; ++j) { + if (ctx->so_targets[j] && ctx->so_targets[j]->buffer == target->base.buffer) + pipe_so_target_reference(&ctx->fake_so_targets[j], NULL); + } + } + + ctx->fake_so_buffer_factor = 0; + ctx->cmdlist_dirty |= D3D12_DIRTY_STREAM_OUTPUT; + + return true; +} + +void +d3d12_flush_cmdlist(struct d3d12_context *ctx) +{ + d3d12_end_batch(ctx, 
d3d12_current_batch(ctx)); + + ctx->current_batch_idx++; + if (ctx->current_batch_idx == ARRAY_SIZE(ctx->batches)) + ctx->current_batch_idx = 0; + + d3d12_start_batch(ctx, d3d12_current_batch(ctx)); +} + +void +d3d12_flush_cmdlist_and_wait(struct d3d12_context *ctx) +{ + struct d3d12_batch *batch = d3d12_current_batch(ctx); + + d3d12_foreach_submitted_batch(ctx, old_batch) + d3d12_reset_batch(ctx, old_batch, PIPE_TIMEOUT_INFINITE); + d3d12_flush_cmdlist(ctx); + d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE); +} + +void +d3d12_transition_resource_state(struct d3d12_context *ctx, + struct d3d12_resource *res, + D3D12_RESOURCE_STATES state) +{ + TransitionableResourceState *xres = d3d12_resource_state(res); + ctx->resource_state_manager->TransitionResource(xres, state); +} + +void +d3d12_transition_subresources_state(struct d3d12_context *ctx, + struct d3d12_resource *res, + uint32_t start_level, uint32_t num_levels, + uint32_t start_layer, uint32_t num_layers, + uint32_t start_plane, uint32_t num_planes, + D3D12_RESOURCE_STATES state) +{ + TransitionableResourceState *xres = d3d12_resource_state(res); + + for (uint32_t l = 0; l < num_levels; l++) { + const uint32_t level = start_level + l; + for (uint32_t a = 0; a < num_layers; a++) { + const uint32_t layer = start_layer + a; + for( uint32_t p = 0; p < num_planes; p++) { + const uint32_t plane = start_plane + p; + uint32_t subres_id = level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.array_size); + assert(subres_id < xres->NumSubresources()); + ctx->resource_state_manager->TransitionSubresource(xres, subres_id, state); + } + } + } +} + +void +d3d12_apply_resource_states(struct d3d12_context *ctx) +{ + ctx->resource_state_manager->ApplyAllResourceTransitions(ctx->cmdlist, ctx->fence_value); +} + +static void +d3d12_clear_render_target(struct pipe_context *pctx, + struct pipe_surface *psurf, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned 
height,
+                          bool render_condition_enabled)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_surface *surf = d3d12_surface(psurf);
+
+   /* Temporarily lift the active predication when the caller asked for an
+    * unconditional clear; it is restored at the end of the function. */
+   if (!render_condition_enabled && ctx->current_predication)
+      ctx->cmdlist->SetPredication(NULL, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+
+   d3d12_transition_resource_state(ctx, d3d12_resource(psurf->texture),
+                                   D3D12_RESOURCE_STATE_RENDER_TARGET);
+   d3d12_apply_resource_states(ctx);
+
+   enum pipe_format format = psurf->texture->format;
+   float clear_color[4];
+
+   /* ClearRenderTargetView takes float components, so integer clear values
+    * are converted by value rather than passed as raw bits. */
+   if (util_format_is_pure_uint(format)) {
+      for (int c = 0; c < 4; ++c)
+         clear_color[c] = color->ui[c];
+   } else if (util_format_is_pure_sint(format)) {
+      for (int c = 0; c < 4; ++c)
+         clear_color[c] = color->i[c];
+   } else {
+      for (int c = 0; c < 4; ++c)
+         clear_color[c] = color->f[c];
+   }
+
+   /* Explicit casts: brace-initializing the signed rect members from
+    * unsigned values is a narrowing conversion. */
+   D3D12_RECT rect = { (int)dstx, (int)dsty,
+                       (int)(dstx + width), (int)(dsty + height) };
+   /* Pass the converted color; color->f would reinterpret integer clear
+    * values as floats. */
+   ctx->cmdlist->ClearRenderTargetView(surf->desc_handle.cpu_handle,
+                                       clear_color, 1, &rect);
+
+   d3d12_batch_reference_surface_texture(d3d12_current_batch(ctx), surf);
+
+   if (!render_condition_enabled && ctx->current_predication) {
+      ctx->cmdlist->SetPredication(
+         d3d12_resource_resource(ctx->current_predication), 0,
+         D3D12_PREDICATION_OP_EQUAL_ZERO);
+   }
+}
+
+/**
+ * Clear a rectangle of a depth/stencil surface.
+ *
+ * clear_flags selects depth and/or stencil (PIPE_CLEAR_DEPTH,
+ * PIPE_CLEAR_STENCIL). When render_condition_enabled is false, any active
+ * predication is lifted for the duration of the clear.
+ */
+static void
+d3d12_clear_depth_stencil(struct pipe_context *pctx,
+                          struct pipe_surface *psurf,
+                          unsigned clear_flags,
+                          double depth,
+                          unsigned stencil,
+                          unsigned dstx, unsigned dsty,
+                          unsigned width, unsigned height,
+                          bool render_condition_enabled)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_surface *surf = d3d12_surface(psurf);
+
+   if (!render_condition_enabled && ctx->current_predication)
+      ctx->cmdlist->SetPredication(NULL, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
+
+   D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0;
+   if (clear_flags & PIPE_CLEAR_DEPTH)
+      flags |= D3D12_CLEAR_FLAG_DEPTH;
+   if (clear_flags & PIPE_CLEAR_STENCIL)
+      flags |= D3D12_CLEAR_FLAG_STENCIL;
+
+   d3d12_transition_resource_state(ctx,
d3d12_resource(ctx->fb.zsbuf->texture), + D3D12_RESOURCE_STATE_DEPTH_WRITE); + d3d12_apply_resource_states(ctx); + + D3D12_RECT rect = { dstx, dsty, dstx + width, dsty + height }; + ctx->cmdlist->ClearDepthStencilView(surf->desc_handle.cpu_handle, flags, + depth, stencil, 1, &rect); + + d3d12_batch_reference_surface_texture(d3d12_current_batch(ctx), surf); + + if (!render_condition_enabled && ctx->current_predication) { + ctx->cmdlist->SetPredication( + d3d12_resource_resource(ctx->current_predication), 0, + D3D12_PREDICATION_OP_EQUAL_ZERO); + } +} + +static void +d3d12_clear(struct pipe_context *pctx, + unsigned buffers, + const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + if (buffers & PIPE_CLEAR_COLOR) { + for (int i = 0; i < ctx->fb.nr_cbufs; ++i) { + if (buffers & (PIPE_CLEAR_COLOR0 << i)) { + struct pipe_surface *psurf = ctx->fb.cbufs[i]; + d3d12_clear_render_target(pctx, psurf, color, + 0, 0, psurf->width, psurf->height, + true); + } + } + } + + if (buffers & PIPE_CLEAR_DEPTHSTENCIL && ctx->fb.zsbuf) { + struct pipe_surface *psurf = ctx->fb.zsbuf; + d3d12_clear_depth_stencil(pctx, psurf, + buffers & PIPE_CLEAR_DEPTHSTENCIL, + depth, stencil, + 0, 0, psurf->width, psurf->height, + true); + } +} + +static void +d3d12_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct d3d12_context *ctx = d3d12_context(pipe); + struct d3d12_batch *batch = d3d12_current_batch(ctx); + + d3d12_flush_cmdlist(ctx); + + if (fence) + d3d12_fence_reference((struct d3d12_fence **)fence, batch->fence); +} + +static void +d3d12_flush_resource(struct pipe_context *pctx, + struct pipe_resource *pres) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_resource *res = d3d12_resource(pres); + + d3d12_transition_resource_state(ctx, res, + D3D12_RESOURCE_STATE_COMMON); + d3d12_apply_resource_states(ctx); 
+} + +static void +d3d12_init_null_srvs(struct d3d12_context *ctx) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + + for (unsigned i = 0; i < RESOURCE_DIMENSION_COUNT; ++i) { + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + + srv.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + switch (i) { + case RESOURCE_DIMENSION_BUFFER: + case RESOURCE_DIMENSION_UNKNOWN: + srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv.Buffer.FirstElement = 0; + srv.Buffer.NumElements = 0; + srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + srv.Buffer.StructureByteStride = 0; + break; + case RESOURCE_DIMENSION_TEXTURE1D: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srv.Texture1D.MipLevels = 1; + srv.Texture1D.MostDetailedMip = 0; + srv.Texture1D.ResourceMinLODClamp = 0.0f; + break; + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + srv.Texture1DArray.MipLevels = 1; + srv.Texture1DArray.ArraySize = 1; + srv.Texture1DArray.MostDetailedMip = 0; + srv.Texture1DArray.FirstArraySlice = 0; + srv.Texture1DArray.ResourceMinLODClamp = 0.0f; + break; + case RESOURCE_DIMENSION_TEXTURE2D: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Texture2D.MipLevels = 1; + srv.Texture2D.MostDetailedMip = 0; + srv.Texture2D.PlaneSlice = 0; + srv.Texture2D.ResourceMinLODClamp = 0.0f; + break; + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srv.Texture2DArray.MipLevels = 1; + srv.Texture2DArray.ArraySize = 1; + srv.Texture2DArray.MostDetailedMip = 0; + srv.Texture2DArray.FirstArraySlice = 0; + srv.Texture2DArray.PlaneSlice = 0; + srv.Texture2DArray.ResourceMinLODClamp = 0.0f; + break; + case RESOURCE_DIMENSION_TEXTURE2DMS: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + break; + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + 
srv.Texture2DMSArray.ArraySize = 1; + srv.Texture2DMSArray.FirstArraySlice = 0; + break; + case RESOURCE_DIMENSION_TEXTURE3D: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srv.Texture3D.MipLevels = 1; + srv.Texture3D.MostDetailedMip = 0; + srv.Texture3D.ResourceMinLODClamp = 0.0f; + break; + case RESOURCE_DIMENSION_TEXTURECUBE: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + srv.TextureCube.MipLevels = 1; + srv.TextureCube.MostDetailedMip = 0; + srv.TextureCube.ResourceMinLODClamp = 0.0f; + break; + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + srv.TextureCubeArray.MipLevels = 1; + srv.TextureCubeArray.NumCubes = 1; + srv.TextureCubeArray.MostDetailedMip = 0; + srv.TextureCubeArray.First2DArrayFace = 0; + srv.TextureCubeArray.ResourceMinLODClamp = 0.0f; + break; + } + + if (srv.ViewDimension != D3D12_SRV_DIMENSION_UNKNOWN) + { + d3d12_descriptor_pool_alloc_handle(ctx->view_pool, &ctx->null_srvs[i]); + screen->dev->CreateShaderResourceView(NULL, &srv, ctx->null_srvs[i].cpu_handle); + } + } +} + +static void +d3d12_init_null_rtv(struct d3d12_context *ctx) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + + D3D12_RENDER_TARGET_VIEW_DESC rtv = {}; + rtv.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + rtv.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtv.Texture2D.MipSlice = 0; + rtv.Texture2D.PlaneSlice = 0; + d3d12_descriptor_pool_alloc_handle(ctx->rtv_pool, &ctx->null_rtv); + screen->dev->CreateRenderTargetView(NULL, &rtv, ctx->null_rtv.cpu_handle); +} + +static void +d3d12_init_null_sampler(struct d3d12_context *ctx) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + + d3d12_descriptor_pool_alloc_handle(ctx->sampler_pool, &ctx->null_sampler); + + D3D12_SAMPLER_DESC desc; + desc.Filter = D3D12_FILTER_ANISOTROPIC; + desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.AddressW = 
D3D12_TEXTURE_ADDRESS_MODE_WRAP; + desc.MipLODBias = 0.0f; + desc.MaxAnisotropy = 0; + desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + desc.MinLOD = 0.0f; + desc.MaxLOD = 0.0f; + memset(desc.BorderColor, 0, sizeof(desc.BorderColor)); + screen->dev->CreateSampler(&desc, ctx->null_sampler.cpu_handle); +} + +static uint64_t +d3d12_get_timestamp(struct pipe_context *pctx) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + + if (!ctx->timestamp_query) + ctx->timestamp_query = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0); + + pipe_query_result result; + pctx->end_query(pctx, ctx->timestamp_query); + pctx->get_query_result(pctx, ctx->timestamp_query, true, &result); + return result.u64; +} + +struct pipe_context * +d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + + struct d3d12_context *ctx = CALLOC_STRUCT(d3d12_context); + if (!ctx) + return NULL; + + ctx->base.screen = pscreen; + ctx->base.priv = priv; + + ctx->base.destroy = d3d12_context_destroy; + + ctx->base.create_vertex_elements_state = d3d12_create_vertex_elements_state; + ctx->base.bind_vertex_elements_state = d3d12_bind_vertex_elements_state; + ctx->base.delete_vertex_elements_state = d3d12_delete_vertex_elements_state; + + ctx->base.create_blend_state = d3d12_create_blend_state; + ctx->base.bind_blend_state = d3d12_bind_blend_state; + ctx->base.delete_blend_state = d3d12_delete_blend_state; + + ctx->base.create_depth_stencil_alpha_state = d3d12_create_depth_stencil_alpha_state; + ctx->base.bind_depth_stencil_alpha_state = d3d12_bind_depth_stencil_alpha_state; + ctx->base.delete_depth_stencil_alpha_state = d3d12_delete_depth_stencil_alpha_state; + + ctx->base.create_rasterizer_state = d3d12_create_rasterizer_state; + ctx->base.bind_rasterizer_state = d3d12_bind_rasterizer_state; + ctx->base.delete_rasterizer_state = d3d12_delete_rasterizer_state; + + 
ctx->base.create_sampler_state = d3d12_create_sampler_state; + ctx->base.bind_sampler_states = d3d12_bind_sampler_states; + ctx->base.delete_sampler_state = d3d12_delete_sampler_state; + + ctx->base.create_sampler_view = d3d12_create_sampler_view; + ctx->base.set_sampler_views = d3d12_set_sampler_views; + ctx->base.sampler_view_destroy = d3d12_destroy_sampler_view; + + ctx->base.create_vs_state = d3d12_create_vs_state; + ctx->base.bind_vs_state = d3d12_bind_vs_state; + ctx->base.delete_vs_state = d3d12_delete_vs_state; + + ctx->base.create_fs_state = d3d12_create_fs_state; + ctx->base.bind_fs_state = d3d12_bind_fs_state; + ctx->base.delete_fs_state = d3d12_delete_fs_state; + + ctx->base.create_gs_state = d3d12_create_gs_state; + ctx->base.bind_gs_state = d3d12_bind_gs_state; + ctx->base.delete_gs_state = d3d12_delete_gs_state; + + ctx->base.set_polygon_stipple = d3d12_set_polygon_stipple; + ctx->base.set_vertex_buffers = d3d12_set_vertex_buffers; + ctx->base.set_viewport_states = d3d12_set_viewport_states; + ctx->base.set_scissor_states = d3d12_set_scissor_states; + ctx->base.set_constant_buffer = d3d12_set_constant_buffer; + ctx->base.set_framebuffer_state = d3d12_set_framebuffer_state; + ctx->base.set_clip_state = d3d12_set_clip_state; + ctx->base.set_blend_color = d3d12_set_blend_color; + ctx->base.set_sample_mask = d3d12_set_sample_mask; + ctx->base.set_stencil_ref = d3d12_set_stencil_ref; + + ctx->base.create_stream_output_target = d3d12_create_stream_output_target; + ctx->base.stream_output_target_destroy = d3d12_stream_output_target_destroy; + ctx->base.set_stream_output_targets = d3d12_set_stream_output_targets; + + ctx->base.get_timestamp = d3d12_get_timestamp; + + ctx->base.clear = d3d12_clear; + ctx->base.clear_render_target = d3d12_clear_render_target; + ctx->base.clear_depth_stencil = d3d12_clear_depth_stencil; + ctx->base.draw_vbo = d3d12_draw_vbo; + ctx->base.flush = d3d12_flush; + ctx->base.flush_resource = d3d12_flush_resource; + + 
ctx->gfx_pipeline_state.sample_mask = ~0; + + d3d12_context_surface_init(&ctx->base); + d3d12_context_resource_init(&ctx->base); + d3d12_context_query_init(&ctx->base); + d3d12_context_blit_init(&ctx->base); + + + slab_create_child(&ctx->transfer_pool, &d3d12_screen(pscreen)->transfer_pool); + + ctx->base.stream_uploader = u_upload_create_default(&ctx->base); + ctx->base.const_uploader = u_upload_create_default(&ctx->base); + ctx->so_allocator = u_suballocator_create(&ctx->base, 4096, 0, + PIPE_USAGE_DEFAULT, + 0, true); + + struct primconvert_config cfg; + cfg.primtypes_mask = 1 << PIPE_PRIM_POINTS | + 1 << PIPE_PRIM_LINES | + 1 << PIPE_PRIM_LINE_STRIP | + 1 << PIPE_PRIM_TRIANGLES | + 1 << PIPE_PRIM_TRIANGLE_STRIP; + cfg.fixed_prim_restart = true; + ctx->primconvert = util_primconvert_create_config(&ctx->base, &cfg); + if (!ctx->primconvert) { + debug_printf("D3D12: failed to create primconvert\n"); + return NULL; + } + + d3d12_gfx_pipeline_state_cache_init(ctx); + d3d12_root_signature_cache_init(ctx); + d3d12_gs_variant_cache_init(ctx); + + HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL"); + if (!hD3D12Mod) { + debug_printf("D3D12: failed to load D3D12.DLL\n"); + return NULL; + } + ctx->D3D12SerializeVersionedRootSignature = + (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature"); + + if (FAILED(screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, + __uuidof(ctx->cmdqueue_fence), + (void **)&ctx->cmdqueue_fence))) { + FREE(ctx); + return NULL; + } + + for (int i = 0; i < ARRAY_SIZE(ctx->batches); ++i) { + if (!d3d12_init_batch(ctx, &ctx->batches[i])) { + FREE(ctx); + return NULL; + } + } + d3d12_start_batch(ctx, &ctx->batches[0]); + + ctx->rtv_pool = d3d12_descriptor_pool_new(&ctx->base, + D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + 64); + if (!ctx->rtv_pool) { + FREE(ctx); + return NULL; + } + + ctx->dsv_pool = d3d12_descriptor_pool_new(&ctx->base, + D3D12_DESCRIPTOR_HEAP_TYPE_DSV, + 64); + if (!ctx->dsv_pool) { + 
FREE(ctx); + return NULL; + } + + ctx->sampler_pool = d3d12_descriptor_pool_new(&ctx->base, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + 64); + if (!ctx->sampler_pool) { + FREE(ctx); + return NULL; + } + + ctx->view_pool = d3d12_descriptor_pool_new(&ctx->base, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + 1024); + if (!ctx->view_pool) { + debug_printf("D3D12: failed to create CBV/SRV descriptor pool\n"); + FREE(ctx); + return NULL; + } + + d3d12_init_null_srvs(ctx); + d3d12_init_null_rtv(ctx); + d3d12_init_null_sampler(ctx); + + ctx->validation_tools = d3d12_validator_create(); + + ctx->blitter = util_blitter_create(&ctx->base); + if (!ctx->blitter) + return NULL; + + ctx->resource_state_manager = new ResourceStateManager(); + + if (!d3d12_init_polygon_stipple(&ctx->base)) { + debug_printf("D3D12: failed to initialize polygon stipple resources\n"); + FREE(ctx); + return NULL; + } + + return &ctx->base; +} + +bool +d3d12_need_zero_one_depth_range(struct d3d12_context *ctx) +{ + struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; + + /** + * OpenGL Compatibility spec, section 15.2.3 (Shader Outputs) says + * the following: + * + * For fixed-point depth buffers, the final fragment depth written by + * a fragment shader is first clamped to [0, 1] and then converted to + * fixed-point as if it were a window z value (see section 13.8.1). + * For floating-point depth buffers, conversion is not performed but + * clamping is. Note that the depth range computation is not applied + * here, only the conversion to fixed-point. + * + * However, the D3D11.3 Functional Spec, section 17.10 (Depth Clamp) says + * the following: + * + * Depth values that reach the Output Merger, whether coming from + * interpolation or from Pixel Shader output (replacing the + * interpolated z), are always clamped: + * z = min(Viewport.MaxDepth,max(Viewport.MinDepth,z)) + * following the D3D11 Floating Point Rules(3.1) for min/max. 
+ * + * This means that we can't always use the fixed-function viewport-mapping + * D3D provides. + * + * There's only one case where the difference matters: When the fragment + * shader writes a non-implicit value to gl_FragDepth. In all other + * cases, the fragment either shouldn't have been rasterized in the + * first place, or the implicit gl_FragCoord.z-value should already have + * been clamped to the depth-range. + * + * For simplicity, let's assume that an explicitly written frag-result + * doesn't simply forward the value of gl_FragCoord.z. If it does, we'll + * end up generating needless code, but the result will be correct. + */ + + return fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH); +} diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h new file mode 100644 index 00000000000..b805218a949 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_context.h @@ -0,0 +1,334 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_CONTEXT_H +#define D3D12_CONTEXT_H + +#include "d3d12_batch.h" +#include "d3d12_descriptor_pool.h" +#include "d3d12_pipeline_state.h" +#include "d3d12_nir_lower_texcmp.h" + +#include "dxil_nir_lower_int_samplers.h" + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/list.h" +#include "util/slab.h" +#include "util/u_suballoc.h" + +#include + +#define D3D12_GFX_SHADER_STAGES (PIPE_SHADER_TYPES - 1) +#define D3D12_MAX_POINT_SIZE 255.0f + +enum d3d12_dirty_flags +{ + D3D12_DIRTY_NONE = 0, + D3D12_DIRTY_BLEND = (1 << 0), + D3D12_DIRTY_RASTERIZER = (1 << 1), + D3D12_DIRTY_ZSA = (1 << 2), + D3D12_DIRTY_VERTEX_ELEMENTS = (1 << 3), + D3D12_DIRTY_BLEND_COLOR = (1 << 4), + D3D12_DIRTY_STENCIL_REF = (1 << 5), + D3D12_DIRTY_SAMPLE_MASK = (1 << 6), + D3D12_DIRTY_VIEWPORT = (1 << 7), + D3D12_DIRTY_FRAMEBUFFER = (1 << 8), + D3D12_DIRTY_SCISSOR = (1 << 9), + D3D12_DIRTY_VERTEX_BUFFERS = (1 << 10), + D3D12_DIRTY_INDEX_BUFFER = (1 << 11), + D3D12_DIRTY_PRIM_MODE = (1 << 12), + D3D12_DIRTY_SHADER = (1 << 13), + D3D12_DIRTY_ROOT_SIGNATURE = (1 << 14), + D3D12_DIRTY_STREAM_OUTPUT = (1 << 15), + D3D12_DIRTY_STRIP_CUT_VALUE = (1 << 16), +}; + +enum d3d12_shader_dirty_flags +{ + D3D12_SHADER_DIRTY_CONSTBUF = (1 << 0), + D3D12_SHADER_DIRTY_SAMPLER_VIEWS = (1 << 1), + D3D12_SHADER_DIRTY_SAMPLERS = (1 << 2), +}; + +#define D3D12_DIRTY_PSO (D3D12_DIRTY_BLEND | D3D12_DIRTY_RASTERIZER | D3D12_DIRTY_ZSA | \ + D3D12_DIRTY_FRAMEBUFFER | D3D12_DIRTY_SAMPLE_MASK | \ + D3D12_DIRTY_VERTEX_ELEMENTS | D3D12_DIRTY_PRIM_MODE | \ + D3D12_DIRTY_SHADER | D3D12_DIRTY_ROOT_SIGNATURE | \ + D3D12_DIRTY_STRIP_CUT_VALUE) + +#define D3D12_SHADER_DIRTY_ALL (D3D12_SHADER_DIRTY_CONSTBUF | 
/**
 * Resource dimensionality used to select a null shader resource view.
 *
 * The values index d3d12_context::null_srvs, which holds one descriptor
 * handle per entry (the array is sized RESOURCE_DIMENSION_COUNT).  The
 * numeric values are assigned explicitly.
 */
enum resource_dimension
{
   RESOURCE_DIMENSION_UNKNOWN = 0,
   RESOURCE_DIMENSION_BUFFER = 1,
   RESOURCE_DIMENSION_TEXTURE1D = 2,
   RESOURCE_DIMENSION_TEXTURE2D = 3,
   RESOURCE_DIMENSION_TEXTURE2DMS = 4,
   RESOURCE_DIMENSION_TEXTURE3D = 5,
   RESOURCE_DIMENSION_TEXTURECUBE = 6,
   RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
   RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
   RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
   RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
   /* Number of dimensions above; not a dimension itself. */
   RESOURCE_DIMENSION_COUNT
};
pipe_sampler_view *pview) +{ + return (struct d3d12_sampler_view *)pview; +} + +struct d3d12_stream_output_target { + struct pipe_stream_output_target base; + struct pipe_resource *fill_buffer; + unsigned fill_buffer_offset; + uint64_t cached_filled_size; +}; + +struct d3d12_shader_state { + struct d3d12_shader *current; + unsigned state_dirty; +}; + +struct blitter_context; +struct primconvert_context; +struct d3d12_validation_tools; + +#ifdef __cplusplus +class ResourceStateManager; +#endif + +struct d3d12_context { + struct pipe_context base; + struct slab_child_pool transfer_pool; + struct primconvert_context *primconvert; + struct blitter_context *blitter; + struct u_suballocator *query_allocator; + struct u_suballocator *so_allocator; + struct hash_table *pso_cache; + struct hash_table *root_signature_cache; + struct hash_table *gs_variant_cache; + + struct d3d12_batch batches[4]; + unsigned current_batch_idx; + + struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + struct pipe_framebuffer_state fb; + struct pipe_vertex_buffer vbs[PIPE_MAX_ATTRIBS]; + D3D12_VERTEX_BUFFER_VIEW vbvs[PIPE_MAX_ATTRIBS]; + unsigned num_vbs; + float flip_y; + bool need_zero_one_depth_range; + enum pipe_prim_type initial_api_prim; + struct pipe_viewport_state viewport_states[PIPE_MAX_VIEWPORTS]; + D3D12_VIEWPORT viewports[PIPE_MAX_VIEWPORTS]; + unsigned num_viewports; + struct pipe_scissor_state scissor_states[PIPE_MAX_VIEWPORTS]; + D3D12_RECT scissors[PIPE_MAX_VIEWPORTS]; + float blend_factor[4]; + struct pipe_stencil_ref stencil_ref; + struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + unsigned num_sampler_views[PIPE_SHADER_TYPES]; + unsigned has_int_samplers; + struct d3d12_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[PIPE_SHADER_TYPES]; + D3D12_INDEX_BUFFER_VIEW ibv; + dxil_wrap_sampler_state tex_wrap_states[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + 
dxil_texture_swizzle_state tex_swizzle_state[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + enum compare_func tex_compare_func[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + struct { + bool enabled; + uint32_t pattern[32]; + struct pipe_resource *texture; + struct pipe_sampler_view *sampler_view; + struct d3d12_sampler_state *sampler_cso; + } pstipple; + + struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS]; + D3D12_STREAM_OUTPUT_BUFFER_VIEW so_buffer_views[PIPE_MAX_SO_BUFFERS]; + struct pipe_stream_output_target *fake_so_targets[PIPE_MAX_SO_BUFFERS]; + D3D12_STREAM_OUTPUT_BUFFER_VIEW fake_so_buffer_views[PIPE_MAX_SO_BUFFERS]; + unsigned fake_so_buffer_factor; + + struct d3d12_shader_selector *gfx_stages[D3D12_GFX_SHADER_STAGES]; + + struct d3d12_gfx_pipeline_state gfx_pipeline_state; + unsigned shader_dirty[D3D12_GFX_SHADER_STAGES]; + unsigned state_dirty; + unsigned cmdlist_dirty; + ID3D12PipelineState *current_pso; + bool reverse_depth_range; + + ID3D12Fence *cmdqueue_fence; + uint64_t fence_value; + ID3D12GraphicsCommandList *cmdlist; + + struct list_head active_queries; + bool queries_disabled; + + struct d3d12_descriptor_pool *rtv_pool; + struct d3d12_descriptor_pool *dsv_pool; + struct d3d12_descriptor_pool *sampler_pool; + struct d3d12_descriptor_pool *view_pool; + + struct d3d12_descriptor_handle null_srvs[RESOURCE_DIMENSION_COUNT]; + struct d3d12_descriptor_handle null_rtv; + struct d3d12_descriptor_handle null_sampler; + + PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE D3D12SerializeVersionedRootSignature; + struct d3d12_validation_tools *validation_tools; + + struct d3d12_resource *current_predication; + +#ifdef __cplusplus + ResourceStateManager *resource_state_manager; +#else + void *resource_state_manager; /* opaque pointer; we don't know about classes in C */ +#endif + struct pipe_query *timestamp_query; + + void *stencil_resolve_vs, *stencil_resolve_fs, *sampler_state; /* used by d3d12_blit.cpp */ +}; + +static inline 
/**
 * Iterate over the batches of \p ctx other than the current one, starting
 * with the slot after the current batch and skipping leading slots whose
 * fence is NULL.  \p batch names the loop variable and is declared by the
 * macro.
 *
 * Usage notes:
 *  - The macro expands to several statements and declares `oldest` and
 *    \p batch in the enclosing scope, so it can be used only once per
 *    scope and must not be the sole statement of an unbraced if/else/loop.
 *  - \p ctx is evaluated multiple times; pass a side-effect-free
 *    expression.
 */
#define d3d12_foreach_submitted_batch(ctx, batch) \
   unsigned oldest = ((ctx)->current_batch_idx + 1) % ARRAY_SIZE((ctx)->batches); \
   while ((ctx)->batches[oldest].fence == NULL && oldest != (ctx)->current_batch_idx) \
      oldest = (oldest + 1) % ARRAY_SIZE((ctx)->batches); \
   struct d3d12_batch *batch = &(ctx)->batches[oldest]; \
   for (; oldest != (ctx)->current_batch_idx; \
        oldest = (oldest + 1) % ARRAY_SIZE((ctx)->batches), \
        batch = &(ctx)->batches[oldest])
b/src/gallium/drivers/d3d12/d3d12_debug.h new file mode 100644 index 00000000000..f6601bb33d7 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_debug.h @@ -0,0 +1,48 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_DEBUG_H +#define D3D12_DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#define D3D12_DEBUG_VERBOSE (1 << 0) +#define D3D12_DEBUG_EXPERIMENTAL (1 << 1) +#define D3D12_DEBUG_DXIL (1 << 2) +#define D3D12_DEBUG_DISASS (1 << 3) +#define D3D12_DEBUG_BLIT (1 << 4) +#define D3D12_DEBUG_RESOURCE (1 << 5) +#define D3D12_DEBUG_DEBUG_LAYER (1 << 6) +#define D3D12_DEBUG_GPU_VALIDATOR (1 << 7) + +extern uint32_t d3d12_debug; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_descriptor_pool.cpp b/src/gallium/drivers/d3d12/d3d12_descriptor_pool.cpp new file mode 100644 index 00000000000..c7a2bc181fa --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_descriptor_pool.cpp @@ -0,0 +1,246 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_context.h" +#include "d3d12_descriptor_pool.h" +#include "d3d12_screen.h" + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "util/list.h" +#include "util/u_dynarray.h" +#include "util/u_memory.h" + +#include + +struct d3d12_descriptor_pool { + ID3D12Device *dev; + D3D12_DESCRIPTOR_HEAP_TYPE type; + uint32_t num_descriptors; + list_head heaps; +}; + +struct d3d12_descriptor_heap { + struct d3d12_descriptor_pool *pool; + + D3D12_DESCRIPTOR_HEAP_DESC desc; + ID3D12Device *dev; + ID3D12DescriptorHeap *heap; + uint32_t desc_size; + uint64_t cpu_base; + uint64_t gpu_base; + uint32_t size; + uint32_t next; + util_dynarray free_list; + list_head link; +}; + +struct d3d12_descriptor_heap* +d3d12_descriptor_heap_new(ID3D12Device *dev, + D3D12_DESCRIPTOR_HEAP_TYPE type, + D3D12_DESCRIPTOR_HEAP_FLAGS flags, + uint32_t num_descriptors) +{ + struct d3d12_descriptor_heap *heap = CALLOC_STRUCT(d3d12_descriptor_heap); + + heap->desc.NumDescriptors = num_descriptors; + heap->desc.Type = type; + heap->desc.Flags = flags; + if (FAILED(dev->CreateDescriptorHeap(&heap->desc, + __uuidof(heap->heap), + (void **)&heap->heap))) { + FREE(heap); + return NULL; + } + + heap->dev = dev; + heap->desc_size = dev->GetDescriptorHandleIncrementSize(type); + heap->size = num_descriptors * heap->desc_size; + heap->cpu_base = heap->heap->GetCPUDescriptorHandleForHeapStart().ptr; + if (flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + heap->gpu_base = heap->heap->GetGPUDescriptorHandleForHeapStart().ptr; + util_dynarray_init(&heap->free_list, NULL); + + return heap; +} + +void +d3d12_descriptor_heap_free(struct d3d12_descriptor_heap *heap) +{ + heap->heap->Release(); + util_dynarray_fini(&heap->free_list); + FREE(heap); +} + +ID3D12DescriptorHeap* +d3d12_descriptor_heap_get(struct d3d12_descriptor_heap *heap) +{ + return heap->heap; +} + +static uint32_t +d3d12_descriptor_heap_is_online(struct d3d12_descriptor_heap *heap) +{ + return (heap->desc.Flags & 
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) ? 1 : 0; +} + +static uint32_t +d3d12_descriptor_heap_can_allocate(struct d3d12_descriptor_heap *heap) +{ + return (heap->free_list.size > 0 || + heap->size >= heap->next + heap->desc_size); +} + +uint32_t +d3d12_descriptor_heap_get_remaining_handles(struct d3d12_descriptor_heap *heap) +{ + return (heap->size - heap->next) / heap->desc_size; +} + +void +d2d12_descriptor_heap_get_next_handle(struct d3d12_descriptor_heap *heap, + struct d3d12_descriptor_handle *handle) +{ + handle->heap = heap; + handle->cpu_handle.ptr = heap->cpu_base + heap->next; + handle->gpu_handle.ptr = d3d12_descriptor_heap_is_online(heap) ? + heap->gpu_base + heap->next : 0; +} + +uint32_t +d3d12_descriptor_heap_alloc_handle(struct d3d12_descriptor_heap *heap, + struct d3d12_descriptor_handle *handle) +{ + uint32_t offset = 0; + + assert(handle != NULL); + + if (heap->free_list.size > 0) { + offset = util_dynarray_pop(&heap->free_list, uint32_t); + } else if (heap->size >= heap->next + heap->desc_size) { + offset = heap->next; + heap->next += heap->desc_size; + } else { + /* Todo: we should add a new descriptor heap to get more handles */ + assert(0 && "No handles available in descriptor heap"); + return 0; + } + + handle->heap = heap; + handle->cpu_handle.ptr = heap->cpu_base + offset; + handle->gpu_handle.ptr = d3d12_descriptor_heap_is_online(heap) ? 
+ heap->gpu_base + offset : 0; + + return 1; +} + +void +d3d12_descriptor_handle_free(struct d3d12_descriptor_handle *handle) +{ + const uint32_t index = handle->cpu_handle.ptr - handle->heap->cpu_base; + if (index + handle->heap->desc_size == handle->heap->next) { + handle->heap->next = index; + } else { + util_dynarray_append(&handle->heap->free_list, uint32_t, index); + } + + handle->heap = NULL; + handle->cpu_handle.ptr = 0; + handle->gpu_handle.ptr = 0; +} + +void +d3d12_descriptor_heap_append_handles(struct d3d12_descriptor_heap *heap, + D3D12_CPU_DESCRIPTOR_HANDLE *handles, + unsigned num_handles) +{ + D3D12_CPU_DESCRIPTOR_HANDLE dst; + + assert(heap->next + (num_handles * heap->desc_size) <= heap->size); + dst.ptr = heap->cpu_base + heap->next; + heap->dev->CopyDescriptors(1, &dst, &num_handles, + num_handles, handles, NULL, + heap->desc.Type); + heap->next += num_handles * heap->desc_size; +} + +void +d3d12_descriptor_heap_clear(struct d3d12_descriptor_heap *heap) +{ + heap->next = 0; + util_dynarray_clear(&heap->free_list); +} + +struct d3d12_descriptor_pool* +d3d12_descriptor_pool_new(pipe_context *pctx, + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t num_descriptors) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + + struct d3d12_descriptor_pool *pool = CALLOC_STRUCT(d3d12_descriptor_pool); + if (!pool) + return NULL; + + pool->dev = d3d12_screen(pctx->screen)->dev; + pool->type = type; + pool->num_descriptors = num_descriptors; + list_inithead(&pool->heaps); + + return pool; +} + +void +d3d12_descriptor_pool_free(struct d3d12_descriptor_pool *pool) +{ + list_for_each_entry_safe(struct d3d12_descriptor_heap, heap, &pool->heaps, link) { + list_del(&heap->link); + d3d12_descriptor_heap_free(heap); + } + FREE(pool); +} + +uint32_t +d3d12_descriptor_pool_alloc_handle(struct d3d12_descriptor_pool *pool, + struct d3d12_descriptor_handle *handle) +{ + struct d3d12_descriptor_heap *valid_heap = NULL; + + list_for_each_entry(struct d3d12_descriptor_heap, 
heap, &pool->heaps, link) { + if (d3d12_descriptor_heap_can_allocate(heap)) { + valid_heap = heap; + break; + } + } + + if (!valid_heap) { + valid_heap = d3d12_descriptor_heap_new(pool->dev, + pool->type, + D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + pool->num_descriptors); + list_addtail(&valid_heap->link, &pool->heaps); + } + + return d3d12_descriptor_heap_alloc_handle(valid_heap, handle); +} diff --git a/src/gallium/drivers/d3d12/d3d12_descriptor_pool.h b/src/gallium/drivers/d3d12/d3d12_descriptor_pool.h new file mode 100644 index 00000000000..324e55a3e4d --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_descriptor_pool.h @@ -0,0 +1,98 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_DESCRIPTOR_POOL_H +#define D3D12_DESCRIPTOR_POOL_H + +#include "pipe/p_context.h" + +#define D3D12_IGNORE_SDK_LAYERS +#include + +struct d3d12_descriptor_pool; +struct d3d12_descriptor_heap; + +struct d3d12_descriptor_handle { + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; + struct d3d12_descriptor_heap *heap; +}; + +inline bool +d3d12_descriptor_handle_is_allocated(struct d3d12_descriptor_handle *handle) +{ + return (handle->heap != NULL); +} + +void +d3d12_descriptor_handle_free(struct d3d12_descriptor_handle *handle); + +/* Offline Descriptor Pool */ + +struct d3d12_descriptor_pool* +d3d12_descriptor_pool_new(struct pipe_context *pctx, + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t num_descriptors); + +void +d3d12_descriptor_pool_free(struct d3d12_descriptor_pool *pool); + +uint32_t +d3d12_descriptor_pool_alloc_handle(struct d3d12_descriptor_pool *pool, + struct d3d12_descriptor_handle *handle); + + +/* Online/Offline Descriptor Heaps */ + +struct d3d12_descriptor_heap* +d3d12_descriptor_heap_new(ID3D12Device *device, + D3D12_DESCRIPTOR_HEAP_TYPE type, + D3D12_DESCRIPTOR_HEAP_FLAGS flags, + uint32_t num_descriptors); + +void +d3d12_descriptor_heap_free(struct d3d12_descriptor_heap *heap); + +ID3D12DescriptorHeap* +d3d12_descriptor_heap_get(struct d3d12_descriptor_heap *heap); + +void +d2d12_descriptor_heap_get_next_handle(struct d3d12_descriptor_heap *heap, + struct d3d12_descriptor_handle *handle); + +uint32_t +d3d12_descriptor_heap_get_remaining_handles(struct d3d12_descriptor_heap *heap); + +uint32_t +d3d12_descriptor_heap_alloc_handle(struct d3d12_descriptor_heap *heap, + struct d3d12_descriptor_handle *handle); + +void +d3d12_descriptor_heap_append_handles(struct d3d12_descriptor_heap *heap, + D3D12_CPU_DESCRIPTOR_HANDLE *handles, + unsigned num_handles); + +void +d3d12_descriptor_heap_clear(struct d3d12_descriptor_heap *heap); + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp 
b/src/gallium/drivers/d3d12/d3d12_draw.cpp new file mode 100644 index 00000000000..44a5f01437b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -0,0 +1,723 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_compiler.h" +#include "d3d12_context.h" +#include "d3d12_format.h" +#include "d3d12_query.h" +#include "d3d12_resource.h" +#include "d3d12_root_signature.h" +#include "d3d12_screen.h" +#include "d3d12_surface.h" + +#include "util/u_debug.h" +#include "util/u_helpers.h" +#include "util/u_inlines.h" +#include "util/u_prim.h" +#include "util/u_prim_restart.h" +#include "util/u_math.h" + +extern "C" { +#include "indices/u_primconvert.h" +} + +static const D3D12_RECT MAX_SCISSOR = { D3D12_VIEWPORT_BOUNDS_MIN, + D3D12_VIEWPORT_BOUNDS_MIN, + D3D12_VIEWPORT_BOUNDS_MAX, + D3D12_VIEWPORT_BOUNDS_MAX }; + +static D3D12_GPU_DESCRIPTOR_HANDLE +fill_cbv_descriptors(struct d3d12_context *ctx, + struct d3d12_shader *shader, + int stage) +{ + struct d3d12_batch *batch = d3d12_current_batch(ctx); + struct d3d12_descriptor_handle table_start; + d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start); + + for (unsigned i = 0; i < shader->num_cb_bindings; i++) { + unsigned binding = shader->cb_bindings[i].binding; + struct pipe_constant_buffer *buffer = &ctx->cbufs[stage][binding]; + + assert(buffer->buffer_size > 0); + assert(buffer->buffer); + + struct d3d12_resource *res = d3d12_resource(buffer->buffer); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {}; + cbv_desc.BufferLocation = d3d12_resource_gpu_virtual_address(res) + buffer->buffer_offset; + cbv_desc.SizeInBytes = min(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16, + align(buffer->buffer_size, 256)); + d3d12_batch_reference_resource(batch, res); + + struct d3d12_descriptor_handle handle; + d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle); + d3d12_screen(ctx->base.screen)->dev->CreateConstantBufferView(&cbv_desc, handle.cpu_handle); + } + + return table_start.gpu_handle; +} + +static D3D12_GPU_DESCRIPTOR_HANDLE +fill_srv_descriptors(struct d3d12_context *ctx, + struct d3d12_shader *shader, 
+ unsigned stage) +{ + struct d3d12_batch *batch = d3d12_current_batch(ctx); + D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct d3d12_descriptor_handle table_start; + + d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start); + + for (int i = 0; i < shader->num_srv_bindings; i++) + { + struct d3d12_sampler_view *view; + + if (shader->srv_bindings[i].binding == shader->pstipple_binding) { + view = (struct d3d12_sampler_view*)ctx->pstipple.sampler_view; + } else { + int index = shader->srv_bindings[i].index; + view = (struct d3d12_sampler_view*)ctx->sampler_views[stage][index]; + } + + if (view != NULL) { + descs[i] = view->handle.cpu_handle ; + d3d12_batch_reference_sampler_view(batch, view); + + D3D12_RESOURCE_STATES state = (stage == PIPE_SHADER_FRAGMENT) ? + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE : + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + if (view->base.texture->target == PIPE_BUFFER) { + d3d12_transition_resource_state(ctx, d3d12_resource(view->base.texture), + state); + } else { + d3d12_transition_subresources_state(ctx, d3d12_resource(view->base.texture), + view->base.u.tex.first_level, view->mip_levels, + view->base.u.tex.first_layer, view->array_size, + 0, d3d12_get_format_num_planes(view->base.format), + state); + } + } else { + descs[i] = ctx->null_srvs[shader->srv_bindings[i].dimension].cpu_handle; + } + } + + d3d12_descriptor_heap_append_handles(batch->view_heap, descs, shader->num_srv_bindings); + + return table_start.gpu_handle; +} + +static D3D12_GPU_DESCRIPTOR_HANDLE +fill_sampler_descriptors(struct d3d12_context *ctx, + const struct d3d12_shader_selector *shader_sel, + unsigned stage) +{ + const struct d3d12_shader *shader = shader_sel->current; + struct d3d12_batch *batch = d3d12_current_batch(ctx); + D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct d3d12_descriptor_handle table_start; + + d2d12_descriptor_heap_get_next_handle(batch->sampler_heap, &table_start); + + 
for (int i = 0; i < shader->num_srv_bindings; i++) + { + struct d3d12_sampler_state *sampler; + + if (shader->srv_bindings[i].binding == shader->pstipple_binding) { + sampler = ctx->pstipple.sampler_cso; + } else { + int index = shader->srv_bindings[i].index; + sampler = ctx->samplers[stage][index]; + } + + if (sampler != NULL) { + if (sampler->is_shadow_sampler && shader_sel->compare_with_lod_bias_grad) + descs[i] = sampler->handle_without_shadow.cpu_handle; + else + descs[i] = sampler->handle.cpu_handle; + } else + descs[i] = ctx->null_sampler.cpu_handle; + } + + d3d12_descriptor_heap_append_handles(batch->sampler_heap, descs, shader->num_srv_bindings); + return table_start.gpu_handle; +} + +static unsigned +fill_state_vars(struct d3d12_context *ctx, + const struct pipe_draw_info *dinfo, + struct d3d12_shader *shader, + uint32_t *values) +{ + unsigned size = 0; + + for (unsigned j = 0; j < shader->num_state_vars; ++j) { + uint32_t *ptr = values + size; + + switch (shader->state_vars[j].var) { + case D3D12_STATE_VAR_Y_FLIP: + ptr[0] = fui(ctx->flip_y); + size += 4; + break; + case D3D12_STATE_VAR_PT_SPRITE: + ptr[0] = fui(1.0 / ctx->viewports[0].Width); + ptr[1] = fui(1.0 / ctx->viewports[0].Height); + ptr[2] = fui(ctx->gfx_pipeline_state.rast->base.point_size); + ptr[3] = fui(D3D12_MAX_POINT_SIZE); + size += 4; + break; + case D3D12_STATE_VAR_FIRST_VERTEX: + ptr[0] = dinfo->index_size ? 
dinfo->index_bias : dinfo->start; + size += 4; + break; + case D3D12_STATE_VAR_DEPTH_TRANSFORM: + ptr[0] = fui(2.0f * ctx->viewport_states[0].scale[2]); + ptr[1] = fui(ctx->viewport_states[0].translate[2] - ctx->viewport_states[0].scale[2]); + size += 4; + break; + default: + unreachable("unknown state variable"); + } + } + + return size; +} + +static bool +check_descriptors_left(struct d3d12_context *ctx) +{ + struct d3d12_batch *batch = d3d12_current_batch(ctx); + unsigned needed_descs = 0; + + for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) { + struct d3d12_shader_selector *shader = ctx->gfx_stages[i]; + + if (!shader) + continue; + + needed_descs += shader->current->num_cb_bindings; + needed_descs += shader->current->num_srv_bindings; + } + + if (d3d12_descriptor_heap_get_remaining_handles(batch->view_heap) < needed_descs) + return false; + + needed_descs = 0; + for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) { + struct d3d12_shader_selector *shader = ctx->gfx_stages[i]; + + if (!shader) + continue; + + needed_descs += shader->current->num_srv_bindings; + } + + if (d3d12_descriptor_heap_get_remaining_handles(batch->sampler_heap) < needed_descs) + return false; + + return true; +} + +static void +set_graphics_root_parameters(struct d3d12_context *ctx, + const struct pipe_draw_info *dinfo) +{ + unsigned num_params = 0; + + for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) { + if (!ctx->gfx_stages[i]) + continue; + + struct d3d12_shader_selector *shader_sel = ctx->gfx_stages[i]; + struct d3d12_shader *shader = shader_sel->current; + uint64_t dirty = ctx->shader_dirty[i]; + assert(shader); + + if (shader->num_cb_bindings > 0) { + if (dirty & D3D12_SHADER_DIRTY_CONSTBUF) + ctx->cmdlist->SetGraphicsRootDescriptorTable(num_params, fill_cbv_descriptors(ctx, shader, i)); + num_params++; + } + if (shader->num_srv_bindings > 0) { + if (dirty & D3D12_SHADER_DIRTY_SAMPLER_VIEWS) + ctx->cmdlist->SetGraphicsRootDescriptorTable(num_params, 
fill_srv_descriptors(ctx, shader, i)); + num_params++; + if (dirty & D3D12_SHADER_DIRTY_SAMPLERS) + ctx->cmdlist->SetGraphicsRootDescriptorTable(num_params, fill_sampler_descriptors(ctx, shader_sel, i)); + num_params++; + } + /* TODO Don't always update state vars */ + if (shader->num_state_vars > 0) { + uint32_t constants[D3D12_MAX_STATE_VARS * 4]; + unsigned size = fill_state_vars(ctx, dinfo, shader, constants); + ctx->cmdlist->SetGraphicsRoot32BitConstants(num_params, size, constants, 0); + num_params++; + } + } +} + +static bool +validate_stream_output_targets(struct d3d12_context *ctx) +{ + unsigned factor = 0; + + if (ctx->gfx_pipeline_state.num_so_targets && + ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY]) + factor = ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY]->key.gs.stream_output_factor; + + if (factor > 1) + return d3d12_enable_fake_so_buffers(ctx, factor); + else + return d3d12_disable_fake_so_buffers(ctx); +} + +static D3D_PRIMITIVE_TOPOLOGY +topology(enum pipe_prim_type prim_type) +{ + switch (prim_type) { + case PIPE_PRIM_POINTS: + return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + + case PIPE_PRIM_LINES: + return D3D_PRIMITIVE_TOPOLOGY_LINELIST; + + case PIPE_PRIM_LINE_STRIP: + return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + + case PIPE_PRIM_TRIANGLES: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + + case PIPE_PRIM_TRIANGLE_STRIP: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + + case PIPE_PRIM_LINES_ADJACENCY: + return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; + + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ; + + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ; + +/* + case PIPE_PRIM_PATCHES: + return D3D_PRIMITIVE_TOPOLOGY_PATCHLIST; +*/ + + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; /* HACK: this is just wrong! 
*/ + + default: + debug_printf("pipe_prim_type: %s\n", u_prim_name(prim_type)); + unreachable("unexpected enum pipe_prim_type"); + } +} + +static DXGI_FORMAT +ib_format(unsigned index_size) +{ + switch (index_size) { + case 1: return DXGI_FORMAT_R8_UINT; + case 2: return DXGI_FORMAT_R16_UINT; + case 4: return DXGI_FORMAT_R32_UINT; + + default: + unreachable("unexpected index-buffer size"); + } +} + +static void +twoface_emulation(struct d3d12_context *ctx, + struct d3d12_rasterizer_state *rast, + const struct pipe_draw_info *dinfo) +{ + /* draw backfaces */ + ctx->base.bind_rasterizer_state(&ctx->base, rast->twoface_back); + d3d12_draw_vbo(&ctx->base, dinfo); + + /* restore real state */ + ctx->base.bind_rasterizer_state(&ctx->base, rast); +} + +static void +transition_surface_subresources_state(struct d3d12_context *ctx, + struct pipe_surface *psurf, + struct pipe_resource *pres, + D3D12_RESOURCE_STATES state) +{ + struct d3d12_resource *res = d3d12_resource(pres); + unsigned start_layer, num_layers; + if (!d3d12_subresource_id_uses_layer(res->base.target)) { + start_layer = 0; + num_layers = 1; + } else { + start_layer = psurf->u.tex.first_layer; + num_layers = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1; + } + d3d12_transition_subresources_state(ctx, res, + psurf->u.tex.level, 1, + start_layer, num_layers, + 0, d3d12_get_format_num_planes(psurf->format), + state); +} + +static bool +prim_supported(enum pipe_prim_type prim_type) +{ + switch (prim_type) { + case PIPE_PRIM_POINTS: + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return true; + + default: + return false; + } +} + +static inline struct d3d12_shader_selector * +d3d12_last_vertex_stage(struct d3d12_context *ctx) +{ + struct d3d12_shader_selector *sel = 
ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; + if (!sel || sel->is_gs_variant) + sel = ctx->gfx_stages[PIPE_SHADER_VERTEX]; + return sel; +} + +void +d3d12_draw_vbo(struct pipe_context *pctx, + const struct pipe_draw_info *dinfo) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_batch *batch; + struct pipe_resource *index_buffer = NULL; + unsigned index_offset = 0; + enum d3d12_surface_conversion_mode conversion_modes[PIPE_MAX_COLOR_BUFS] = {0}; + + if (!prim_supported(dinfo->mode) || + dinfo->index_size == 1 || + (dinfo->primitive_restart && dinfo->restart_index != 0xffff && + dinfo->restart_index != 0xffffffff)) { + + if (!dinfo->primitive_restart && + !u_trim_pipe_prim(dinfo->mode, (unsigned *)&dinfo->count)) + return; + + ctx->initial_api_prim = dinfo->mode; + util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->gfx_pipeline_state.rast->base); + util_primconvert_draw_vbo(ctx->primconvert, dinfo); + return; + } + + for (int i = 0; i < ctx->fb.nr_cbufs; ++i) { + if (ctx->fb.cbufs[i]) { + struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]); + conversion_modes[i] = d3d12_surface_update_pre_draw(surface, d3d12_rtv_format(ctx, i)); + if (conversion_modes[i] != D3D12_SURFACE_CONVERSION_NONE) + ctx->cmdlist_dirty |= D3D12_DIRTY_FRAMEBUFFER; + } + } + + struct d3d12_rasterizer_state *rast = ctx->gfx_pipeline_state.rast; + if (rast->twoface_back) { + enum pipe_prim_type saved_mode = ctx->initial_api_prim; + twoface_emulation(ctx, rast, dinfo); + ctx->initial_api_prim = saved_mode; + } + + if (ctx->pstipple.enabled) + ctx->shader_dirty[PIPE_SHADER_FRAGMENT] |= D3D12_SHADER_DIRTY_SAMPLER_VIEWS | + D3D12_SHADER_DIRTY_SAMPLERS; + + /* this should *really* be fixed at a higher level than here! 
*/ + enum pipe_prim_type reduced_prim = u_reduced_prim(dinfo->mode); + if (reduced_prim == PIPE_PRIM_TRIANGLES && + ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT_AND_BACK) + return; + + if (ctx->gfx_pipeline_state.prim_type != dinfo->mode) { + ctx->gfx_pipeline_state.prim_type = dinfo->mode; + ctx->state_dirty |= D3D12_DIRTY_PRIM_MODE; + } + + d3d12_select_shader_variants(ctx, dinfo); + d3d12_validate_queries(ctx); + for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) { + struct d3d12_shader *shader = ctx->gfx_stages[i] ? ctx->gfx_stages[i]->current : NULL; + if (ctx->gfx_pipeline_state.stages[i] != shader) { + ctx->gfx_pipeline_state.stages[i] = shader; + ctx->state_dirty |= D3D12_DIRTY_SHADER; + } + } + + /* Reset to an invalid value after it's been used */ + ctx->initial_api_prim = PIPE_PRIM_MAX; + + /* Copy the stream output info from the current vertex/geometry shader */ + if (ctx->state_dirty & D3D12_DIRTY_SHADER) { + struct d3d12_shader_selector *sel = d3d12_last_vertex_stage(ctx); + if (sel) { + ctx->gfx_pipeline_state.so_info = sel->so_info; + } else { + memset(&ctx->gfx_pipeline_state.so_info, 0, sizeof(sel->so_info)); + } + } + if (!validate_stream_output_targets(ctx)) { + debug_printf("validate_stream_output_targets() failed\n"); + return; + } + + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value = + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; + if (dinfo->index_size > 0) { + assert(dinfo->index_size != 1); + + if (dinfo->has_user_indices) { + if (!util_upload_index_buffer(pctx, dinfo, &index_buffer, + &index_offset, 4)) { + debug_printf("util_upload_index_buffer() failed\n"); + return; + } + } else { + index_buffer = dinfo->index.resource; + } + + if (dinfo->primitive_restart) { + assert(dinfo->restart_index == 0xffff || + dinfo->restart_index == 0xffffffff); + ib_strip_cut_value = dinfo->restart_index == 0xffff ? 
+ D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF : + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; + } + } + + if (ctx->gfx_pipeline_state.ib_strip_cut_value != ib_strip_cut_value) { + ctx->gfx_pipeline_state.ib_strip_cut_value = ib_strip_cut_value; + ctx->state_dirty |= D3D12_DIRTY_STRIP_CUT_VALUE; + } + + if (!ctx->gfx_pipeline_state.root_signature || ctx->state_dirty & D3D12_DIRTY_SHADER) { + ID3D12RootSignature *root_signature = d3d12_get_root_signature(ctx); + if (ctx->gfx_pipeline_state.root_signature != root_signature) { + ctx->gfx_pipeline_state.root_signature = root_signature; + ctx->state_dirty |= D3D12_DIRTY_ROOT_SIGNATURE; + for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) + ctx->shader_dirty[i] |= D3D12_SHADER_DIRTY_ALL; + } + } + + if (!ctx->current_pso || ctx->state_dirty & D3D12_DIRTY_PSO) { + ctx->current_pso = d3d12_get_gfx_pipeline_state(ctx); + assert(ctx->current_pso); + } + + ctx->cmdlist_dirty |= ctx->state_dirty; + + if (!check_descriptors_left(ctx)) + d3d12_flush_cmdlist(ctx); + batch = d3d12_current_batch(ctx); + + if (ctx->cmdlist_dirty & D3D12_DIRTY_ROOT_SIGNATURE) { + d3d12_batch_reference_object(batch, ctx->gfx_pipeline_state.root_signature); + ctx->cmdlist->SetGraphicsRootSignature(ctx->gfx_pipeline_state.root_signature); + } + + if (ctx->cmdlist_dirty & D3D12_DIRTY_PSO) { + assert(ctx->current_pso); + d3d12_batch_reference_object(batch, ctx->current_pso); + ctx->cmdlist->SetPipelineState(ctx->current_pso); + } + + set_graphics_root_parameters(ctx, dinfo); + + bool need_zero_one_depth_range = d3d12_need_zero_one_depth_range(ctx); + if (need_zero_one_depth_range != ctx->need_zero_one_depth_range) { + ctx->cmdlist_dirty |= D3D12_DIRTY_VIEWPORT; + ctx->need_zero_one_depth_range = need_zero_one_depth_range; + } + + if (ctx->cmdlist_dirty & D3D12_DIRTY_VIEWPORT) { + if (ctx->need_zero_one_depth_range) { + D3D12_VIEWPORT viewports[PIPE_MAX_VIEWPORTS]; + for (int i = 0; i < ctx->num_viewports; ++i) { + viewports[i] = ctx->viewports[i]; + 
viewports[i].MinDepth = 0.0f; + viewports[i].MaxDepth = 1.0f; + } + ctx->cmdlist->RSSetViewports(ctx->num_viewports, viewports); + } else + ctx->cmdlist->RSSetViewports(ctx->num_viewports, ctx->viewports); + } + + if (ctx->cmdlist_dirty & D3D12_DIRTY_SCISSOR) { + if (ctx->gfx_pipeline_state.rast->base.scissor && ctx->num_viewports > 0) + ctx->cmdlist->RSSetScissorRects(ctx->num_viewports, ctx->scissors); + else + ctx->cmdlist->RSSetScissorRects(1, &MAX_SCISSOR); + } + + if (ctx->cmdlist_dirty & D3D12_DIRTY_BLEND_COLOR) { + unsigned blend_factor_flags = ctx->gfx_pipeline_state.blend->blend_factor_flags; + if (blend_factor_flags & (D3D12_BLEND_FACTOR_COLOR | D3D12_BLEND_FACTOR_ANY)) { + ctx->cmdlist->OMSetBlendFactor(ctx->blend_factor); + } else if (blend_factor_flags & D3D12_BLEND_FACTOR_ALPHA) { + float alpha_const[4] = { ctx->blend_factor[3], ctx->blend_factor[3], + ctx->blend_factor[3], ctx->blend_factor[3] }; + ctx->cmdlist->OMSetBlendFactor(alpha_const); + } + } + + if (ctx->cmdlist_dirty & D3D12_DIRTY_STENCIL_REF) + ctx->cmdlist->OMSetStencilRef(ctx->stencil_ref.ref_value[0]); + + if (ctx->cmdlist_dirty & D3D12_DIRTY_PRIM_MODE) + ctx->cmdlist->IASetPrimitiveTopology(topology(dinfo->mode)); + + for (unsigned i = 0; i < ctx->num_vbs; ++i) { + if (ctx->vbs[i].buffer.resource) { + struct d3d12_resource *res = d3d12_resource(ctx->vbs[i].buffer.resource); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS) + d3d12_batch_reference_resource(batch, res); + } + } + if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS) + ctx->cmdlist->IASetVertexBuffers(0, ctx->num_vbs, ctx->vbvs); + + if (index_buffer) { + D3D12_INDEX_BUFFER_VIEW ibv; + struct d3d12_resource *res = d3d12_resource(index_buffer); + ibv.BufferLocation = d3d12_resource_gpu_virtual_address(res) + index_offset; + ibv.SizeInBytes = res->base.width0 - index_offset; + ibv.Format = ib_format(dinfo->index_size); + 
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER); + if (ctx->cmdlist_dirty & D3D12_DIRTY_INDEX_BUFFER || + memcmp(&ctx->ibv, &ibv, sizeof(D3D12_INDEX_BUFFER_VIEW)) != 0) { + ctx->ibv = ibv; + d3d12_batch_reference_resource(batch, res); + ctx->cmdlist->IASetIndexBuffer(&ibv); + } + + if (dinfo->has_user_indices) + pipe_resource_reference(&index_buffer, NULL); + } + + if (ctx->cmdlist_dirty & D3D12_DIRTY_FRAMEBUFFER) { + D3D12_CPU_DESCRIPTOR_HANDLE render_targets[PIPE_MAX_COLOR_BUFS] = {}; + D3D12_CPU_DESCRIPTOR_HANDLE *depth_desc = NULL, tmp_desc; + for (int i = 0; i < ctx->fb.nr_cbufs; ++i) { + if (ctx->fb.cbufs[i]) { + struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]); + render_targets[i] = d3d12_surface_get_handle(surface, conversion_modes[i]); + d3d12_batch_reference_surface_texture(batch, surface); + } else + render_targets[i] = ctx->null_rtv.cpu_handle; + } + if (ctx->fb.zsbuf) { + struct d3d12_surface *surface = d3d12_surface(ctx->fb.zsbuf); + tmp_desc = surface->desc_handle.cpu_handle; + d3d12_batch_reference_surface_texture(batch, surface); + depth_desc = &tmp_desc; + } + ctx->cmdlist->OMSetRenderTargets(ctx->fb.nr_cbufs, render_targets, FALSE, depth_desc); + } + + struct pipe_stream_output_target **so_targets = ctx->fake_so_buffer_factor ? ctx->fake_so_targets + : ctx->so_targets; + D3D12_STREAM_OUTPUT_BUFFER_VIEW *so_buffer_views = ctx->fake_so_buffer_factor ? 
ctx->fake_so_buffer_views + : ctx->so_buffer_views; + for (int i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) { + struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)so_targets[i]; + + if (!target) + continue; + + struct d3d12_resource *so_buffer = d3d12_resource(target->base.buffer); + struct d3d12_resource *fill_buffer = d3d12_resource(target->fill_buffer); + + d3d12_resource_make_writeable(pctx, target->base.buffer); + + if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT) { + d3d12_batch_reference_resource(batch, so_buffer); + d3d12_batch_reference_resource(batch, fill_buffer); + } + + d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT); + d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT); + } + if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT) + ctx->cmdlist->SOSetTargets(0, 4, so_buffer_views); + + for (int i = 0; i < ctx->fb.nr_cbufs; ++i) { + struct pipe_surface *psurf = ctx->fb.cbufs[i]; + if (!psurf) + continue; + + struct pipe_resource *pres = conversion_modes[i] == D3D12_SURFACE_CONVERSION_BGRA_UINT ? 
+ d3d12_surface(psurf)->rgba_texture : psurf->texture; + transition_surface_subresources_state(ctx, psurf, pres, + D3D12_RESOURCE_STATE_RENDER_TARGET); + } + if (ctx->fb.zsbuf) { + struct pipe_surface *psurf = ctx->fb.zsbuf; + transition_surface_subresources_state(ctx, psurf, psurf->texture, + D3D12_RESOURCE_STATE_DEPTH_WRITE); + } + + d3d12_apply_resource_states(ctx); + + if (dinfo->index_size > 0) + ctx->cmdlist->DrawIndexedInstanced(dinfo->count, dinfo->instance_count, + dinfo->start, dinfo->index_bias, + dinfo->start_instance); + else + ctx->cmdlist->DrawInstanced(dinfo->count, dinfo->instance_count, + dinfo->start, dinfo->start_instance); + + ctx->state_dirty = 0; + + if (index_buffer) + ctx->cmdlist_dirty = 0; + else + ctx->cmdlist_dirty &= D3D12_DIRTY_INDEX_BUFFER; + + for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) + ctx->shader_dirty[i] = 0; + + for (int i = 0; i < ctx->fb.nr_cbufs; ++i) { + if (ctx->fb.cbufs[i]) { + struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]); + d3d12_surface_update_post_draw(surface, conversion_modes[i]); + } + } +} diff --git a/src/gallium/drivers/d3d12/d3d12_fence.cpp b/src/gallium/drivers/d3d12/d3d12_fence.cpp new file mode 100644 index 00000000000..7b6f6a64a54 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_fence.cpp @@ -0,0 +1,93 @@ + +#include "d3d12_fence.h" + +#include "d3d12_context.h" +#include "d3d12_screen.h" + +#include "util/u_memory.h" + +static void +destroy_fence(struct d3d12_fence *fence) +{ + if (fence->event) + CloseHandle(fence->event); + FREE(fence); +} + +struct d3d12_fence * +d3d12_create_fence(struct d3d12_screen *screen, struct d3d12_context *ctx) +{ + struct d3d12_fence *ret = CALLOC_STRUCT(d3d12_fence); + if (!ret) { + debug_printf("CALLOC_STRUCT failed\n"); + return NULL; + } + + ret->cmdqueue_fence = ctx->cmdqueue_fence; + ret->value = ++ctx->fence_value; + ret->event = CreateEvent(NULL, FALSE, FALSE, NULL); + if (FAILED(ctx->cmdqueue_fence->SetEventOnCompletion(ret->value, 
ret->event))) + goto fail; + if (FAILED(screen->cmdqueue->Signal(ctx->cmdqueue_fence, ret->value))) + goto fail; + + pipe_reference_init(&ret->reference, 1); + return ret; + +fail: + destroy_fence(ret); + return NULL; +} + +void +d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence) +{ + if (pipe_reference(&(*ptr)->reference, &fence->reference)) + destroy_fence((struct d3d12_fence *)*ptr); + + *ptr = fence; +} + +static void +fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **pptr, + struct pipe_fence_handle *pfence) +{ + d3d12_fence_reference((struct d3d12_fence **)pptr, d3d12_fence(pfence)); +} + +bool +d3d12_fence_finish(struct d3d12_fence *fence, uint64_t timeout_ns) +{ + if (fence->signaled) + return true; + + bool complete = fence->cmdqueue_fence->GetCompletedValue() >= fence->value; + if (!complete && timeout_ns) { + DWORD timeout_ms = (timeout_ns == PIPE_TIMEOUT_INFINITE) ? INFINITE : timeout_ns * 1000; + complete = WaitForSingleObject(fence->event, timeout_ms) == WAIT_OBJECT_0; + } + + fence->signaled = complete; + return complete; +} + +static bool +fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx, + struct pipe_fence_handle *pfence, uint64_t timeout_ns) +{ + bool ret = d3d12_fence_finish(d3d12_fence(pfence), timeout_ns); + if (ret && pctx) { + struct d3d12_context *ctx = d3d12_context(pctx); + d3d12_foreach_submitted_batch(ctx, batch) + d3d12_reset_batch(ctx, batch, 0); + } + return ret; +} + +void +d3d12_screen_fence_init(struct pipe_screen *pscreen) +{ + pscreen->fence_reference = fence_reference; + pscreen->fence_finish = fence_finish; +} diff --git a/src/gallium/drivers/d3d12/d3d12_fence.h b/src/gallium/drivers/d3d12/d3d12_fence.h new file mode 100644 index 00000000000..d14204eba0f --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_fence.h @@ -0,0 +1,60 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of 
this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_FENCE_H +#define D3D12_FENCE_H + +#include "util/u_inlines.h" + +#include + +struct pipe_screen; +struct d3d12_screen; + +struct d3d12_fence { + struct pipe_reference reference; + ID3D12Fence *cmdqueue_fence; + HANDLE event; + uint64_t value; + bool signaled; +}; + +static inline struct d3d12_fence * +d3d12_fence(struct pipe_fence_handle *pfence) +{ + return (struct d3d12_fence *)pfence; +} + +struct d3d12_fence * +d3d12_create_fence(struct d3d12_screen *screen, struct d3d12_context *ctx); + +void +d3d12_fence_reference(struct d3d12_fence **ptr, struct d3d12_fence *fence); + +bool +d3d12_fence_finish(struct d3d12_fence *fence, uint64_t timeout_ns); + +void +d3d12_screen_fence_init(struct pipe_screen *pscreen); + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_format.c b/src/gallium/drivers/d3d12/d3d12_format.c new file mode 100644 index 00000000000..302b182f56b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_format.c @@ -0,0 +1,298 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_format.h" + +#include "pipe/p_format.h" +#include "util/format/u_format.h" +#include "util/u_math.h" + +static const DXGI_FORMAT formats[PIPE_FORMAT_COUNT] = { +#define MAP_FORMAT_NORM(FMT) \ + [PIPE_FORMAT_ ## FMT ## _UNORM] = DXGI_FORMAT_ ## FMT ## _UNORM, \ + [PIPE_FORMAT_ ## FMT ## _SNORM] = DXGI_FORMAT_ ## FMT ## _SNORM, + +#define MAP_FORMAT_INT(FMT) \ + [PIPE_FORMAT_ ## FMT ## _UINT] = DXGI_FORMAT_ ## FMT ## _UINT, \ + [PIPE_FORMAT_ ## FMT ## _SINT] = DXGI_FORMAT_ ## FMT ## _SINT, + +#define MAP_FORMAT_SRGB(FMT) \ + [PIPE_FORMAT_ ## FMT ## _SRGB] = DXGI_FORMAT_ ## FMT ## _UNORM_SRGB, + +#define MAP_FORMAT_FLOAT(FMT) \ + [PIPE_FORMAT_ ## FMT ## _FLOAT] = DXGI_FORMAT_ ## FMT ## _FLOAT, + +#define MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE) \ + [PIPE_FORMAT_L ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \ + [PIPE_FORMAT_I ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \ + [PIPE_FORMAT_L ## BITS ## A ## BITS ## _ ## TYPE] = \ + DXGI_FORMAT_R ## BITS ## G ## BITS ## _ ## TYPE, + +#define MAP_EMU_FORMAT(BITS, TYPE) \ + [PIPE_FORMAT_A ## BITS ## _ ## TYPE] = DXGI_FORMAT_R ## BITS ## _ ## TYPE, \ + MAP_EMU_FORMAT_NO_ALPHA(BITS, TYPE) + + MAP_FORMAT_NORM(R8) + MAP_FORMAT_INT(R8) + + MAP_FORMAT_NORM(R8G8) + MAP_FORMAT_INT(R8G8) + + MAP_FORMAT_NORM(R8G8B8A8) + MAP_FORMAT_INT(R8G8B8A8) + MAP_FORMAT_SRGB(R8G8B8A8) + + [PIPE_FORMAT_B8G8R8X8_UNORM] = DXGI_FORMAT_B8G8R8X8_UNORM, + [PIPE_FORMAT_B8G8R8A8_UNORM] = DXGI_FORMAT_B8G8R8A8_UNORM, + + MAP_FORMAT_SRGB(B8G8R8A8) + + MAP_FORMAT_INT(R32) + MAP_FORMAT_FLOAT(R32) + MAP_FORMAT_INT(R32G32) + MAP_FORMAT_FLOAT(R32G32) + MAP_FORMAT_INT(R32G32B32) + MAP_FORMAT_FLOAT(R32G32B32) + MAP_FORMAT_INT(R32G32B32A32) + 
MAP_FORMAT_FLOAT(R32G32B32A32) + + MAP_FORMAT_NORM(R16) + MAP_FORMAT_INT(R16) + MAP_FORMAT_FLOAT(R16) + + MAP_FORMAT_NORM(R16G16) + MAP_FORMAT_INT(R16G16) + MAP_FORMAT_FLOAT(R16G16) + + MAP_FORMAT_NORM(R16G16B16A16) + MAP_FORMAT_INT(R16G16B16A16) + MAP_FORMAT_FLOAT(R16G16B16A16) + + [PIPE_FORMAT_A8_UNORM] = DXGI_FORMAT_A8_UNORM, + MAP_EMU_FORMAT_NO_ALPHA(8, UNORM) + MAP_EMU_FORMAT(8, SNORM) + MAP_EMU_FORMAT(8, SNORM) + MAP_EMU_FORMAT(8, SINT) + MAP_EMU_FORMAT(8, UINT) + MAP_EMU_FORMAT(16, UNORM) + MAP_EMU_FORMAT(16, SNORM) + MAP_EMU_FORMAT(16, SINT) + MAP_EMU_FORMAT(16, UINT) + MAP_EMU_FORMAT(16, FLOAT) + MAP_EMU_FORMAT(32, SINT) + MAP_EMU_FORMAT(32, UINT) + MAP_EMU_FORMAT(32, FLOAT) + + [PIPE_FORMAT_R9G9B9E5_FLOAT] = DXGI_FORMAT_R9G9B9E5_SHAREDEXP, + [PIPE_FORMAT_R11G11B10_FLOAT] = DXGI_FORMAT_R11G11B10_FLOAT, + [PIPE_FORMAT_R10G10B10A2_UINT] = DXGI_FORMAT_R10G10B10A2_UINT, + [PIPE_FORMAT_R10G10B10A2_UNORM] = DXGI_FORMAT_R10G10B10A2_UNORM, + + [PIPE_FORMAT_DXT1_RGB] = DXGI_FORMAT_BC1_UNORM, + [PIPE_FORMAT_DXT1_RGBA] = DXGI_FORMAT_BC1_UNORM, + [PIPE_FORMAT_DXT3_RGBA] = DXGI_FORMAT_BC2_UNORM, + [PIPE_FORMAT_DXT5_RGBA] = DXGI_FORMAT_BC3_UNORM, + + [PIPE_FORMAT_DXT1_SRGB] = DXGI_FORMAT_BC1_UNORM_SRGB, + [PIPE_FORMAT_DXT1_SRGBA] = DXGI_FORMAT_BC1_UNORM_SRGB, + [PIPE_FORMAT_DXT3_SRGBA] = DXGI_FORMAT_BC2_UNORM_SRGB, + [PIPE_FORMAT_DXT5_SRGBA] = DXGI_FORMAT_BC3_UNORM_SRGB, + + [PIPE_FORMAT_RGTC1_UNORM] = DXGI_FORMAT_BC4_UNORM, + [PIPE_FORMAT_RGTC1_SNORM] = DXGI_FORMAT_BC4_SNORM, + [PIPE_FORMAT_RGTC2_UNORM] = DXGI_FORMAT_BC5_UNORM, + [PIPE_FORMAT_RGTC2_SNORM] = DXGI_FORMAT_BC5_SNORM, + + [PIPE_FORMAT_Z32_FLOAT] = DXGI_FORMAT_R32_TYPELESS, + [PIPE_FORMAT_Z16_UNORM] = DXGI_FORMAT_R16_TYPELESS, + [PIPE_FORMAT_Z24X8_UNORM] = DXGI_FORMAT_R24G8_TYPELESS, + [PIPE_FORMAT_X24S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS, + + [PIPE_FORMAT_Z24_UNORM_S8_UINT] = DXGI_FORMAT_R24G8_TYPELESS, + [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = DXGI_FORMAT_R32G8X24_TYPELESS, + [PIPE_FORMAT_X32_S8X24_UINT] = 
DXGI_FORMAT_R32G8X24_TYPELESS, +}; + +DXGI_FORMAT +d3d12_get_format(enum pipe_format format) +{ + return formats[format]; +} + +DXGI_FORMAT +d3d12_get_resource_rt_format(enum pipe_format f) +{ + switch (f) { + case PIPE_FORMAT_Z16_UNORM: + return DXGI_FORMAT_D16_UNORM; + case PIPE_FORMAT_Z32_FLOAT: + return DXGI_FORMAT_D32_FLOAT; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X24S8_UINT: + return DXGI_FORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return DXGI_FORMAT_D24_UNORM_S8_UINT; + default: + return d3d12_get_format(f); + } +} + +DXGI_FORMAT +d3d12_get_resource_srv_format(enum pipe_format f, enum pipe_texture_target target) +{ + switch (f) { + case PIPE_FORMAT_Z16_UNORM: + return DXGI_FORMAT_R16_UNORM; + case PIPE_FORMAT_Z32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + case PIPE_FORMAT_X24S8_UINT: + return DXGI_FORMAT_X24_TYPELESS_G8_UINT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + case PIPE_FORMAT_X32_S8X24_UINT: + return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT; + case PIPE_FORMAT_A8_UNORM: + if (target == PIPE_BUFFER) + return DXGI_FORMAT_R8_UNORM; /* A8_UNORM is not supported for buffer SRV */ + /* passthrough */ + default: + return d3d12_get_format(f); + } +} + +#define DEF_SWIZZLE(name, X, Y, Z, W) \ + static const enum pipe_swizzle name ## _SWIZZLE[PIPE_SWIZZLE_MAX] = \ + { PIPE_SWIZZLE_ ## X, PIPE_SWIZZLE_ ## Y, PIPE_SWIZZLE_ ## Z, PIPE_SWIZZLE_ ## W, \ + PIPE_SWIZZLE_0, PIPE_SWIZZLE_1, PIPE_SWIZZLE_NONE } + +struct d3d12_format_info +d3d12_get_format_info(enum pipe_format pformat, enum pipe_texture_target target) +{ + DEF_SWIZZLE(IDENTITY, X, Y, Z, W); + DEF_SWIZZLE(RGB1, X, Y, Z, 1); + DEF_SWIZZLE(ALPHA, 0, 0, 0, W); + DEF_SWIZZLE(BUFFER, 0, 0, 0, X); + 
DEF_SWIZZLE(INTENSITY, X, X, X, X); + DEF_SWIZZLE(LUMINANCE, X, X, X, 1); + DEF_SWIZZLE(LUMINANCE_ALPHA, X, X, X, Y); + DEF_SWIZZLE(DEPTH, X, X, X, X); + DEF_SWIZZLE(STENCIL, Y, Y, Y, Y); + + const enum pipe_swizzle *swizzle = IDENTITY_SWIZZLE; + unsigned plane_slice = 0; + + if (pformat == PIPE_FORMAT_DXT1_RGB || + pformat == PIPE_FORMAT_DXT1_SRGB) + swizzle = RGB1_SWIZZLE; + + const struct util_format_description + *format_desc = util_format_description(pformat); + if (!util_format_is_srgb(pformat)) { + if (target == PIPE_BUFFER && util_format_is_alpha(pformat)) { + swizzle = BUFFER_SWIZZLE; + } else if (pformat == PIPE_FORMAT_A8_UNORM) { + /* no need to swizzle, it's natively supported */ + } else if (util_format_is_intensity(pformat)) { + swizzle = INTENSITY_SWIZZLE; + } else if (util_format_is_luminance(pformat)) { + swizzle = LUMINANCE_SWIZZLE; + } else if (util_format_is_luminance_alpha(pformat)) { + swizzle = LUMINANCE_ALPHA_SWIZZLE; + } else if (util_format_is_alpha(pformat)) { + swizzle = ALPHA_SWIZZLE; + } else if (util_format_has_depth(format_desc)) { + swizzle = DEPTH_SWIZZLE; + } else if (util_format_has_stencil(format_desc)) { + /* When reading from a stencil texture we have to use plane 1, and + * the formats X24S8 and X32_S8X24 have the actual data in the y-channel + * but the shader will read the x component so we need to adjust the swizzle. 
*/ + plane_slice = 1; + swizzle = STENCIL_SWIZZLE; + } + } + + return (struct d3d12_format_info) { .swizzle = swizzle, .plane_slice = plane_slice }; +} + +enum pipe_format +d3d12_emulated_vtx_format(enum pipe_format fmt) +{ + switch (fmt) { + case PIPE_FORMAT_R10G10B10A2_SNORM: + case PIPE_FORMAT_R10G10B10A2_SSCALED: + case PIPE_FORMAT_R10G10B10A2_USCALED: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_SNORM: + case PIPE_FORMAT_B10G10R10A2_SSCALED: + case PIPE_FORMAT_B10G10R10A2_USCALED: + return PIPE_FORMAT_R32_UINT; + + case PIPE_FORMAT_R8G8B8_SINT: + return PIPE_FORMAT_R8G8B8A8_SINT; + case PIPE_FORMAT_R8G8B8_UINT: + return PIPE_FORMAT_R8G8B8A8_UINT; + + case PIPE_FORMAT_R16G16B16_SINT: + return PIPE_FORMAT_R16G16B16A16_SINT; + case PIPE_FORMAT_R16G16B16_UINT: + return PIPE_FORMAT_R16G16B16A16_UINT; + + default: + return fmt; + } +} + + +unsigned +d3d12_non_opaque_plane_count(DXGI_FORMAT format) +{ + switch (format) { + case DXGI_FORMAT_V208: + case DXGI_FORMAT_V408: + return 3; + + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_P010: + case DXGI_FORMAT_P016: + case DXGI_FORMAT_YUY2: + case DXGI_FORMAT_Y210: + case DXGI_FORMAT_Y216: + case DXGI_FORMAT_NV11: + return 2; + } + + return 1; +} + +unsigned +d3d12_get_format_num_planes(enum pipe_format fmt) +{ + return util_format_is_depth_or_stencil(fmt) ? 
+ util_bitcount(util_format_get_mask(fmt)) : 1; +} diff --git a/src/gallium/drivers/d3d12/d3d12_format.h b/src/gallium/drivers/d3d12/d3d12_format.h new file mode 100644 index 00000000000..b578f4ce383 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_format.h @@ -0,0 +1,66 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_FORMATS_H +#define D3D12_FORMATS_H + +#include + +#include "pipe/p_format.h" +#include "pipe/p_defines.h" + +#ifdef __cplusplus +extern "C" { +#endif + +DXGI_FORMAT +d3d12_get_format(enum pipe_format format); + +DXGI_FORMAT +d3d12_get_resource_srv_format(enum pipe_format f, enum pipe_texture_target target); + +DXGI_FORMAT +d3d12_get_resource_rt_format(enum pipe_format f); + +unsigned +d3d12_non_opaque_plane_count(DXGI_FORMAT f); + +struct d3d12_format_info { + const enum pipe_swizzle *swizzle; + int plane_slice; +}; + +struct d3d12_format_info +d3d12_get_format_info(enum pipe_format format, enum pipe_texture_target); + +enum pipe_format +d3d12_emulated_vtx_format(enum pipe_format fmt); + +unsigned +d3d12_get_format_num_planes(enum pipe_format fmt); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_gs_variant.cpp b/src/gallium/drivers/d3d12/d3d12_gs_variant.cpp new file mode 100644 index 00000000000..8064085da57 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_gs_variant.cpp @@ -0,0 +1,516 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_compiler.h" +#include "d3d12_context.h" +#include "d3d12_debug.h" +#include "d3d12_screen.h" +#include "nir_to_dxil.h" + +#include "nir.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_builtin_builder.h" + +#include "util/u_memory.h" +#include "util/u_simple_shaders.h" + +static void +nir_emit_vertex(nir_builder *b, unsigned stream_id) +{ + nir_intrinsic_instr *instr; + + instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex); + nir_intrinsic_set_stream_id(instr, stream_id); + nir_builder_instr_insert(b, &instr->instr); +} + +static void +nir_end_primitve(nir_builder *b, unsigned stream_id) +{ + nir_intrinsic_instr *instr; + + instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive); + nir_intrinsic_set_stream_id(instr, 0); + nir_builder_instr_insert(b, &instr->instr); +} + +static nir_ssa_def * +nir_cull_face(nir_builder *b, nir_variable *vertices, bool ccw) +{ + nir_ssa_def *v0 = + nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 0))); + nir_ssa_def *v1 = + nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 1))); + nir_ssa_def *v2 = + nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, vertices), nir_imm_int(b, 2))); + + nir_ssa_def *dir = nir_fdot(b, nir_cross4(b, nir_fsub(b, v1, v0), + nir_fsub(b, v2, v0)), + nir_imm_vec4(b, 0.0, 0.0, -1.0, 0.0)); + if (ccw) + return nir_fge(b, nir_imm_int(b, 0), dir); + else + return nir_flt(b, nir_imm_int(b, 0), dir); +} + +static d3d12_shader_selector* +d3d12_make_passthrough_gs(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key) +{ + struct d3d12_shader_selector *gs; + 
uint64_t varyings = key->varyings.mask; + nir_builder b; + nir_shader *nir; + nir_intrinsic_instr *instr; + struct pipe_shader_state templ; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_GEOMETRY, + dxil_get_nir_compiler_options()); + + nir = b.shader; + nir->info.inputs_read = varyings; + nir->info.outputs_written = varyings; + nir->info.gs.input_primitive = GL_POINTS; + nir->info.gs.output_primitive = GL_POINTS; + nir->info.gs.vertices_in = 1; + nir->info.gs.vertices_out = 1; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 1; + nir->info.name = ralloc_strdup(nir, "passthrough"); + + /* Copy inputs to outputs. */ + while (varyings) { + nir_variable *in, *out; + char tmp[100]; + const int i = u_bit_scan64(&varyings); + + snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", key->varyings.vars[i].driver_location); + in = nir_variable_create(nir, + nir_var_shader_in, + glsl_array_type(key->varyings.vars[i].type, 1, false), + tmp); + in->data.location = i; + in->data.driver_location = key->varyings.vars[i].driver_location; + in->data.interpolation = key->varyings.vars[i].interpolation; + + snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", key->varyings.vars[i].driver_location); + out = nir_variable_create(nir, + nir_var_shader_out, + key->varyings.vars[i].type, + tmp); + out->data.location = i; + out->data.driver_location = key->varyings.vars[i].driver_location; + out->data.interpolation = key->varyings.vars[i].interpolation; + + nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), + nir_imm_int(&b, 0)); + nir_copy_deref(&b, nir_build_deref_var(&b, out), in_value); + } + + nir_emit_vertex(&b, 0); + nir_end_primitve(&b, 0); + + NIR_PASS_V(nir, nir_lower_var_copies); + nir_validate_shader(nir, "in d3d12_create_passthrough_gs"); + + templ.type = PIPE_SHADER_IR_NIR; + templ.ir.nir = nir; + templ.stream_output.num_outputs = 0; + + gs = d3d12_create_shader(ctx, PIPE_SHADER_GEOMETRY, &templ); + + return gs; +} + +struct 
emit_primitives_context +{ + struct d3d12_context *ctx; + nir_builder b; + + unsigned num_vars; + nir_variable *in[MAX_VARYING]; + nir_variable *out[MAX_VARYING]; + nir_variable *front_facing_var; + + nir_loop *loop; + nir_deref_instr *loop_index_deref; + nir_ssa_def *loop_index; + nir_ssa_def *edgeflag_cmp; + nir_ssa_def *front_facing; +}; + +static bool +d3d12_begin_emit_primitives_gs(struct emit_primitives_context *emit_ctx, + struct d3d12_context *ctx, + struct d3d12_gs_variant_key *key, + uint16_t output_primitive, + unsigned vertices_out) +{ + nir_builder *b = &emit_ctx->b; + nir_intrinsic_instr *instr; + nir_variable *edgeflag_var = NULL; + nir_variable *pos_var = NULL; + uint64_t varyings = key->varyings.mask; + + emit_ctx->ctx = ctx; + + nir_builder_init_simple_shader(b, NULL, MESA_SHADER_GEOMETRY, + dxil_get_nir_compiler_options()); + + nir_shader *nir = b->shader; + nir->info.inputs_read = varyings; + nir->info.outputs_written = varyings; + nir->info.gs.input_primitive = GL_TRIANGLES; + nir->info.gs.output_primitive = output_primitive; + nir->info.gs.vertices_in = 3; + nir->info.gs.vertices_out = vertices_out; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 1; + nir->info.name = ralloc_strdup(nir, "edgeflags"); + + while (varyings) { + char tmp[100]; + const int i = u_bit_scan64(&varyings); + + snprintf(tmp, ARRAY_SIZE(tmp), "in_%d", emit_ctx->num_vars); + emit_ctx->in[emit_ctx->num_vars] = nir_variable_create(nir, + nir_var_shader_in, + glsl_array_type(key->varyings.vars[i].type, 3, 0), + tmp); + emit_ctx->in[emit_ctx->num_vars]->data.location = i; + emit_ctx->in[emit_ctx->num_vars]->data.driver_location = key->varyings.vars[i].driver_location; + emit_ctx->in[emit_ctx->num_vars]->data.interpolation = key->varyings.vars[i].interpolation; + + /* Don't create an output for the edge flag variable */ + if (i == VARYING_SLOT_EDGE) { + edgeflag_var = emit_ctx->in[emit_ctx->num_vars]; + continue; + } else if (i == VARYING_SLOT_POS) { + 
pos_var = emit_ctx->in[emit_ctx->num_vars]; + } + + snprintf(tmp, ARRAY_SIZE(tmp), "out_%d", emit_ctx->num_vars); + emit_ctx->out[emit_ctx->num_vars] = nir_variable_create(nir, + nir_var_shader_out, + key->varyings.vars[i].type, + tmp); + emit_ctx->out[emit_ctx->num_vars]->data.location = i; + emit_ctx->out[emit_ctx->num_vars]->data.driver_location = key->varyings.vars[i].driver_location; + emit_ctx->out[emit_ctx->num_vars]->data.interpolation = key->varyings.vars[i].interpolation; + + emit_ctx->num_vars++; + } + + if (key->has_front_face) { + emit_ctx->front_facing_var = nir_variable_create(nir, + nir_var_shader_out, + glsl_uint_type(), + "gl_FrontFacing"); + emit_ctx->front_facing_var->data.location = VARYING_SLOT_VAR12; + emit_ctx->front_facing_var->data.driver_location = emit_ctx->num_vars; + emit_ctx->front_facing_var->data.interpolation = INTERP_MODE_FLAT; + } + + /* Temporary variable "loop_index" to loop over input vertices */ + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + nir_variable *loop_index_var = + nir_local_variable_create(impl, glsl_uint_type(), "loop_index"); + emit_ctx->loop_index_deref = nir_build_deref_var(b, loop_index_var); + nir_store_deref(b, emit_ctx->loop_index_deref, nir_imm_int(b, 0), 1); + + nir_ssa_def *diagonal_vertex = NULL; + if (key->edge_flag_fix) { + nir_ssa_def *prim_id = nir_load_primitive_id(b); + nir_ssa_def *odd = nir_build_alu(b, nir_op_imod, + prim_id, + nir_imm_int(b, 2), + NULL, NULL); + diagonal_vertex = nir_bcsel(b, nir_i2b(b, odd), + nir_imm_int(b, 2), + nir_imm_int(b, 1)); + } + + if (key->cull_mode != PIPE_FACE_NONE || key->has_front_face) { + if (key->cull_mode == PIPE_FACE_BACK) + emit_ctx->edgeflag_cmp = nir_cull_face(b, pos_var, key->front_ccw); + else if (key->cull_mode == PIPE_FACE_FRONT) + emit_ctx->edgeflag_cmp = nir_cull_face(b, pos_var, !key->front_ccw); + + if (key->has_front_face) { + if (key->cull_mode == PIPE_FACE_BACK) + emit_ctx->front_facing = emit_ctx->edgeflag_cmp; + else + 
emit_ctx->front_facing = nir_cull_face(b, pos_var, key->front_ccw); + emit_ctx->front_facing = nir_i2i32(b, emit_ctx->front_facing); + } + } + + /** + * while { + * if (loop_index >= 3) + * break; + */ + emit_ctx->loop = nir_push_loop(b); + + emit_ctx->loop_index = nir_load_deref(b, emit_ctx->loop_index_deref); + nir_ssa_def *cmp = nir_ige(b, emit_ctx->loop_index, + nir_imm_int(b, 3)); + nir_if *loop_check = nir_push_if(b, cmp); + nir_jump(b, nir_jump_break); + nir_pop_if(b, loop_check); + + if (edgeflag_var) { + nir_ssa_def *edge_flag = + nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, edgeflag_var), emit_ctx->loop_index)); + nir_ssa_def *is_edge = nir_feq(b, nir_channel(b, edge_flag, 0), nir_imm_float(b, 1.0)); + if (emit_ctx->edgeflag_cmp) + emit_ctx->edgeflag_cmp = nir_iand(b, emit_ctx->edgeflag_cmp, is_edge); + else + emit_ctx->edgeflag_cmp = is_edge; + } + + if (key->edge_flag_fix) { + nir_ssa_def *is_edge = nir_ine(b, emit_ctx->loop_index, diagonal_vertex); + if (emit_ctx->edgeflag_cmp) + emit_ctx->edgeflag_cmp = nir_iand(b, emit_ctx->edgeflag_cmp, is_edge); + else + emit_ctx->edgeflag_cmp = is_edge; + } + + return true; +} + +static struct d3d12_shader_selector * +d3d12_finish_emit_primitives_gs(struct emit_primitives_context *emit_ctx, bool end_primitive) +{ + struct d3d12_shader_selector *gs; + struct pipe_shader_state templ; + nir_builder *b = &emit_ctx->b; + nir_shader *nir = b->shader; + + /** + * loop_index++; + * } + */ + nir_store_deref(b, emit_ctx->loop_index_deref, nir_iadd_imm(b, emit_ctx->loop_index, 1), 1); + nir_pop_loop(b, emit_ctx->loop); + + if (end_primitive) + nir_end_primitve(b, 0); + + nir_validate_shader(nir, "in d3d12_lower_edge_flags"); + + NIR_PASS_V(nir, nir_lower_var_copies); + + templ.type = PIPE_SHADER_IR_NIR; + templ.ir.nir = nir; + templ.stream_output.num_outputs = 0; + + return d3d12_create_shader(emit_ctx->ctx, PIPE_SHADER_GEOMETRY, &templ); +} + +static d3d12_shader_selector* +d3d12_emit_points(struct 
d3d12_context *ctx, struct d3d12_gs_variant_key *key) +{ + struct emit_primitives_context emit_ctx = {0}; + nir_builder *b = &emit_ctx.b; + nir_intrinsic_instr *instr; + + d3d12_begin_emit_primitives_gs(&emit_ctx, ctx, key, GL_POINTS, 3); + + /** + * if (edge_flag) + * out_position = in_position; + * else + * out_position = vec4(-2.0, -2.0, 0.0, 1.0); // Invalid position + * + * [...] // Copy other variables + * + * EmitVertex(); + */ + for (unsigned i = 0; i < emit_ctx.num_vars; ++i) { + nir_ssa_def *index = (key->flat_varyings & (1 << emit_ctx.in[i]->data.location)) ? + nir_imm_int(b, (key->flatshade_first ? 0 : 2)) : emit_ctx.loop_index; + nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index); + if (emit_ctx.in[i]->data.location == VARYING_SLOT_POS && emit_ctx.edgeflag_cmp) { + nir_if *edge_check = nir_push_if(b, emit_ctx.edgeflag_cmp); + nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value); + nir_if *edge_else = nir_push_else(b, edge_check); + nir_store_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), + nir_imm_vec4(b, -2.0, -2.0, 0.0, 1.0), 0xf); + nir_pop_if(b, edge_else); + } else { + nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value); + } + } + if (key->has_front_face) + nir_store_var(b, emit_ctx.front_facing_var, emit_ctx.front_facing, 0x1); + nir_emit_vertex(b, 0); + + return d3d12_finish_emit_primitives_gs(&emit_ctx, false); +} + +static d3d12_shader_selector* +d3d12_emit_lines(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key) +{ + struct emit_primitives_context emit_ctx = {0}; + nir_builder *b = &emit_ctx.b; + + d3d12_begin_emit_primitives_gs(&emit_ctx, ctx, key, GL_LINE_STRIP, 6); + + nir_ssa_def *next_index = nir_imod(b, nir_iadd_imm(b, emit_ctx.loop_index, 1), nir_imm_int(b, 3)); + + /* First vertex */ + for (unsigned i = 0; i < emit_ctx.num_vars; ++i) { + nir_ssa_def *index = (key->flat_varyings & (1 << emit_ctx.in[i]->data.location)) ? 
+ nir_imm_int(b, (key->flatshade_first ? 0 : 2)) : emit_ctx.loop_index; + nir_deref_instr *in_value = nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index); + nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value); + } + if (key->has_front_face) + nir_store_var(b, emit_ctx.front_facing_var, emit_ctx.front_facing, 0x1); + nir_emit_vertex(b, 0); + + /* Second vertex. If not an edge, use same position as first vertex */ + for (unsigned i = 0; i < emit_ctx.num_vars; ++i) { + nir_ssa_def *index = next_index; + if (emit_ctx.in[i]->data.location == VARYING_SLOT_POS) + index = nir_bcsel(b, emit_ctx.edgeflag_cmp, next_index, emit_ctx.loop_index); + else if (key->flat_varyings & (1 << emit_ctx.in[i]->data.location)) + index = nir_imm_int(b, 2); + nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), + nir_build_deref_array(b, nir_build_deref_var(b, emit_ctx.in[i]), index)); + } + if (key->has_front_face) + nir_store_var(b, emit_ctx.front_facing_var, emit_ctx.front_facing, 0x1); + nir_emit_vertex(b, 0); + + nir_end_primitve(b, 0); + + return d3d12_finish_emit_primitives_gs(&emit_ctx, false); +} + +static d3d12_shader_selector* +d3d12_emit_triangles(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key) +{ + struct emit_primitives_context emit_ctx = {0}; + nir_builder *b = &emit_ctx.b; + nir_intrinsic_instr *instr; + + d3d12_begin_emit_primitives_gs(&emit_ctx, ctx, key, GL_TRIANGLE_STRIP, 3); + + /** + * [...] 
// Copy variables
+    *
+    *   EmitVertex();
+    */
+
+   /* Offset added to loop_index before the modulo-3 below: the requested
+    * provoking vertex, or 3 (which the modulo turns into "no rotation"). */
+   nir_ssa_def *incr = nir_imm_int(b, key->provoking_vertex > 0 ? key->provoking_vertex : 3);
+
+   if (key->alternate_tri) {
+      /* primitive_id % 2 is 1 for odd primitives, so those rotate one less. */
+      nir_ssa_def *is_odd = nir_imod(b, nir_load_primitive_id(b), nir_imm_int(b, 2));
+      incr = nir_isub(b, incr, is_odd);
+   }
+
+   assert(incr != NULL);
+   nir_ssa_def *index = nir_imod(b, nir_iadd(b, emit_ctx.loop_index, incr), nir_imm_int(b, 3));
+   for (unsigned i = 0; i < emit_ctx.num_vars; ++i) {
+      nir_deref_instr *in_var = nir_build_deref_var(b, emit_ctx.in[i]);
+      nir_deref_instr *in_value = nir_build_deref_array(b, in_var, index);
+      nir_copy_deref(b, nir_build_deref_var(b, emit_ctx.out[i]), in_value);
+   }
+   nir_emit_vertex(b, 0);
+
+   return d3d12_finish_emit_primitives_gs(&emit_ctx, true);
+}
+
+/* Hash the raw bytes of a d3d12_gs_variant_key.
+ * NOTE(review): this covers every byte of the struct, so any padding must be
+ * initialised consistently by whoever builds the key. */
+static uint32_t
+hash_gs_variant_key(const void *key)
+{
+   const size_t key_size = sizeof(struct d3d12_gs_variant_key);
+
+   return _mesa_hash_data(key, key_size);
+}
+
+/* Byte-wise equality of two d3d12_gs_variant_key objects. */
+static bool
+equals_gs_variant_key(const void *a, const void *b)
+{
+   return !memcmp(a, b, sizeof(struct d3d12_gs_variant_key));
+}
+
+/* Create the per-context cache of geometry-shader variants. */
+void
+d3d12_gs_variant_cache_init(struct d3d12_context *ctx)
+{
+   /* No hash callback is installed; d3d12_get_gs_variant() computes the hash
+    * itself and only uses the *_pre_hashed table entry points. */
+   ctx->gs_variant_cache = _mesa_hash_table_create(NULL, NULL, equals_gs_variant_key);
+}
+
+/* Hash-table destroy callback: release the shader selector stored as data. */
+static void
+delete_entry(struct hash_entry *entry)
+{
+   d3d12_shader_selector *gs = (d3d12_shader_selector *)entry->data;
+
+   d3d12_shader_free(gs);
+}
+
+/* Destroy the variant cache, freeing every cached shader via delete_entry(). */
+void
+d3d12_gs_variant_cache_destroy(struct d3d12_context *ctx)
+{
+   _mesa_hash_table_destroy(ctx->gs_variant_cache, delete_entry);
+}
+
+static struct d3d12_shader_selector *
+create_geometry_shader_variant(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   d3d12_shader_selector *gs = NULL;
+
+   if (key->passthrough)
+      gs = d3d12_make_passthrough_gs(ctx, key);
+   else if (key->provoking_vertex > 0 || key->alternate_tri)
+      gs = d3d12_emit_triangles(ctx, key);
+   else if (key->fill_mode == PIPE_POLYGON_MODE_POINT)
+      gs = d3d12_emit_points(ctx, key);
+   else if (key->fill_mode == PIPE_POLYGON_MODE_LINE)
+      gs = d3d12_emit_lines(ctx, key);
+
+   if (gs) {
+      gs->is_gs_variant = true;
+      gs->gs_key = *key;
+   }
+
+   return gs;
+}
+
+/**
+ * Look up, or create and cache, the geometry-shader variant matching \p key.
+ *
+ * The cache entry is keyed by the copy of the key stored inside the shader
+ * selector (gs->gs_key), so the caller's \p key does not need to outlive
+ * this call.
+ *
+ * \param ctx  context owning gs_variant_cache
+ * \param key  variant description; hashed and compared byte-wise
+ * \return the cached or newly created selector, or NULL when no variant
+ *         could be created for \p key (nothing is inserted in that case)
+ */
+d3d12_shader_selector *
+d3d12_get_gs_variant(struct d3d12_context *ctx, struct d3d12_gs_variant_key *key)
+{
+   uint32_t hash = hash_gs_variant_key(key);
+   struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->gs_variant_cache,
+                                                                 hash, key);
+   if (entry)
+      return (d3d12_shader_selector *)entry->data;
+
+   d3d12_shader_selector *gs = create_geometry_shader_variant(ctx, key);
+   if (!gs) {
+      /* create_geometry_shader_variant() returns NULL when no generator
+       * matches or creation fails.  Taking &gs->gs_key here would hand the
+       * table a bogus key pointer that later lookups would memcmp against. */
+      return NULL;
+   }
+
+   entry = _mesa_hash_table_insert_pre_hashed(ctx->gs_variant_cache,
+                                              hash, &gs->gs_key, gs);
+   assert(entry);
+
+   return gs;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_lower_int_cubemap_to_array.c b/src/gallium/drivers/d3d12/d3d12_lower_int_cubemap_to_array.c
new file mode 100644
index 00000000000..d71689f15c9
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_lower_int_cubemap_to_array.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "d3d12_nir_passes.h"
+
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+/* Instruction filter for nir_shader_lower_instructions(): selects
+ * tex/txb/txd/txl/txs/lod instructions that sample a cube map whose sampler
+ * result type is an integer type.  The sampler is identified through the
+ * instruction's sampler deref source, which must be present.
+ */
+static bool
+lower_int_cubmap_to_array_filter(const nir_instr *instr,
+                                 UNUSED const void *_options)
+{
+   if (instr->type != nir_instr_type_tex)
+      return false;
+
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+   if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+      return false;
+
+   switch (tex->op) {
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txl:
+   case nir_texop_txs:
+   case nir_texop_lod:
+      break;
+   default:
+      return false;
+   }
+
+   int sampler_deref = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+   assert(sampler_deref >= 0);
+   nir_deref_instr *deref = nir_instr_as_deref(tex->src[sampler_deref].src.ssa->parent_instr);
+   nir_variable *cube = nir_deref_instr_get_variable(deref);
+
+   /* The variable may be an array of cube samplers (the lowering itself
+    * handles that case), so query the result type of the element type
+    * rather than of the array type. */
+   const struct glsl_type *sampler_type = glsl_without_array(cube->type);
+   return glsl_base_type_is_integer(glsl_get_sampler_result_type(sampler_type));
+}
+
+/* Cube-map direction vector split into components: r{x,y,z} are the
+ * coordinate channels and ar{x,y,z} their absolute values. */
+typedef struct {
+   nir_ssa_def *rx;
+   nir_ssa_def *ry;
+   nir_ssa_def *rz;
+   nir_ssa_def *arx;
+   nir_ssa_def *ary;
+   nir_ssa_def *arz;
+} coord_t;
+
+
+/* This is taken from sp_tex_sample:convert_cube
+ *
+ * Face evaluation for the case where X is the major axis.  Returns a vec3
+ * holding the 2D coordinate in xy and the face index in z: 0.0 when rx >= 0,
+ * 1.0 otherwise.
+ */
+static nir_ssa_def *
+evaluate_face_x(nir_builder *b, coord_t *coord)
+{
+   nir_ssa_def *sign = nir_fsign(b, coord->rx);
+   nir_ssa_def *positive = nir_fge(b, coord->rx, nir_imm_float(b, 0.0));
+   nir_ssa_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arx);
+
+   nir_ssa_def *x = nir_fadd(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), nir_imm_float(b, 0.5));
+   nir_ssa_def *y = nir_fadd(b, nir_fmul(b, ima, coord->ry), nir_imm_float(b, 0.5));
+   nir_ssa_def *face = nir_bcsel(b, positive, nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
+
+   return nir_vec3(b, x,y, face);
+}
+
+static nir_ssa_def * +evaluate_face_y(nir_builder *b, coord_t *coord) +{ + nir_ssa_def *sign = nir_fsign(b, coord->ry); + nir_ssa_def *positive = nir_fge(b, coord->ry, nir_imm_float(b, 0.0)); + nir_ssa_def *ima = nir_fdiv(b, nir_imm_float(b, 0.5), coord->ary); + + nir_ssa_def *x = nir_fadd(b, nir_fmul(b, ima, coord->rx), nir_imm_float(b, 0.5)); + nir_ssa_def *y = nir_fadd(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), nir_imm_float(b, 0.5)); + nir_ssa_def *face = nir_bcsel(b, positive, nir_imm_float(b, 2.0), nir_imm_float(b, 3.0)); + + return nir_vec3(b, x,y, face); +} + +static nir_ssa_def * +evaluate_face_z(nir_builder *b, coord_t *coord) +{ + nir_ssa_def *sign = nir_fsign(b, coord->rz); + nir_ssa_def *positive = nir_fge(b, coord->rz, nir_imm_float(b, 0.0)); + nir_ssa_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arz); + + nir_ssa_def *x = nir_fadd(b, nir_fmul(b, nir_fmul(b, sign, ima), nir_fneg(b, coord->rx)), nir_imm_float(b, 0.5)); + nir_ssa_def *y = nir_fadd(b, nir_fmul(b, ima, coord->ry), nir_imm_float(b, 0.5)); + nir_ssa_def *face = nir_bcsel(b, positive, nir_imm_float(b, 4.0), nir_imm_float(b, 5.0)); + + return nir_vec3(b, x,y, face); +} + +static nir_ssa_def * +create_array_tex_from_cube_tex(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coord) +{ + nir_tex_instr *array_tex; + + array_tex = nir_tex_instr_create(b->shader, tex->num_srcs); + array_tex->op = tex->op; + array_tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + array_tex->is_array = true; + array_tex->is_shadow = tex->is_shadow; + array_tex->is_new_style_shadow = tex->is_new_style_shadow; + array_tex->texture_index = tex->texture_index; + array_tex->sampler_index = tex->sampler_index; + array_tex->dest_type = tex->dest_type; + array_tex->coord_components = 3; + + nir_src coord_src = nir_src_for_ssa(coord); + for (unsigned i = 0; i < tex->num_srcs; i++) { + nir_src *psrc = (tex->src[i].src_type == nir_tex_src_coord) ? 
+ &coord_src : &tex->src[i].src; + + nir_src_copy(&array_tex->src[i].src, psrc, array_tex); + array_tex->src[i].src_type = tex->src[i].src_type; + } + + nir_ssa_dest_init(&array_tex->instr, &array_tex->dest, + nir_tex_instr_dest_size(array_tex), 32, NULL); + nir_builder_instr_insert(b, &array_tex->instr); + return &array_tex->dest.ssa; +} + +static nir_ssa_def * +lower_cube_sample(nir_builder *b, nir_tex_instr *tex) +{ + /* We don't support cube map arrays yet */ + assert(!tex->is_array); + + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + assert(coord_index >= 0); + + /* Evaluate the face and the xy coordinates for a 2D tex op */ + nir_ssa_def *coord = tex->src[coord_index].src.ssa; + + coord_t coords; + coords.rx = nir_channel(b, coord, 0); + coords.ry = nir_channel(b, coord, 1); + coords.rz = nir_channel(b, coord, 2); + coords.arx = nir_fabs(b, coords.rx); + coords.ary = nir_fabs(b, coords.ry); + coords.arz = nir_fabs(b, coords.rz); + + nir_ssa_def *use_face_x = nir_iand(b, + nir_fge(b, coords.arx, coords.ary), + nir_fge(b, coords.arx, coords.arz)); + + nir_if *use_face_x_if = nir_push_if(b, use_face_x); + nir_ssa_def *face_x_coord = evaluate_face_x(b, &coords); + nir_if *use_face_x_else = nir_push_else(b, use_face_x_if); + + nir_ssa_def *use_face_y = nir_iand(b, + nir_fge(b, coords.ary, coords.arx), + nir_fge(b, coords.ary, coords.arz)); + + nir_if *use_face_y_if = nir_push_if(b, use_face_y); + nir_ssa_def *face_y_coord = evaluate_face_y(b, &coords); + nir_if *use_face_y_else = nir_push_else(b, use_face_y_if); + + nir_ssa_def *face_z_coord = evaluate_face_z(b, &coords); + + nir_pop_if(b, use_face_y_else); + nir_ssa_def *face_y_or_z_coord = nir_if_phi(b, face_y_coord, face_z_coord); + nir_pop_if(b, use_face_x_else); + + // This contains in xy the normalized sample coordinates, and in z the face index + nir_ssa_def *coord_and_face = nir_if_phi(b, face_x_coord, face_y_or_z_coord); + + return create_array_tex_from_cube_tex(b, tex, 
coord_and_face); +} + +/* We don't expect the array size here */ +static nir_ssa_def * +lower_cube_txs(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_after_instr(&tex->instr); + return nir_channels(b, &tex->dest.ssa, 3); +} + +static const struct glsl_type * +make_2darray_from_cubemap(const struct glsl_type *type) +{ + return glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ? + glsl_sampler_type( + GLSL_SAMPLER_DIM_2D, + false, true, + glsl_get_sampler_result_type(type)) : type; +} + +static const struct glsl_type * +make_2darray_from_cubemap_with_array(const struct glsl_type *type) +{ + /* While we don't (yet) support cube map arrays, there still may be arrays + * of cube maps */ + if (glsl_type_is_array(type)) { + const struct glsl_type *new_type = glsl_without_array(type); + return new_type != type ? glsl_array_type(make_2darray_from_cubemap(glsl_without_array(type)), + glsl_get_length(type), 0) : type; + } else + return make_2darray_from_cubemap(type); +} + +static nir_ssa_def * +lower_int_cubmap_to_array_impl(nir_builder *b, nir_instr *instr, + UNUSED void *_options) +{ + nir_tex_instr *tex = nir_instr_as_tex(instr); + + int sampler_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); + assert(sampler_index >= 0); + + nir_deref_instr *sampler_deref = nir_instr_as_deref(tex->src[sampler_index].src.ssa->parent_instr); + nir_variable *sampler = nir_deref_instr_get_variable(sampler_deref); + + sampler->type = make_2darray_from_cubemap_with_array(sampler->type); + sampler_deref->type = sampler->type; + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txd: + case nir_texop_txl: + case nir_texop_lod: + return lower_cube_sample(b, tex); + case nir_texop_txs: + return lower_cube_txs(b, tex); + default: + unreachable("Unsupported cupe map texture operation"); + } +} + +bool +d3d12_lower_int_cubmap_to_array(nir_shader *s) +{ + bool result = + nir_shader_lower_instructions(s, + lower_int_cubmap_to_array_filter, + 
lower_int_cubmap_to_array_impl, + NULL); + + if (result) { + nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) { + if (glsl_type_is_sampler(var->type)) { + if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE && + (glsl_base_type_is_integer(glsl_get_sampler_result_type(var->type)))) { + var->type = make_2darray_from_cubemap_with_array(var->type); + } + } + } + } + return result; + +} diff --git a/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c b/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c new file mode 100644 index 00000000000..8b8a5a4a5f9 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_lower_point_sprite.c @@ -0,0 +1,307 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "nir.h" +#include "nir_builder.h" +#include "d3d12_compiler.h" +#include "program/prog_statevars.h" + +struct lower_state { + nir_variable *uniform; /* (1/w, 1/h, pt_sz, max_sz) */ + nir_variable *pos_out; + nir_variable *psiz_out; + nir_variable *point_coord_out[9]; + unsigned num_point_coords; + nir_variable *varying_out[VARYING_SLOT_MAX]; + + nir_ssa_def *point_dir_imm[4]; + nir_ssa_def *point_coord_imm[4]; + + /* Current point primitive */ + nir_ssa_def *point_pos; + nir_ssa_def *point_size; + nir_ssa_def *varying[VARYING_SLOT_MAX]; + unsigned varying_write_mask[VARYING_SLOT_MAX]; + + bool sprite_origin_lower_left; + bool point_size_per_vertex; + bool aa_point; +}; + +static void +find_outputs(nir_shader *shader, struct lower_state *state) +{ + nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { + switch (var->data.location) { + case VARYING_SLOT_POS: + state->pos_out = var; + break; + case VARYING_SLOT_PSIZ: + state->psiz_out = var; + break; + default: + state->varying_out[var->data.location] = var; + break; + } + } +} + +static nir_ssa_def * +get_point_dir(nir_builder *b, struct lower_state *state, unsigned i) +{ + if (state->point_dir_imm[0] == NULL) { + state->point_dir_imm[0] = nir_imm_vec2(b, -1, -1); + state->point_dir_imm[1] = nir_imm_vec2(b, -1, 1); + state->point_dir_imm[2] = nir_imm_vec2(b, 1, -1); + state->point_dir_imm[3] = nir_imm_vec2(b, 1, 1); + } + + return state->point_dir_imm[i]; +} + +static nir_ssa_def * +get_point_coord(nir_builder *b, struct lower_state *state, unsigned i) +{ + if (state->point_coord_imm[0] == NULL) { + if (state->sprite_origin_lower_left) { + state->point_coord_imm[0] = nir_imm_vec4(b, 0, 0, 0, 1); + state->point_coord_imm[1] = nir_imm_vec4(b, 0, 1, 0, 1); + state->point_coord_imm[2] = nir_imm_vec4(b, 1, 0, 0, 1); + state->point_coord_imm[3] = nir_imm_vec4(b, 1, 1, 0, 1); + } else { + state->point_coord_imm[0] = nir_imm_vec4(b, 0, 1, 0, 1); + state->point_coord_imm[1] = nir_imm_vec4(b, 0, 
0, 0, 1); + state->point_coord_imm[2] = nir_imm_vec4(b, 1, 1, 0, 1); + state->point_coord_imm[3] = nir_imm_vec4(b, 1, 0, 0, 1); + } + } + + return state->point_coord_imm[i]; +} + +/** + * scaled_point_size = pointSize * pos.w * ViewportSizeRcp + */ +static void +get_scaled_point_size(nir_builder *b, struct lower_state *state, + nir_ssa_def **x, nir_ssa_def **y) +{ + /* State uniform contains: (1/ViewportWidth, 1/ViewportHeight, PointSize, MaxPointSize) */ + nir_ssa_def *uniform = nir_load_var(b, state->uniform); + nir_ssa_def *point_size = state->point_size; + + /* clamp point-size to valid range */ + if (point_size && state->point_size_per_vertex) { + point_size = nir_fmax(b, point_size, nir_imm_float(b, 1.0f)); + point_size = nir_fmin(b, point_size, nir_imm_float(b, D3D12_MAX_POINT_SIZE)); + } else { + /* Use static point size (from uniform) if the shader output was not set */ + point_size = nir_channel(b, uniform, 2); + } + + point_size = nir_fmul(b, point_size, nir_channel(b, state->point_pos, 3)); + *x = nir_fmul(b, point_size, nir_channel(b, uniform, 0)); + *y = nir_fmul(b, point_size, nir_channel(b, uniform, 1)); +} + +static bool +lower_store(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state *state) +{ + nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); + if (nir_deref_mode_is(deref, nir_var_shader_out)) { + nir_variable *var = nir_deref_instr_get_variable(deref); + + switch (var->data.location) { + case VARYING_SLOT_POS: + state->point_pos = instr->src[1].ssa; + break; + case VARYING_SLOT_PSIZ: + state->point_size = instr->src[1].ssa; + break; + default: + state->varying[var->data.location] = instr->src[1].ssa; + state->varying_write_mask[var->data.location] = nir_intrinsic_write_mask(instr); + break; + } + + nir_instr_remove(&instr->instr); + return true; + } + + return false; +} + +static bool +lower_emit_vertex(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state *state) +{ + unsigned stream_id = 
nir_intrinsic_stream_id(instr); + + nir_ssa_def *point_width, *point_height; + get_scaled_point_size(b, state, &point_width, &point_height); + + nir_instr_remove(&instr->instr); + + for (unsigned i = 0; i < 4; i++) { + /* All outputs need to be emitted for each vertex */ + for (unsigned slot = 0; slot < VARYING_SLOT_MAX; ++slot) { + if (state->varying[slot] != NULL) { + nir_store_var(b, state->varying_out[slot], state->varying[slot], + state->varying_write_mask[slot]); + } + } + + /* pos = scaled_point_size * point_dir + point_pos */ + nir_ssa_def *point_dir = get_point_dir(b, state, i); + nir_ssa_def *pos = nir_vec4(b, + nir_ffma(b, + point_width, + nir_channel(b, point_dir, 0), + nir_channel(b, state->point_pos, 0)), + nir_ffma(b, + point_height, + nir_channel(b, point_dir, 1), + nir_channel(b, state->point_pos, 1)), + nir_channel(b, state->point_pos, 2), + nir_channel(b, state->point_pos, 3)); + nir_store_var(b, state->pos_out, pos, 0xf); + + /* point coord */ + nir_ssa_def *point_coord = get_point_coord(b, state, i); + for (unsigned j = 0; j < state->num_point_coords; ++j) + nir_store_var(b, state->point_coord_out[j], point_coord, 0xf); + + /* EmitVertex */ + instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex); + nir_intrinsic_set_stream_id(instr, stream_id); + nir_builder_instr_insert(b, &instr->instr); + } + + /* EndPrimitive */ + instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive); + nir_intrinsic_set_stream_id(instr, stream_id); + nir_builder_instr_insert(b, &instr->instr); + + /* Reset everything */ + state->point_pos = NULL; + state->point_size = NULL; + for (unsigned i = 0; i < VARYING_SLOT_MAX; ++i) + state->varying[i] = NULL; + + return true; +} + +static bool +lower_instr(nir_intrinsic_instr *instr, nir_builder *b, struct lower_state *state) +{ + b->cursor = nir_before_instr(&instr->instr); + + if (instr->intrinsic == nir_intrinsic_store_deref) { + return lower_store(instr, b, state); + } else if 
(instr->intrinsic == nir_intrinsic_emit_vertex) { + return lower_emit_vertex(instr, b, state); + } else if (instr->intrinsic == nir_intrinsic_end_primitive) { + nir_instr_remove(&instr->instr); + return true; + } + + return false; +} + +bool +d3d12_lower_point_sprite(nir_shader *shader, + bool sprite_origin_lower_left, + bool point_size_per_vertex, + unsigned point_coord_enable, + uint64_t next_inputs_read) +{ + const gl_state_index16 tokens[5] = { STATE_INTERNAL, + STATE_INTERNAL_DRIVER, + D3D12_STATE_VAR_PT_SPRITE }; + struct lower_state state; + bool progress = false; + + assert(shader->info.gs.output_primitive == GL_POINTS); + + memset(&state, 0, sizeof(state)); + find_outputs(shader, &state); + state.sprite_origin_lower_left = sprite_origin_lower_left; + state.point_size_per_vertex = point_size_per_vertex; + + /* Create uniform to retrieve inverse of viewport size and point size: + * (1/ViewportWidth, 1/ViewportHeight, PointSize, MaxPointSize) */ + state.uniform = nir_variable_create(shader, + nir_var_uniform, + glsl_vec4_type(), + "d3d12_ViewportSizeRcp"); + state.uniform->num_state_slots = 1; + state.uniform->state_slots = ralloc_array(state.uniform, nir_state_slot, 1); + memcpy(state.uniform->state_slots[0].tokens, tokens, + sizeof(state.uniform->state_slots[0].tokens)); + shader->num_uniforms++; + + /* Create new outputs for point tex coordinates */ + unsigned count = 0; + for (unsigned int sem = 0; sem < 9; sem++) { + if (point_coord_enable & BITFIELD64_BIT(sem)) { + char tmp[100]; + unsigned location = VARYING_SLOT_VAR0 + sem; + + snprintf(tmp, ARRAY_SIZE(tmp), "gl_TexCoord%dMESA", count); + + nir_variable *var = nir_variable_create(shader, + nir_var_shader_out, + glsl_vec4_type(), + tmp); + var->data.location = location; + state.point_coord_out[count++] = var; + } + } + state.num_point_coords = count; + if (point_coord_enable) { + d3d12_reassign_driver_locations(shader, nir_var_shader_out, + next_inputs_read); + } + + nir_foreach_function(function, 
shader) { + if (function->impl) { + nir_builder builder; + nir_builder_init(&builder, function->impl); + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_intrinsic) + progress |= lower_instr(nir_instr_as_intrinsic(instr), + &builder, + &state); + } + } + + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + shader->info.gs.output_primitive = GL_TRIANGLE_STRIP; + shader->info.gs.vertices_out *= 4; + + return progress; +} diff --git a/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.c b/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.c new file mode 100644 index 00000000000..0002b4ca737 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.c @@ -0,0 +1,150 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "d3d12_nir_lower_texcmp.h"
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+/* Instruction filter: selects tex/txb/txl/txd instructions that are shadow
+ * (depth-compare) lookups.
+ * NOTE(review): this function has external linkage but is not declared in
+ * d3d12_nir_lower_texcmp.h; it could probably be static. */
+bool
+lower_sample_tex_compare_filter(const nir_instr *instr,
+                                UNUSED const void *_options)
+{
+   if (instr->type != nir_instr_type_tex)
+      return false;
+
+   /* To be consistent we also want to lower tex when we lower anything,
+    * otherwise the differences in evaluating the shadow value might lead
+    * to artifacts. */
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+   if (tex->op != nir_texop_txb &&
+       tex->op != nir_texop_txl &&
+       tex->op != nir_texop_txd &&
+       tex->op != nir_texop_tex)
+      return false;
+
+   return tex->is_shadow;
+}
+
+/* Return the non-shadow float sampler type with the same dimensionality and
+ * array-ness as \p type. */
+static const struct glsl_type *
+strip_shadow(const struct glsl_type *type)
+{
+   const struct glsl_type *new_type =
+         glsl_sampler_type(
+            glsl_get_sampler_dim(type),
+            false, glsl_sampler_type_is_array(type),
+            GLSL_TYPE_FLOAT);
+   return new_type;
+}
+
+
+/* Like strip_shadow(), but also accepts an array of samplers and rebuilds
+ * the array type around the stripped element type. */
+static const struct glsl_type *
+strip_shadow_with_array(const struct glsl_type *type)
+{
+   if (glsl_type_is_array(type))
+      return glsl_array_type(strip_shadow(glsl_without_array(type)),
+                             glsl_get_length(type), 0);
+   return strip_shadow(type);
+}
+
+/* Per-binding sampler state handed to the lowering callback. */
+typedef struct {
+   unsigned n_states;                        /* bindings below this have state */
+   enum compare_func *compare_func;          /* compare function per binding */
+   dxil_texture_swizzle_state *tex_swizzles; /* result swizzle per binding */
+} sampler_state;
+
+/* Lowering callback: turn a shadow lookup into a plain lookup followed by an
+ * explicit comparison.
+ *
+ * The instruction is converted in place to a non-shadow lookup with a
+ * four-component result.  Its red channel is compared against the
+ * comparator source (divided by the projector when one is present) using
+ * the compare function of the sampler's binding.  The boolean is converted
+ * to float and expanded through the binding's swizzle into the returned
+ * vector.  The sampler variable and deref are retyped to the non-shadow
+ * sampler type and the comparator source is removed.
+ */
+static nir_ssa_def *
+lower_sample_tex_compare_impl(nir_builder *b, nir_instr *instr,
+                              void *options)
+
+{
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+   sampler_state *state = (sampler_state *)options;
+
+   b->cursor = nir_after_instr(instr);
+   tex->is_shadow = false;
+
+   /* The filter only passes shadow lookups, which carry a comparator. */
+   int comp_index = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
+   assert(comp_index >= 0);
+
+   int sampler_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+   assert(sampler_index >= 0);
+
+   nir_deref_instr *sampler_deref =
+      nir_instr_as_deref(tex->src[sampler_index].src.ssa->parent_instr);
+   nir_variable *sampler = nir_deref_instr_get_variable(sampler_deref);
+
+   /* Both per-binding arrays are only trusted for bindings below n_states. */
+   const unsigned binding = sampler->data.binding;
+   const bool has_state = binding < state->n_states;
+
+   /* NIR expects a vec4 result from the above texture instructions */
+   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
+
+   nir_ssa_def *tex_r = nir_channel(b, &tex->dest.ssa, 0);
+   nir_ssa_def *cmp = tex->src[comp_index].src.ssa;
+
+   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
+   if (proj_index >= 0)
+      cmp = nir_fmul(b, cmp, nir_frcp(b, tex->src[proj_index].src.ssa));
+
+   nir_ssa_def * result =
+         nir_compare_func(b,
+                          has_state ? state->compare_func[binding] : COMPARE_FUNC_ALWAYS,
+                          cmp, tex_r);
+
+   result = nir_b2f32(b, result);
+   nir_ssa_def *one = nir_imm_float(b, 1.0);
+   nir_ssa_def *zero = nir_imm_float(b, 0.0);
+
+   /* Indexed by swizzle selector: 0 picks the comparison result, 4 and 5
+    * pick the constants 0.0 and 1.0.  Selectors 1..3 have no entry.
+    * NOTE(review): presumably these are PIPE_SWIZZLE_* values and a depth
+    * result never selects Y/Z/W; confirm against the caller. */
+   nir_ssa_def *lookup[6] = {result, NULL, NULL, NULL, zero, one};
+
+   /* Without state for this binding, replicate the comparison result rather
+    * than indexing tex_swizzles out of range.
+    * NOTE(review): confirm that this is the desired fallback swizzle. */
+   nir_ssa_def *r[4] = {result, result, result, result};
+   if (has_state) {
+      const dxil_texture_swizzle_state *swizzle = &state->tex_swizzles[binding];
+      r[0] = lookup[swizzle->swizzle_r];
+      r[1] = lookup[swizzle->swizzle_g];
+      r[2] = lookup[swizzle->swizzle_b];
+      r[3] = lookup[swizzle->swizzle_a];
+   }
+
+   result = nir_vec(b, r, nir_dest_num_components(tex->dest));
+
+   sampler->type = strip_shadow_with_array(sampler->type);
+   sampler_deref->type = sampler->type;
+
+   nir_tex_instr_remove_src(tex, comp_index);
+
+   return result;
+}
+
+/* Run the shadow-compare lowering over every instruction of \p s.
+ *
+ * \param s             shader to lower in place
+ * \param n_states      number of bindings that have per-binding state
+ * \param compare_func  compare function per binding
+ * \param tex_swizzles  result swizzle per binding
+ * \return the result of nir_shader_lower_instructions()
+ */
+bool
+d3d12_lower_sample_tex_compare(nir_shader *s,
+                               unsigned n_states,
+                               enum compare_func *compare_func,
+                               dxil_texture_swizzle_state *tex_swizzles)
+{
+   sampler_state state = {n_states, compare_func, tex_swizzles};
+
+   bool result =
+      nir_shader_lower_instructions(s,
+                                    lower_sample_tex_compare_filter,
+                                    lower_sample_tex_compare_impl,
+                                    &state);
+   return result;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.h b/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.h
new file mode 100644
index 00000000000..2bb3707f597
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_nir_lower_texcmp.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, 
free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef D3D12_NIR_LOWER_TEXCOMP_H
+#define D3D12_NIR_LOWER_TEXCOMP_H
+
+#include "dxil_nir_lower_int_samplers.h"
+
+#include "pipe/p_state.h"
+#include "compiler/shader_enums.h"
+#include "nir.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Lower shadow (depth-compare) tex/txb/txl/txd instructions in shader s to a
+ * plain texture lookup followed by an explicit comparison.
+ *
+ * n_states      number of sampler bindings that have per-binding state
+ * compare_func  compare function per binding; bindings at or above n_states
+ *               are treated as COMPARE_FUNC_ALWAYS
+ * tex_swizzles  swizzle per binding, applied to the comparison result
+ *
+ * Returns the result of nir_shader_lower_instructions(), presumably true
+ * when at least one instruction was lowered.
+ */
+bool
+d3d12_lower_sample_tex_compare(nir_shader *s,
+                               unsigned n_states,
+                               enum compare_func *compare_func,
+                               dxil_texture_swizzle_state *tex_swizzles);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* D3D12_NIR_LOWER_TEXCOMP_H */
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_lower_vs_vertex_conversion.c b/src/gallium/drivers/d3d12/d3d12_nir_lower_vs_vertex_conversion.c
new file mode 100644
index 00000000000..eec0ee6abda
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_nir_lower_vs_vertex_conversion.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "d3d12_nir_passes.h"
+
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+/* Look up the emulation target format of a vertex input variable.
+ * options is treated as an array of pipe_format values indexed by the
+ * variable's driver_location. */
+static enum pipe_format
+get_input_target_format(nir_variable *var, const void *options)
+{
+   enum pipe_format *target_formats = (enum pipe_format *)options;
+   return target_formats[var->data.driver_location];
+}
+
+/* Filter callback: select load_deref intrinsics that read a shader input
+ * whose target format is something other than PIPE_FORMAT_NONE. */
+static bool
+lower_vs_vertex_conversion_filter(const nir_instr *instr, const void *options)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *inst = nir_instr_as_intrinsic(instr);
+   if (inst->intrinsic != nir_intrinsic_load_deref)
+      return false;
+
+   nir_variable *var = nir_intrinsic_get_var(inst, 0);
+   return (var->data.mode == nir_var_shader_in) &&
+          (get_input_target_format(var, options) != PIPE_FORMAT_NONE);
+}
+
+/* Signature shared by nir_ishr and nir_ushr so the caller can pick the
+ * arithmetic or the logical right shift. */
+typedef nir_ssa_def *
+(*shift_right_func)(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1);
+
+/* decoding the signed vs unsigned scaled format is handled
+ * by applying the signed or unsigned shift right function
+ * accordingly */
+static nir_ssa_def *
+from_10_10_10_2_scaled(nir_builder *b, nir_ssa_def *src,
+                       nir_ssa_def *lshift, shift_right_func shr)
+{
+   /* Each field is first shifted to the top of the word (lshift) and then
+    * shifted back down by 32 - width: 22 for the 10-bit fields, 30 for the
+    * 2-bit field. The result is converted to float. */
+   nir_ssa_def *rshift = nir_imm_ivec4(b, 22, 22, 22, 30);
+   return nir_i2f32(b, shr(b, nir_ishl(b, src, lshift), rshift));
+}
+
+/* Decode a packed word as signed normalized: sign-extending extraction
+ * followed by a scale of 1/0x1ff on the 10-bit channels; the 2-bit channel
+ * is multiplied by 1.0, i.e. left unscaled. */
+static nir_ssa_def *
+from_10_10_10_2_snorm(nir_builder *b, nir_ssa_def *src, nir_ssa_def *lshift)
+{
+   nir_ssa_def *split = from_10_10_10_2_scaled(b, src, lshift, nir_ishr);
+   nir_ssa_def *scale_rgb = nir_imm_vec4(b,
+                                         1.0f / 0x1ff,
+                                         1.0f / 0x1ff,
+                                         1.0f / 0x1ff,
+                                         1.0f);
+   return nir_fmul(b, split, scale_rgb);
+}
+
+/* Decode a packed word as unsigned normalized: zero-extending extraction
+ * followed by a scale of 1/0x3ff on the 10-bit channels and 1/3 on the
+ * 2-bit channel. */
+static nir_ssa_def *
+from_10_10_10_2_unorm(nir_builder *b, nir_ssa_def *src, nir_ssa_def *lshift)
+{
+   nir_ssa_def *split = from_10_10_10_2_scaled(b, src, lshift, nir_ushr);
+   nir_ssa_def *scale_rgb = nir_imm_vec4(b,
+                                         1.0f / 0x3ff,
+                                         1.0f / 0x3ff,
+                                         1.0f / 0x3ff,
+                                         1.0f / 3.0f);
+   return nir_fmul(b, split, scale_rgb);
+}
+
+/* Left-shift amounts for the R..A layout: result channel 0 comes from bits
+ * 0-9, channel 1 from bits 10-19, channel 2 from bits 20-29 and channel 3
+ * from bits 30-31 of the packed word. */
+inline static nir_ssa_def *
+lshift_rgba(nir_builder *b)
+{
+   return nir_imm_ivec4(b, 22, 12, 2, 0);
+}
+
+/* Left-shift amounts for the B..A layout: same fields as lshift_rgba() but
+ * result channels 0 and 2 are swapped (channel 0 from bits 20-29, channel 2
+ * from bits 0-9). */
+inline static nir_ssa_def *
+lshift_bgra(nir_builder *b)
+{
+   return nir_imm_ivec4(b, 2, 12, 22, 0);
+}
+
+/* Lowering callback: build the replacement value for an input load selected
+ * by lower_vs_vertex_conversion_filter().
+ *
+ * Formats without alpha get component 3 of the loaded vector replaced by the
+ * integer constant 1. All other handled formats are 10_10_10_2 variants that
+ * are unpacked from component 0 of the loaded value. */
+static nir_ssa_def *
+lower_vs_vertex_conversion_impl(nir_builder *b, nir_instr *instr, void *options)
+{
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+   enum pipe_format fmt = get_input_target_format(var, options);
+
+   if (!util_format_has_alpha(fmt)) {
+      /* these formats need the alpha channel replaced with 1: */
+      assert(fmt == PIPE_FORMAT_R8G8B8_SINT ||
+             fmt == PIPE_FORMAT_R8G8B8_UINT ||
+             fmt == PIPE_FORMAT_R16G16B16_SINT ||
+             fmt == PIPE_FORMAT_R16G16B16_UINT);
+      return nir_vector_insert_imm(b, &intr->dest.ssa, nir_imm_int(b, 1), 3);
+   } else {
+      /* The packed 32-bit word is taken from the first component */
+      nir_ssa_def *src = nir_channel(b, &intr->dest.ssa, 0);
+
+      switch (fmt) {
+      case PIPE_FORMAT_R10G10B10A2_SNORM:
+         return from_10_10_10_2_snorm(b, src, lshift_rgba(b));
+      case PIPE_FORMAT_B10G10R10A2_SNORM:
+         return from_10_10_10_2_snorm(b, src, lshift_bgra(b));
+      case PIPE_FORMAT_B10G10R10A2_UNORM:
+         return from_10_10_10_2_unorm(b, src, lshift_bgra(b));
+      case PIPE_FORMAT_R10G10B10A2_SSCALED:
+         return from_10_10_10_2_scaled(b, src, lshift_rgba(b), nir_ishr);
+      case PIPE_FORMAT_B10G10R10A2_SSCALED:
+         return from_10_10_10_2_scaled(b, src, lshift_bgra(b), nir_ishr);
+      case PIPE_FORMAT_R10G10B10A2_USCALED:
+         return from_10_10_10_2_scaled(b, src, lshift_rgba(b), nir_ushr);
+      case PIPE_FORMAT_B10G10R10A2_USCALED:
+         return from_10_10_10_2_scaled(b, src, lshift_bgra(b), nir_ushr);
+
+      default:
+         unreachable("Unsupported emulated vertex format");
+      }
+   }
+}
+
+/* Lower emulated vertex attribute input
+ * The vertex attributes are passed as R32_UINT that needs to be converted
+ * to one of the RGB10A2 formats that need to be emulated.
+ *
+ * @param target_formats contains the per attribute format to convert to
+ * or PIPE_FORMAT_NONE if no conversion is needed
+ */
+bool
+d3d12_nir_lower_vs_vertex_conversion(nir_shader *s,
+                                     enum pipe_format target_formats[])
+{
+   /* Only vertex shaders consume vertex attributes */
+   assert(s->info.stage == MESA_SHADER_VERTEX);
+
+   /* The format table travels to the filter/impl callbacks as their
+    * opaque options pointer; the pass result is handed straight back. */
+   return nir_shader_lower_instructions(s,
+                                        lower_vs_vertex_conversion_filter,
+                                        lower_vs_vertex_conversion_impl,
+                                        target_formats);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_passes.c b/src/gallium/drivers/d3d12/d3d12_nir_passes.c
new file mode 100644
index 00000000000..e8f1a1fcd89
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_nir_passes.c
@@ -0,0 +1,998 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "d3d12_nir_passes.h"
+#include "d3d12_compiler.h"
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+#include "nir_format_convert.h"
+#include "program/prog_instruction.h"
+#include "dxil_nir.h"
+
+/**
+ * Lower Y Flip:
+ *
+ * We can't do a Y flip simply by negating the viewport height,
+ * so we need to lower the flip into the NIR shader.
+ */
+
+/* Emit a load of a driver-internal state variable.
+ *
+ * On first use (*out_var == NULL) a hidden uniform named var_name of type
+ * var_type is created, tagged with a single state slot
+ * { STATE_INTERNAL, STATE_INTERNAL_DRIVER, var_enum }, counted in
+ * num_uniforms and cached in *out_var. Later calls reuse the cached
+ * variable. Returns the SSA value of the load emitted at the builder's
+ * current cursor. */
+static nir_ssa_def *
+get_state_var(nir_builder *b,
+              enum d3d12_state_var var_enum,
+              const char *var_name,
+              const struct glsl_type *var_type,
+              nir_variable **out_var)
+{
+   const gl_state_index16 tokens[5] = { STATE_INTERNAL, STATE_INTERNAL_DRIVER, var_enum };
+   if (*out_var == NULL) {
+      nir_variable *var = nir_variable_create(b->shader,
+                                              nir_var_uniform,
+                                              var_type,
+                                              var_name);
+
+      var->num_state_slots = 1;
+      var->state_slots = ralloc_array(var, nir_state_slot, 1);
+      memcpy(var->state_slots[0].tokens, tokens,
+             sizeof(var->state_slots[0].tokens));
+      var->data.how_declared = nir_var_hidden;
+      b->shader->num_uniforms++;
+      *out_var = var;
+   }
+   return nir_load_var(b, *out_var);
+}
+
+/* Rewrite a store_deref to the VARYING_SLOT_POS output so that the stored
+ * y component is multiplied by the d3d12_FlipY state variable. Any other
+ * instruction is left untouched. */
+static void
+lower_pos_write(nir_builder *b, struct nir_instr *instr, nir_variable **flip)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return;
+
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+   if (var->data.mode != nir_var_shader_out ||
+       var->data.location != VARYING_SLOT_POS)
+      return;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_ssa_def *pos = nir_ssa_for_src(b, intr->src[1], 4);
+   nir_ssa_def *flip_y = get_state_var(b, D3D12_STATE_VAR_Y_FLIP, "d3d12_FlipY",
+                                       glsl_float_type(), flip);
+   nir_ssa_def *def = nir_vec4(b,
+                               nir_channel(b, pos, 0),
+                               nir_fmul(b, nir_channel(b, pos, 1), flip_y),
+                               nir_channel(b, pos, 2),
+                               nir_channel(b, pos, 3));
+   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
+}
+
+/* Apply lower_pos_write() to every instruction of a vertex or geometry
+ * shader; other stages are returned unchanged. */
+void
+d3d12_lower_yflip(nir_shader *nir)
+{
+   nir_variable *flip = NULL;
+
+   if (nir->info.stage != MESA_SHADER_VERTEX &&
+       nir->info.stage != MESA_SHADER_GEOMETRY)
+      return;
+
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               lower_pos_write(&b, instr, &flip);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
+
+/* Replace a load_front_face intrinsic with a load of var and remove the
+ * intrinsic. Any other instruction is left untouched. */
+static void
+lower_load_face(nir_builder *b, struct nir_instr *instr, nir_variable *var)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_load_front_face)
+      return;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_ssa_def *load = nir_load_var(b, var);
+
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
+   nir_instr_remove(instr);
+}
+
+/* Fragment shaders only: create a flat-interpolated bool input
+ * "gl_FrontFacing" at VARYING_SLOT_VAR12 and redirect every
+ * load_front_face to it. The input variable is created unconditionally. */
+void
+d3d12_forward_front_face(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+   nir_variable *var = nir_variable_create(nir, nir_var_shader_in,
+                                           glsl_bool_type(),
+                                           "gl_FrontFacing");
+   var->data.location = VARYING_SLOT_VAR12;
+   var->data.interpolation = INTERP_MODE_FLAT;
+
+
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               lower_load_face(&b, instr, var);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
+
+/* After a load_deref of the VARYING_SLOT_POS input, recompute the z
+ * component as z * t.x + t.y, where t is the two-component
+ * d3d12_DepthTransform state variable, and make all later uses of the load
+ * see the adjusted vector. */
+static void
+lower_pos_read(nir_builder *b, struct nir_instr *instr,
+               nir_variable **depth_transform_var)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_load_deref)
+      return;
+
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+   if (var->data.mode != nir_var_shader_in ||
+       var->data.location != VARYING_SLOT_POS)
+      return;
+
+   b->cursor = nir_after_instr(instr);
+
+   nir_ssa_def *pos = nir_instr_ssa_def(instr);
+   nir_ssa_def *depth = nir_channel(b, pos, 2);
+
+   assert(depth_transform_var);
+   nir_ssa_def *depth_transform = get_state_var(b, D3D12_STATE_VAR_DEPTH_TRANSFORM,
+                                                "d3d12_DepthTransform",
+                                                glsl_vec_type(2),
+                                                depth_transform_var);
+   depth = nir_fmad(b, depth, nir_channel(b, depth_transform, 0),
+                              nir_channel(b, depth_transform, 1));
+
+   pos = nir_vector_insert_imm(b, pos, depth, 2);
+
+   /* Only uses after the new vector are rewritten, so the instructions
+    * built above keep reading the original load. */
+   assert(intr->dest.is_ssa);
+   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(pos),
+                                  pos->parent_instr);
+}
+
+/* Apply lower_pos_read() to every instruction of a fragment shader. */
+void
+d3d12_lower_depth_range(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+   nir_variable *depth_transform = NULL;
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               lower_pos_read(&b, instr, &depth_transform);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
+
+/* True for shader outputs located at FRAG_RESULT_COLOR or at
+ * FRAG_RESULT_DATA0 and above. */
+static bool
+is_color_output(nir_variable *var)
+{
+   return (var->data.mode == nir_var_shader_out &&
+           (var->data.location == FRAG_RESULT_COLOR ||
+            var->data.location >= FRAG_RESULT_DATA0));
+}
+
+/* Rewrite the value stored to a color output: the float color is converted
+ * to 8-bit unorm (or snorm when is_signed) integers; for the signed case
+ * negative results additionally get 1 << 8 added. */
+static void
+lower_uint_color_write(nir_builder *b, struct nir_instr *instr, bool is_signed)
+{
+   const unsigned NUM_BITS = 8;
+   const unsigned bits[4] = { NUM_BITS, NUM_BITS, NUM_BITS, NUM_BITS };
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return;
+
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+   if (!is_color_output(var))
+      return;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_ssa_def *col = nir_ssa_for_src(b, intr->src[1], intr->num_components);
+   nir_ssa_def *def = is_signed ? nir_format_float_to_snorm(b, col, bits) :
+                                  nir_format_float_to_unorm(b, col, bits);
+   if (is_signed)
+      def = nir_bcsel(b, nir_ilt(b, def, nir_imm_int(b, 0)),
+                      nir_iadd(b, def, nir_imm_int(b, 1 << NUM_BITS)),
+                      def);
+   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
+}
+
+/* Apply lower_uint_color_write() to every instruction of a fragment shader;
+ * other stages are returned unchanged. */
+void
+d3d12_lower_uint_cast(nir_shader *nir, bool is_signed)
+{
+   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               lower_uint_color_write(&b, instr, is_signed);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
+
+/* Replace a load_first_vertex intrinsic with a load of the uint
+ * d3d12_FirstVertex state variable. Returns true when a replacement was
+ * made. */
+static bool
+lower_load_first_vertex(nir_builder *b, nir_instr *instr, nir_variable **first_vertex)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   if (intr->intrinsic != nir_intrinsic_load_first_vertex)
+      return false;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_ssa_def *load = get_state_var(b, D3D12_STATE_VAR_FIRST_VERTEX, "d3d12_FirstVertex",
+                                     glsl_uint_type(), first_vertex);
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));
+   nir_instr_remove(instr);
+
+   return true;
+}
+
+/* Apply lower_load_first_vertex() to every instruction of a vertex shader.
+ * Returns true if any instruction was replaced, false otherwise (including
+ * for non-vertex stages). */
+bool
+d3d12_lower_load_first_vertex(struct nir_shader *nir)
+{
+   nir_variable *first_vertex = NULL;
+   bool progress = false;
+
+   if (nir->info.stage != MESA_SHADER_VERTEX)
+      return false;
+
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               progress |= lower_load_first_vertex(&b, instr, &first_vertex);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+   return progress;
+}
+
+/* Rewrite a store_deref to the VARYING_SLOT_POS output so that the stored
+ * z component is negated. Any other instruction is left untouched. */
+static void
+invert_depth(nir_builder *b, struct nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return;
+
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+   if (var->data.mode != nir_var_shader_out ||
+       var->data.location != VARYING_SLOT_POS)
+      return;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_ssa_def *pos = nir_ssa_for_src(b, intr->src[1], 4);
+   nir_ssa_def *def = nir_vec4(b,
+                               nir_channel(b, pos, 0),
+                               nir_channel(b, pos, 1),
+                               nir_fneg(b, nir_channel(b, pos, 2)),
+                               nir_channel(b, pos, 3));
+   nir_instr_rewrite_src(&intr->instr, intr->src + 1, nir_src_for_ssa(def));
+}
+
+/* In OpenGL the window space depth value z_w is evaluated according to "s * z_d + b"
+ * with "s = (far - near) / 2" (depth clip:minus_one_to_one) [OpenGL 3.3, 2.13.1].
+ * When we switch the far and near value to satisfy DirectX requirements we have
+ * to compensate by inverting "z_d' = -z_d" with this lowering pass.
+ */
+void
+d3d12_nir_invert_depth(nir_shader *shader)
+{
+   if (shader->info.stage != MESA_SHADER_VERTEX &&
+       shader->info.stage != MESA_SHADER_GEOMETRY)
+      return;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               invert_depth(&b, instr);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
+
+
+/**
+ * Lower State Vars:
+ *
+ * All uniforms related to internal D3D12 variables are
+ * condensed into a UBO that is appended at the end of the
+ * current ones.
+ */
+
+/* Return the offset (in 32-bit words) of a state variable inside the state
+ * var UBO, assigning a new 4-word slot the first time a variable is seen.
+ *
+ * NOTE(review): no bound check against the capacity of shader->state_vars is
+ * done here; its declaration is not visible in this file - confirm the array
+ * is large enough for every enum d3d12_state_var value. */
+static unsigned
+get_state_var_offset(struct d3d12_shader *shader, enum d3d12_state_var var)
+{
+   for (unsigned i = 0; i < shader->num_state_vars; ++i) {
+      if (shader->state_vars[i].var == var)
+         return shader->state_vars[i].offset;
+   }
+
+   unsigned offset = shader->state_vars_size;
+   shader->state_vars[shader->num_state_vars].offset = offset;
+   shader->state_vars[shader->num_state_vars].var = var;
+   shader->state_vars_size += 4; /* Use 4-words slots no matter the variable size */
+   shader->num_state_vars++;
+
+   return offset;
+}
+
+/* Replace a load_uniform / load_deref of a driver-internal state variable by
+ * a load_ubo from the state var UBO at the given binding.
+ *
+ * Returns true if the instruction was replaced, false if it does not read a
+ * variable tagged with STATE_INTERNAL_DRIVER. */
+static bool
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b,
+            struct d3d12_shader *shader, unsigned binding)
+{
+   nir_variable *variable = NULL;
+   nir_deref_instr *deref = NULL;
+
+   b->cursor = nir_before_instr(&instr->instr);
+
+   if (instr->intrinsic == nir_intrinsic_load_uniform) {
+      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
+         if (var->data.driver_location == nir_intrinsic_base(instr)) {
+            variable = var;
+            break;
+         }
+      }
+   } else if (instr->intrinsic == nir_intrinsic_load_deref) {
+      deref = nir_src_as_deref(instr->src[0]);
+      variable = nir_intrinsic_get_var(instr, 0);
+   }
+
+   if (variable == NULL ||
+       variable->num_state_slots != 1 ||
+       variable->state_slots[0].tokens[1] != STATE_INTERNAL_DRIVER)
+      return false;
+
+   enum d3d12_state_var var = variable->state_slots[0].tokens[2];
+   nir_ssa_def *ubo_idx = nir_imm_int(b, binding);
+   /* Slot offsets are counted in 32-bit words, load_ubo wants bytes */
+   nir_ssa_def *ubo_offset = nir_imm_int(b, get_state_var_offset(shader, var) * 4);
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+   load->num_components = instr->num_components;
+   load->src[0] = nir_src_for_ssa(ubo_idx);
+   load->src[1] = nir_src_for_ssa(ubo_offset);
+   assert(instr->dest.ssa.bit_size >= 8);
+   nir_intrinsic_set_align(load, instr->dest.ssa.bit_size / 8, 0);
+   nir_intrinsic_set_range_base(load, 0);
+   nir_intrinsic_set_range(load, ~0);
+
+   nir_ssa_dest_init(&load->instr, &load->dest,
+                     load->num_components, instr->dest.ssa.bit_size,
+                     instr->dest.ssa.name);
+   nir_builder_instr_insert(b, &load->instr);
+   nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
+
+   /* Remove the old load_* instruction and any parent derefs */
+   nir_instr_remove(&instr->instr);
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+      /* If anyone is using this deref, leave it alone */
+      assert(d->dest.is_ssa);
+      if (!list_is_empty(&d->dest.ssa.uses))
+         break;
+
+      nir_instr_remove(&d->instr);
+   }
+
+   return true;
+}
+
+/* Move every load of a driver-internal state variable into a dedicated UBO
+ * ("d3d12_state_vars"), remove the now unused uniforms and record the slot
+ * layout in shader. Returns true if any load was rewritten. */
+bool
+d3d12_lower_state_vars(nir_shader *nir, struct d3d12_shader *shader)
+{
+   bool progress = false;
+
+   /* The state var UBO is added after all the other UBOs if it already
+    * exists it will be replaced by using the same binding.
+    * In the event there are no other UBO's, use binding slot 1 to
+    * be consistent with other non-default UBO's */
+   /* MAX2 is Mesa's portable macro; plain max() only exists in the Windows
+    * CRT headers and breaks the build elsewhere. */
+   unsigned binding = MAX2(nir->info.num_ubos, 1);
+
+   /* NOTE(review): this walks nir_var_uniform variables, so the
+    * nir_var_mem_ubo test below looks like it can never be true and the
+    * "reuse the existing binding" path described above is never taken.
+    * Behavior is kept as is - confirm the intended mode with the author. */
+   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
+      if (var->num_state_slots == 1 &&
+          var->state_slots[0].tokens[1] == STATE_INTERNAL_DRIVER) {
+         if (var->data.mode == nir_var_mem_ubo) {
+            binding = var->data.binding;
+         }
+      }
+   }
+
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder builder;
+         nir_builder_init(&builder, function->impl);
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               if (instr->type == nir_instr_type_intrinsic)
+                  progress |= lower_instr(nir_instr_as_intrinsic(instr),
+                                          &builder,
+                                          shader,
+                                          binding);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   if (progress) {
+      assert(shader->num_state_vars > 0);
+
+      shader->state_vars_used = true;
+
+      /* Remove state variables */
+      nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
+         if (var->num_state_slots == 1 &&
+             var->state_slots[0].tokens[1] == STATE_INTERNAL_DRIVER) {
+            exec_node_remove(&var->node);
+            nir->num_uniforms--;
+         }
+      }
+
+      const gl_state_index16 tokens[5] = { STATE_INTERNAL, STATE_INTERNAL_DRIVER };
+      /* One vec4 per state variable (state_vars_size counts 32-bit words) */
+      const struct glsl_type *type = glsl_array_type(glsl_vec4_type(),
+                                                     shader->state_vars_size / 4, 0);
+      nir_variable *ubo = nir_variable_create(nir, nir_var_mem_ubo, type,
+                                              "d3d12_state_vars");
+      if (binding >= nir->info.num_ubos)
+         nir->info.num_ubos = binding + 1;
+      ubo->data.binding = binding;
+      ubo->num_state_slots = 1;
+      ubo->state_slots = ralloc_array(ubo, nir_state_slot, 1);
+      memcpy(ubo->state_slots[0].tokens, tokens,
+             sizeof(ubo->state_slots[0].tokens));
+
+      struct glsl_struct_field field = {
+          .type = type,
+          .name = "data",
+          .location = -1,
+      };
+      ubo->interface_type =
+            glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
+                                false, "__d3d12_state_vars_interface");
+   }
+
+   return progress;
+}
+
+/* Map a sampler type, or an (arbitrarily nested) array of samplers, to the
+ * matching bare sampler / bare shadow sampler type with the same array
+ * shape. */
+static const struct glsl_type *
+get_bare_samplers_for_type(const struct glsl_type *type)
+{
+   if (glsl_type_is_sampler(type)) {
+      if (glsl_sampler_type_is_shadow(type))
+         return glsl_bare_shadow_sampler_type();
+      else
+         return glsl_bare_sampler_type();
+   } else if (glsl_type_is_array(type)) {
+      return glsl_array_type(
+         get_bare_samplers_for_type(glsl_get_array_element(type)),
+         glsl_get_length(type),
+         0 /*explicit size*/);
+   }
+   assert(!"Unexpected type");
+   return NULL;
+}
+
+/* For every typed sampler uniform add a clone whose type is the
+ * corresponding bare sampler type. */
+void
+d3d12_create_bare_samplers(nir_shader *nir)
+{
+   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
+      const struct glsl_type *type = glsl_without_array(var->type);
+      if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
+         /* Since samplers are already lowered to be accessed by index, all we need to do
+          * here is create a bare sampler with the same binding */
+         nir_variable *clone = nir_variable_clone(var, nir);
+         clone->type = get_bare_samplers_for_type(var->type);
+         nir_shader_add_variable(nir, clone);
+      }
+   }
+}
+
+/* Filter callback: select load_front_face and loads of bool shader inputs.
+ * File-local helper, hence static. */
+static bool
+lower_bool_input_filter(const nir_instr *instr,
+                        UNUSED const void *_options)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic == nir_intrinsic_load_front_face)
+      return true;
+
+   if (intr->intrinsic == nir_intrinsic_load_deref) {
+      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+      nir_variable *var = nir_deref_instr_get_variable(deref);
+      return var->data.mode == nir_var_shader_in &&
+             glsl_get_base_type(var->type) == GLSL_TYPE_BOOL;
+   }
+
+   return false;
+}
+
+/* Lowering callback: turn the selected bool load into a 32-bit load followed
+ * by an i2b1 conversion. For variable loads the variable and deref types are
+ * rewritten to uint vectors of the same width. */
+static nir_ssa_def *
+lower_bool_input_impl(nir_builder *b, nir_instr *instr,
+                      UNUSED void *_options)
+{
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   if (intr->intrinsic == nir_intrinsic_load_deref) {
+      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+      nir_variable *var = nir_deref_instr_get_variable(deref);
+
+      /* rewrite var->type */
+      var->type = glsl_vector_type(GLSL_TYPE_UINT,
+                                   glsl_get_vector_elements(var->type));
+      deref->type = var->type;
+   }
+
+   intr->dest.ssa.bit_size = 32;
+   return nir_i2b1(b, &intr->dest.ssa);
+}
+
+/* Lower bool shader inputs and load_front_face to 32-bit integer loads.
+ * Returns true if the shader was changed. */
+bool
+d3d12_lower_bool_input(struct nir_shader *s)
+{
+   return nir_shader_lower_instructions(s, lower_bool_input_filter,
+                                        lower_bool_input_impl, NULL);
+}
+
+/* Turn a store to FRAG_RESULT_COLOR into a store to gl_FragData[0] and
+ * replicate the stored value to gl_FragData[1 .. nr_cbufs - 1].
+ * Returns true if the instruction was such a store. */
+static bool
+lower_color_write(nir_builder *b, struct nir_instr *instr, unsigned nr_cbufs)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   if (var->data.mode != nir_var_shader_out ||
+       var->data.location != FRAG_RESULT_COLOR)
+      return false;
+
+   /* lower the original write to data #0 */
+   var->name = ralloc_strdup(var, "gl_FragData[0]");
+   var->data.location = FRAG_RESULT_DATA0;
+   var->data.driver_location = 0;
+
+   b->cursor = nir_after_instr(&intr->instr);
+
+   /* Then create new variables and write them as well */
+   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1],
+                                        nir_src_num_components(intr->src[1]));
+   unsigned writemask = nir_intrinsic_write_mask(intr);
+   /* unsigned index: nr_cbufs is unsigned, avoid a mixed-sign comparison */
+   for (unsigned i = 1; i < nr_cbufs; ++i) {
+      char name[256];
+      snprintf(name, sizeof(name), "gl_FragData[%u]", i);
+      nir_variable *new_var = nir_variable_create(b->shader,
+                                                  nir_var_shader_out,
+                                                  var->type, name);
+      new_var->data.location = FRAG_RESULT_DATA0 + i;
+      new_var->data.driver_location = i;
+      nir_store_var(b, new_var, value, writemask);
+   }
+
+   return true;
+}
+
+/* Apply lower_color_write() to every instruction of a fragment shader.
+ * Returns true if any store was lowered. */
+bool
+d3d12_lower_frag_result(struct nir_shader *nir, unsigned nr_cbufs)
+{
+   bool progress = false;
+   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+      return false;
+
+   nir_foreach_function(function, nir) {
+      if (function->impl) {
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               progress |= lower_color_write(&b, instr, nr_cbufs);
+            }
+         }
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+   return progress;
+}
+
+/* Create the dual-source outputs selected by missing_mask (bit 0: index 0,
+ * bit 1: index 1) and store zero to them at the start of the entry point. */
+void
+d3d12_add_missing_dual_src_target(struct nir_shader *s,
+                                  unsigned missing_mask)
+{
+   assert(missing_mask != 0);
+   nir_builder b;
+   nir_function_impl *impl = nir_shader_get_entrypoint(s);
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   nir_ssa_def *zero = nir_imm_zero(&b, 4, 32);
+   for (unsigned i = 0; i < 2; ++i) {
+
+      if (!(missing_mask & (1u << i)))
+         continue;
+
+      const char *name = i == 0 ? "gl_FragData[0]" :
+                                  "gl_SecondaryFragDataEXT[0]";
+      nir_variable *out = nir_variable_create(s, nir_var_shader_out,
+                                              glsl_vec4_type(), name);
+      out->data.location = FRAG_RESULT_DATA0;
+      out->data.driver_location = i;
+      out->data.index = i;
+
+      nir_store_var(&b, out, zero, 0xf);
+   }
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Change the type of the variable at the given slot (and of all variable
+ * derefs pointing at it) to uint. Returns true if the variable was found. */
+static bool
+fix_io_uint_type(struct nir_shader *s, nir_variable_mode modes, int slot)
+{
+   nir_variable *fixed_var = NULL;
+   nir_foreach_variable_with_modes(var, s, modes) {
+      if (var->data.location == slot) {
+         var->type = glsl_uint_type();
+         fixed_var = var;
+         break;
+      }
+   }
+
+   assert(fixed_var);
+   /* Release builds: nothing to patch, and fixed_var must not be
+    * dereferenced below. */
+   if (!fixed_var)
+      return false;
+
+   nir_foreach_function(function, s) {
+      if (function->impl) {
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               if (instr->type == nir_instr_type_deref) {
+                  nir_deref_instr *deref = nir_instr_as_deref(instr);
+                  /* deref->var is only meaningful for variable derefs */
+                  if (deref->deref_type == nir_deref_type_var &&
+                      deref->var == fixed_var)
+                     deref->type = fixed_var->type;
+               }
+            }
+         }
+      }
+   }
+   return true;
+}
+
+/* Retype to uint every input in in_mask that the shader reads and every
+ * output in out_mask that it writes. Returns true if anything changed. */
+bool
+d3d12_fix_io_uint_type(struct nir_shader *s, uint64_t in_mask, uint64_t out_mask)
+{
+   if (!(s->info.outputs_written & out_mask) &&
+       !(s->info.inputs_read & in_mask))
+      return false;
+
+   bool progress = false;
+
+   while (in_mask) {
+      int slot = u_bit_scan64(&in_mask);
+      progress |= (s->info.inputs_read & (1ull << slot)) &&
+                  fix_io_uint_type(s, nir_var_shader_in, slot);
+   }
+
+   while (out_mask) {
+      int slot = u_bit_scan64(&out_mask);
+      progress |= (s->info.outputs_written & (1ull << slot)) &&
+                  fix_io_uint_type(s, nir_var_shader_out, slot);
+   }
+
+   return progress;
+}
+
+/* Filter callback: select every load_ubo intrinsic.
+ * File-local helper, hence static. */
+static bool
+lower_load_ubo_packed_filter(const nir_instr *instr,
+                             UNUSED const void *_options) {
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   return intr->intrinsic == nir_intrinsic_load_ubo;
+}
+
+/* Lowering callback: rebuild the load through build_load_ubo_dxil().
+ * The options parameter is a plain void * so the signature matches the
+ * one used by lower_bool_input_impl() for the same pass entry point. */
+static nir_ssa_def *
+lower_load_ubo_packed_impl(nir_builder *b, nir_instr *instr,
+                           UNUSED void *_options) {
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   nir_ssa_def *buffer = intr->src[0].ssa;
+   nir_ssa_def *offset = intr->src[1].ssa;
+
+   nir_ssa_def *result =
+      build_load_ubo_dxil(b, buffer,
+                          offset,
+                          nir_dest_num_components(intr->dest),
+                          nir_dest_bit_size(intr->dest));
+   return result;
+}
+
+/* Lower all load_ubo intrinsics of the shader via build_load_ubo_dxil().
+ * Returns true if the shader was changed. */
+bool
+nir_lower_packed_ubo_loads(nir_shader *nir) {
+   return nir_shader_lower_instructions(nir,
+                                        lower_load_ubo_packed_filter,
+                                        lower_load_ubo_packed_impl,
+                                        NULL);
+}
+
+/* Add a flat uint "primitive_id" output at VARYING_SLOT_PRIMITIVE_ID and
+ * store the primitive id to it before every emit_vertex. */
+void
+d3d12_lower_primitive_id(nir_shader *shader)
+{
+   nir_builder b;
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_ssa_def *primitive_id;
+   nir_builder_init(&b, impl);
+
+   nir_variable *primitive_id_var = nir_variable_create(shader, nir_var_shader_out,
+                                                        glsl_uint_type(), "primitive_id");
+   primitive_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
+   primitive_id_var->data.interpolation = INTERP_MODE_FLAT;
+
+   nir_foreach_block(block, impl) {
+      /* The id is loaded once at the top of each block */
+      b.cursor = nir_before_block(block);
+      primitive_id = nir_load_primitive_id(&b);
+
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic ||
+             nir_instr_as_intrinsic(instr)->intrinsic != nir_intrinsic_emit_vertex)
+            continue;
+
+         b.cursor = nir_before_instr(instr);
+         nir_store_var(&b, primitive_id_var, primitive_id, 0x1);
+      }
+   }
+
+   nir_metadata_preserve(impl, 0);
+}
+
+/* Redirect a store to a shader output into the per-slot temporary array.
+ * Stores to anything but shader outputs are left untouched. */
+static void
+lower_triangle_strip_store(nir_builder *b, nir_intrinsic_instr *intr,
+                           nir_variable *vertex_count_var,
+                           nir_variable **varyings)
+{
+   /**
+    * tmp_varying[slot][min(vertex_count, 2)] = src
+    */
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+
+   /* Check the mode before building anything, so stores to locals (such as
+    * the vertex counter itself) do not get dead load/min instructions
+    * inserted in front of them. */
+   if (var->data.mode != nir_var_shader_out)
+      return;
+
+   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
+   nir_ssa_def *index = nir_imin(b, vertex_count, nir_imm_int(b, 2));
+
+   nir_deref_instr *deref = nir_build_deref_array(b, nir_build_deref_var(b, varyings[var->data.location]), index);
+   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[1], intr->num_components);
+   nir_store_deref(b, deref, value, 0xf);
+   nir_instr_remove(&intr->instr);
+}
+
+/* Insert an emit_vertex intrinsic for the given stream at the cursor. */
+static void
+nir_emit_vertex(nir_builder *b, unsigned stream_id)
+{
+   nir_intrinsic_instr *instr;
+
+   instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_emit_vertex);
+   nir_intrinsic_set_stream_id(instr, stream_id);
+   nir_builder_instr_insert(b, &instr->instr);
+}
+
+/* Replace an emit_vertex by code that, once three vertices are buffered,
+ * emits them as a complete primitive (see pseudo code below). */
+static void
+lower_triangle_strip_emit_vertex(nir_builder *b, nir_intrinsic_instr *intr,
+                                 nir_variable *vertex_count_var,
+                                 nir_variable **varyings,
+                                 nir_variable **out_varyings)
+{
+   // TODO xfb + flat shading + last_pv
+   /**
+    * if (vertex_count >= 2) {
+    *    for (i = 0; i < 3; i++) {
+    *       foreach(slot)
+    *          out[slot] = tmp_varying[slot][i];
+    *       EmitVertex();
+    *    }
+    *    EndPrimitive();
+    *    foreach(slot)
+    *       tmp_varying[slot][vertex_count % 2] = tmp_varying[slot][2];
+    * }
+    * vertex_count++;
+    */
+
+   nir_intrinsic_instr *instr;
+   nir_ssa_def *two = nir_imm_int(b, 2);
+   nir_ssa_def *vertex_count = nir_load_var(b, vertex_count_var);
+   nir_ssa_def *count_cmp = nir_uge(b, vertex_count, two);
+   nir_if *count_check = nir_push_if(b, count_cmp);
+
+   for (int j = 0; j < 3; ++j) {
+      for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
+         if (!varyings[i])
+            continue;
+         nir_copy_deref(b, nir_build_deref_var(b, out_varyings[i]),
+                        nir_build_deref_array_imm(b, nir_build_deref_var(b, varyings[i]), j));
+      }
+      nir_emit_vertex(b, 0);
+   }
+
+   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
+      if (!varyings[i])
+         continue;
+      nir_copy_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), nir_umod(b, vertex_count, two)),
+                        nir_build_deref_array(b, nir_build_deref_var(b, varyings[i]), two));
+   }
+
+   instr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_primitive);
+   nir_intrinsic_set_stream_id(instr, 0);
+   nir_builder_instr_insert(b, &instr->instr);
+
+   nir_pop_if(b, count_check);
+
+   vertex_count = nir_iadd(b, vertex_count, nir_imm_int(b, 1));
+   nir_store_var(b, vertex_count_var, vertex_count, 0x1);
+
+   nir_instr_remove(&intr->instr);
+}
+
+/* Replace an end_primitive by a reset of the vertex counter. */
+static void
+lower_triangle_strip_end_primitive(nir_builder *b, nir_intrinsic_instr *intr,
+                                   nir_variable *vertex_count_var)
+{
+   /**
+    * vertex_count = 0;
+    */
+   nir_store_var(b, vertex_count_var, nir_imm_int(b, 0), 0x1);
+   nir_instr_remove(&intr->instr);
+}
+
+/* Rewrite a geometry shader so that output stores, emit_vertex and
+ * end_primitive go through a three-entry temporary per output slot and a
+ * local vertex counter; vertices_out is changed to (vertices_out - 2) * 3. */
+void
+d3d12_lower_triangle_strip(nir_shader *shader)
+{
+   nir_builder b;
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_variable *tmp_vars[VARYING_SLOT_MAX] = {0};
+   nir_variable *out_vars[VARYING_SLOT_MAX] = {0};
+   nir_builder_init(&b, impl);
+
+   shader->info.gs.vertices_out = (shader->info.gs.vertices_out - 2) * 3;
+
+   nir_variable *vertex_count_var =
+      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+
+   nir_block *first = nir_start_block(impl);
+   b.cursor = nir_before_block(first);
+   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
+      const struct glsl_type *type = glsl_array_type(var->type, 3, 0);
+      tmp_vars[var->data.location] =  nir_local_variable_create(impl, type, "tmp_var");
+      out_vars[var->data.location] = var;
+   }
+   nir_store_var(&b, vertex_count_var, nir_imm_int(&b, 0), 1);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_store_deref:
+            b.cursor = nir_before_instr(instr);
+            lower_triangle_strip_store(&b, intrin, vertex_count_var, tmp_vars);
+            break;
+         case nir_intrinsic_emit_vertex_with_counter:
+         case nir_intrinsic_emit_vertex:
+            b.cursor = nir_before_instr(instr);
+            lower_triangle_strip_emit_vertex(&b, intrin, vertex_count_var,
+                                             tmp_vars, out_vars);
+            break;
+         case nir_intrinsic_end_primitive:
+         case nir_intrinsic_end_primitive_with_counter:
+            b.cursor = nir_before_instr(instr);
+            lower_triangle_strip_end_primitive(&b, intrin, vertex_count_var);
+            break;
+         default:
+            break;
+         }
+      }
+   }
+
+   nir_metadata_preserve(impl, 0);
+   NIR_PASS_V(shader, nir_lower_var_copies);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_nir_passes.h b/src/gallium/drivers/d3d12/d3d12_nir_passes.h
new file mode 100644
index 00000000000..65af5a52f1b
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_nir_passes.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#ifndef D3D12_NIR_PASSES_H +#define D3D12_NIR_PASSES_H + +#include "nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct d3d12_shader; + +bool +d3d12_lower_point_sprite(nir_shader *shader, + bool sprite_origin_lower_left, + bool point_size_per_vertex, + unsigned point_coord_enable, + uint64_t next_inputs_read); + +bool +d3d12_lower_state_vars(struct nir_shader *s, struct d3d12_shader *shader); + +void +d3d12_lower_yflip(nir_shader *s); + +void +d3d12_forward_front_face(nir_shader *nir); + +void +d3d12_lower_depth_range(nir_shader *nir); + +bool +d3d12_lower_load_first_vertex(nir_shader *nir); + +void +d3d12_create_bare_samplers(nir_shader *s); + +bool +d3d12_lower_bool_input(struct nir_shader *s); + +void +d3d12_lower_uint_cast(nir_shader *nir, bool is_signed); + +bool +d3d12_lower_frag_result(struct nir_shader *s, unsigned nr_cbufs); + +void +d3d12_add_missing_dual_src_target(struct nir_shader *s, + unsigned missing_mask); + +bool +d3d12_fix_io_uint_type(struct nir_shader *s, uint64_t in_mask, uint64_t out_mask); + +void +d3d12_nir_invert_depth(nir_shader *s); + +bool +d3d12_lower_int_cubmap_to_array(nir_shader *s); + +bool +nir_lower_packed_ubo_loads(struct nir_shader *nir); + +bool +d3d12_nir_lower_vs_vertex_conversion(nir_shader *s, enum pipe_format target_formats[]); + +void +d3d12_lower_primitive_id(nir_shader *shader); + +void +d3d12_lower_triangle_strip(nir_shader *shader); + +#ifdef __cplusplus +} +#endif + +#endif // D3D12_NIR_PASSES_H diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp b/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp new file mode 100644 index 00000000000..0498c0bf75b --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp @@ -0,0 +1,381 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_pipeline_state.h" +#include "d3d12_compiler.h" +#include "d3d12_context.h" +#include "d3d12_screen.h" + +#include "util/hash_table.h" +#include "util/set.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +struct d3d12_pso_entry { + struct d3d12_gfx_pipeline_state key; + ID3D12PipelineState *pso; +}; + +static const char *slot_to_varying[] = { + "VARYINGAA", "VARYINGAB", "VARYINGAC", "VARYINGAD", "VARYINGAE", "VARYINGAF", "VARYINGAG", "VARYINGAH", + "VARYINGAI", "VARYINGAJ", "VARYINGAK", "VARYINGAL", "VARYINGAM", "VARYINGAN", "VARYINGAO", "VARYINGAP", + "VARYINGBA", "VARYINGBB", "VARYINGBC", "VARYINGBD", "VARYINGBE", "VARYINGBF", "VARYINGBG", "VARYINGBH", + "VARYINGBI", "VARYINGBJ", "VARYINGBK", "VARYINGBL", "VARYINGBM", "VARYINGBN", "VARYINGBO", "VARYINGBP", + "VARYINGCA", "VARYINGCB", "VARYINGCC", "VARYINGCD", "VARYINGCE", "VARYINGCF", "VARYINGCG", "VARYINGCH", + "VARYINGCI", "VARYINGCJ", "VARYINGCK", "VARYINGCL", "VARYINGCM", "VARYINGCN", "VARYINGCO", "VARYINGCP", + "VARYINGDA", "VARYINGDB", "VARYINGDC", "VARYINGDD", 
"VARYINGDE", "VARYINGDF", "VARYINGDG", "VARYINGDH", + "VARYINGDI", "VARYINGDJ", "VARYINGDK", "VARYINGDL", "VARYINGDM", "VARYINGDN", "VARYINGDO", "VARYINGDP", +}; + +static const char * +get_semantic_name(int slot, unsigned *index) +{ + *index = 0; /* Default index */ + + switch (slot) { + + case VARYING_SLOT_POS: + return "SV_Position"; + + case VARYING_SLOT_FACE: + return "SV_IsFrontFace"; + + case VARYING_SLOT_CLIP_DIST1: + *index = 1; + /* fallthrough */ + case VARYING_SLOT_CLIP_DIST0: + return "SV_ClipDistance"; + + case VARYING_SLOT_PRIMITIVE_ID: + return "SV_PrimitiveID"; + + default: { + int index = slot - VARYING_SLOT_POS; + return slot_to_varying[index]; + } + } +} + +static void +fill_so_declaration(const struct pipe_stream_output_info *info, + D3D12_SO_DECLARATION_ENTRY *entries, UINT *num_entries, + UINT *strides, UINT *num_strides) +{ + int next_offset[MAX_VERTEX_STREAMS] = { 0 }; + + *num_entries = 0; + + for (unsigned i = 0; i < info->num_outputs; i++) { + const struct pipe_stream_output *output = &info->output[i]; + const int buffer = output->output_buffer; + const int varying = output->register_index; + unsigned index; + + /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] + * array. Instead, it simply increments DstOffset for the following + * input by the number of components that should be skipped. + * + * DirectX12 requires that we create gap entries. 
+ */ + int skip_components = output->dst_offset - next_offset[buffer]; + + if (skip_components > 0) { + entries[*num_entries].Stream = output->stream; + entries[*num_entries].SemanticName = NULL; + entries[*num_entries].ComponentCount = skip_components; + entries[*num_entries].OutputSlot = buffer; + (*num_entries)++; + } + + next_offset[buffer] = output->dst_offset + output->num_components; + + entries[*num_entries].Stream = output->stream; + entries[*num_entries].SemanticName = get_semantic_name(output->register_index, &index); + entries[*num_entries].SemanticIndex = index; + entries[*num_entries].StartComponent = output->start_component; + entries[*num_entries].ComponentCount = output->num_components; + entries[*num_entries].OutputSlot = buffer; + (*num_entries)++; + } + + for (unsigned i = 0; i < MAX_VERTEX_STREAMS; i++) + strides[i] = info->stride[i] * 4; + *num_strides = MAX_VERTEX_STREAMS; +} + +static bool +depth_bias(struct d3d12_rasterizer_state *state, enum pipe_prim_type reduced_prim) +{ + /* glPolygonOffset is supposed to be only enabled when rendering polygons. + * In d3d12 case, all polygons (and quads) are lowered to triangles */ + if (reduced_prim != PIPE_PRIM_TRIANGLES) + return false; + + unsigned fill_mode = state->base.cull_face == PIPE_FACE_FRONT ? 
state->base.fill_back + : state->base.fill_front; + + switch (fill_mode) { + case PIPE_POLYGON_MODE_FILL: + return state->base.offset_tri; + + case PIPE_POLYGON_MODE_LINE: + return state->base.offset_line; + + case PIPE_POLYGON_MODE_POINT: + return state->base.offset_point; + + default: + unreachable("unexpected fill mode"); + } +} + +static D3D12_PRIMITIVE_TOPOLOGY_TYPE +topology_type(enum pipe_prim_type reduced_prim) +{ + switch (reduced_prim) { + case PIPE_PRIM_POINTS: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + + case PIPE_PRIM_LINES: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + + case PIPE_PRIM_TRIANGLES: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + case PIPE_PRIM_PATCHES: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; + + default: + debug_printf("pipe_prim_type: %s\n", u_prim_name(reduced_prim)); + unreachable("unexpected enum pipe_prim_type"); + } +} + +DXGI_FORMAT +d3d12_rtv_format(struct d3d12_context *ctx, unsigned index) +{ + DXGI_FORMAT fmt = ctx->gfx_pipeline_state.rtv_formats[index]; + + if (ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable && + !ctx->gfx_pipeline_state.has_float_rtv) { + switch (fmt) { + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + return DXGI_FORMAT_R8G8B8A8_UINT; + default: + unreachable("unsupported logic-op format"); + } + } + + return fmt; +} + +static ID3D12PipelineState * +create_gfx_pipeline_state(struct d3d12_context *ctx) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + struct d3d12_gfx_pipeline_state *state = &ctx->gfx_pipeline_state; + enum pipe_prim_type reduced_prim = u_reduced_prim(state->prim_type); + D3D12_SO_DECLARATION_ENTRY entries[PIPE_MAX_SO_OUTPUTS] = { 0 }; + UINT strides[PIPE_MAX_SO_OUTPUTS] = { 0 }; + UINT num_entries = 0, num_strides = 0; + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 }; + pso_desc.pRootSignature = state->root_signature; + + bool 
last_vertex_stage_writes_pos = false; + + if (state->stages[PIPE_SHADER_VERTEX]) { + auto shader = state->stages[PIPE_SHADER_VERTEX]; + pso_desc.VS.BytecodeLength = shader->bytecode_length; + pso_desc.VS.pShaderBytecode = shader->bytecode; + last_vertex_stage_writes_pos = (shader->nir->info.outputs_written & VARYING_BIT_POS) != 0; + } + + if (state->stages[PIPE_SHADER_GEOMETRY]) { + auto shader = state->stages[PIPE_SHADER_GEOMETRY]; + pso_desc.GS.BytecodeLength = shader->bytecode_length; + pso_desc.GS.pShaderBytecode = shader->bytecode; + last_vertex_stage_writes_pos = (shader->nir->info.outputs_written & VARYING_BIT_POS) != 0; + } + + if (last_vertex_stage_writes_pos && state->stages[PIPE_SHADER_FRAGMENT] && + !state->rast->base.rasterizer_discard) { + auto shader = state->stages[PIPE_SHADER_FRAGMENT]; + pso_desc.PS.BytecodeLength = shader->bytecode_length; + pso_desc.PS.pShaderBytecode = shader->bytecode; + } + + if (state->num_so_targets) + fill_so_declaration(&state->so_info, entries, &num_entries, + strides, &num_strides); + pso_desc.StreamOutput.NumEntries = num_entries; + pso_desc.StreamOutput.pSODeclaration = entries; + pso_desc.StreamOutput.RasterizedStream = state->rast->base.rasterizer_discard ? 
D3D12_SO_NO_RASTERIZED_STREAM : 0; + pso_desc.StreamOutput.NumStrides = num_strides; + pso_desc.StreamOutput.pBufferStrides = strides; + + pso_desc.BlendState = state->blend->desc; + if (state->has_float_rtv) + pso_desc.BlendState.RenderTarget[0].LogicOpEnable = FALSE; + + pso_desc.DepthStencilState = state->zsa->desc; + pso_desc.SampleMask = state->sample_mask; + pso_desc.RasterizerState = state->rast->desc; + + if (reduced_prim != PIPE_PRIM_TRIANGLES) + pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + + if (depth_bias(state->rast, reduced_prim)) { + pso_desc.RasterizerState.DepthBias = state->rast->base.offset_units * 2; + pso_desc.RasterizerState.DepthBiasClamp = state->rast->base.offset_clamp; + pso_desc.RasterizerState.SlopeScaledDepthBias = state->rast->base.offset_scale; + } + + pso_desc.InputLayout.pInputElementDescs = state->ves->elements; + pso_desc.InputLayout.NumElements = state->ves->num_elements; + + pso_desc.IBStripCutValue = state->ib_strip_cut_value; + + pso_desc.PrimitiveTopologyType = topology_type(reduced_prim); + + pso_desc.NumRenderTargets = state->num_cbufs; + for (int i = 0; i < state->num_cbufs; ++i) + pso_desc.RTVFormats[i] = d3d12_rtv_format(ctx, i); + pso_desc.DSVFormat = state->dsv_format; + + pso_desc.SampleDesc.Count = state->samples; + pso_desc.SampleDesc.Quality = 0; + + pso_desc.NodeMask = 0; + + pso_desc.CachedPSO.pCachedBlob = NULL; + pso_desc.CachedPSO.CachedBlobSizeInBytes = 0; + + pso_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + ID3D12PipelineState *ret; + if (FAILED(screen->dev->CreateGraphicsPipelineState(&pso_desc, + __uuidof(ret), + (void **)&ret))) { + debug_printf("D3D12: CreateGraphicsPipelineState failed!\n"); + return NULL; + } + + return ret; +} + +static uint32_t +hash_gfx_pipeline_state(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct d3d12_gfx_pipeline_state)); +} + +static bool +equals_gfx_pipeline_state(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct 
d3d12_gfx_pipeline_state)) == 0; +} + +/** + * Return the ID3D12PipelineState cached for ctx->gfx_pipeline_state, creating + * it and inserting it into ctx->pso_cache on a miss. + * + * Returns NULL when the entry allocation, the PSO creation or the cache + * insertion fails; nothing is leaked in that case. + */ +ID3D12PipelineState * +d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx) +{ + uint32_t hash = hash_gfx_pipeline_state(&ctx->gfx_pipeline_state); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->pso_cache, hash, + &ctx->gfx_pipeline_state); + if (!entry) { + struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)MALLOC(sizeof(struct d3d12_pso_entry)); + if (!data) + return NULL; + + data->key = ctx->gfx_pipeline_state; + data->pso = create_gfx_pipeline_state(ctx); + if (!data->pso) { + /* PSO creation failed: free the entry we just allocated */ + FREE(data); + return NULL; + } + + entry = _mesa_hash_table_insert_pre_hashed(ctx->pso_cache, hash, &data->key, data); + if (!entry) { + /* Insertion failed: release the PSO and the entry instead of + * dereferencing a NULL hash entry below */ + data->pso->Release(); + FREE(data); + return NULL; + } + } + + return ((struct d3d12_pso_entry *)(entry->data))->pso; +} + +void +d3d12_gfx_pipeline_state_cache_init(struct d3d12_context *ctx) +{ + ctx->pso_cache = _mesa_hash_table_create(NULL, NULL, equals_gfx_pipeline_state); +} + +static void +delete_entry(struct hash_entry *entry) +{ + struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)entry->data; + data->pso->Release(); + FREE(data); +} + +static void +remove_entry(struct d3d12_context *ctx, struct hash_entry *entry) +{ + struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)entry->data; + + if (ctx->current_pso == data->pso) + ctx->current_pso = NULL; + _mesa_hash_table_remove(ctx->pso_cache, entry); + delete_entry(entry); +} + +void +d3d12_gfx_pipeline_state_cache_destroy(struct d3d12_context *ctx) +{ + _mesa_hash_table_destroy(ctx->pso_cache, delete_entry); +} + +void +d3d12_gfx_pipeline_state_cache_invalidate(struct d3d12_context *ctx, const void *state) +{ + hash_table_foreach(ctx->pso_cache, entry) { + const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key; + if (key->blend == state || key->zsa == state || key->rast == state) + remove_entry(ctx, entry); + } +} + +void +d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx, + enum pipe_shader_type stage, + struct 
d3d12_shader_selector *selector) +{ + struct d3d12_shader *shader = selector->first; + + while (shader) { + hash_table_foreach(ctx->pso_cache, entry) { + const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key; + if (key->stages[stage] == shader) + remove_entry(ctx, entry); + } + shader = shader->next_variant; + } +} diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.h b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h new file mode 100644 index 00000000000..015b1a1176f --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h @@ -0,0 +1,100 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_PIPELINE_STATE_H +#define D3D12_PIPELINE_STATE_H + +#include "pipe/p_state.h" + +/* Direct3D 12 API header: provides the ID3D12* interfaces and D3D12_*_DESC + * types used by the state structs below */ +#define D3D12_IGNORE_SDK_LAYERS +#include <d3d12.h> + +struct d3d12_context; +struct d3d12_root_signature; + +struct d3d12_vertex_elements_state { + D3D12_INPUT_ELEMENT_DESC elements[PIPE_MAX_ATTRIBS]; + enum pipe_format format_conversion[PIPE_MAX_ATTRIBS]; + unsigned num_elements:6; // <= PIPE_MAX_ATTRIBS + unsigned needs_format_emulation:1; + unsigned unused:25; +}; + +struct d3d12_rasterizer_state { + struct pipe_rasterizer_state base; + D3D12_RASTERIZER_DESC desc; + void *twoface_back; +}; + +struct d3d12_blend_state { + D3D12_BLEND_DESC desc; + unsigned blend_factor_flags; + bool is_dual_src; +}; + +struct d3d12_depth_stencil_alpha_state { + D3D12_DEPTH_STENCIL_DESC desc; +}; + +struct d3d12_gfx_pipeline_state { + ID3D12RootSignature *root_signature; + struct d3d12_shader *stages[PIPE_SHADER_TYPES - 1]; + struct pipe_stream_output_info so_info; + + struct d3d12_vertex_elements_state *ves; + struct d3d12_blend_state *blend; + struct d3d12_depth_stencil_alpha_state *zsa; + struct d3d12_rasterizer_state *rast; + + unsigned samples; + unsigned sample_mask; + unsigned num_cbufs; + unsigned num_so_targets; + bool has_float_rtv; + DXGI_FORMAT rtv_formats[8]; + DXGI_FORMAT dsv_format; + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value; + enum pipe_prim_type prim_type; +}; + +DXGI_FORMAT +d3d12_rtv_format(struct d3d12_context *ctx, unsigned index); + +void +d3d12_gfx_pipeline_state_cache_init(struct d3d12_context *ctx); + +void +d3d12_gfx_pipeline_state_cache_destroy(struct d3d12_context *ctx); + +ID3D12PipelineState * +d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx); + +void +d3d12_gfx_pipeline_state_cache_invalidate(struct d3d12_context *ctx, const void *state); + +void +d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx, + enum pipe_shader_type stage, + struct d3d12_shader_selector *selector); + +#endif diff --git 
a/src/gallium/drivers/d3d12/d3d12_public.h b/src/gallium/drivers/d3d12/d3d12_public.h new file mode 100644 index 00000000000..bd485b3a6c4 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_public.h @@ -0,0 +1,41 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_PUBLIC_H +#define D3D12_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen * +d3d12_create_screen(struct sw_winsys *winsys, LUID *adapter_luid); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_query.cpp b/src/gallium/drivers/d3d12/d3d12_query.cpp new file mode 100644 index 00000000000..c6da498ada7 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_query.cpp @@ -0,0 +1,524 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "d3d12_query.h" +#include "d3d12_context.h" +#include "d3d12_resource.h" +#include "d3d12_screen.h" + +#include "util/u_dump.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" + +struct d3d12_query { + enum pipe_query_type type; + + ID3D12QueryHeap *query_heap; + unsigned curr_query, num_queries; + size_t query_size; + struct d3d12_query *subquery; + + D3D12_QUERY_TYPE d3d12qtype; + + pipe_resource *buffer; + unsigned buffer_offset; + uint64_t fence_value; + + struct list_head active_list; + struct d3d12_resource *predicate; +}; + +static D3D12_QUERY_HEAP_TYPE +d3d12_query_heap_type(unsigned query_type) +{ + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + return D3D12_QUERY_HEAP_TYPE_OCCLUSION; + case PIPE_QUERY_PIPELINE_STATISTICS: + return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS; + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + return D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + + default: + debug_printf("unknown query: %s\n", + util_str_query_type(query_type, true)); + unreachable("d3d12: unknown query type"); + } +} + +static D3D12_QUERY_TYPE +d3d12_query_type(unsigned query_type) +{ + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + return D3D12_QUERY_TYPE_OCCLUSION; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + return D3D12_QUERY_TYPE_BINARY_OCCLUSION; + case PIPE_QUERY_PIPELINE_STATISTICS: + return D3D12_QUERY_TYPE_PIPELINE_STATISTICS; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + return D3D12_QUERY_TYPE_TIMESTAMP; + default: + 
debug_printf("unknown query: %s\n", + util_str_query_type(query_type, true)); + unreachable("d3d12: unknown query type"); + } +} + +/** + * Create a query object of the given pipe query type: allocates the + * d3d12_query, a D3D12 query heap with num_queries slots, and a slice of the + * context's query suballocator to receive resolved results. + * + * Returns NULL if any of these allocations fails. + */ +static struct pipe_query * +d3d12_create_query(struct pipe_context *pctx, + unsigned query_type, unsigned index) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + struct d3d12_query *query = CALLOC_STRUCT(d3d12_query); + D3D12_QUERY_HEAP_DESC desc = {}; + + if (!query) + return NULL; + + query->type = (pipe_query_type)query_type; + query->d3d12qtype = d3d12_query_type(query_type); + query->num_queries = 16; + + /* With timer queries we want a few more queries, especially since we need two slots + * per query for TIME_ELAPSED queries */ + if (unlikely(query->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) + query->num_queries = 64; + + query->curr_query = 0; + + switch (query->d3d12qtype) { + case D3D12_QUERY_TYPE_PIPELINE_STATISTICS: + query->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); + break; + case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0: + query->query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS); + break; + default: + query->query_size = sizeof(uint64_t); + break; + } + + desc.Count = query->num_queries; + desc.Type = d3d12_query_heap_type(query_type); + if (FAILED(screen->dev->CreateQueryHeap(&desc, + __uuidof(query->query_heap), + (void **)&query->query_heap))) { + FREE(query); + return NULL; + } + + /* Query result goes into a readback buffer */ + size_t buffer_size = query->query_size * query->num_queries; + u_suballocator_alloc(ctx->query_allocator, buffer_size, 256, + &query->buffer_offset, &query->buffer); + if (!query->buffer) { + /* No result buffer was handed out (query->buffer is still NULL after + * the call): undo the heap creation rather than return a query that + * cannot be resolved or mapped */ + query->query_heap->Release(); + FREE(query); + return NULL; + } + + return (struct pipe_query *)query; +} + +static void +d3d12_destroy_query(struct pipe_context *pctx, + struct pipe_query *q) +{ + struct d3d12_query *query = (struct d3d12_query *)q; + pipe_resource *predicate = &query->predicate->base; + if (query->subquery) + d3d12_destroy_query(pctx, (struct pipe_query 
*)query->subquery); + pipe_resource_reference(&predicate, NULL); + /* Drop the query's reference on its result buffer as well, otherwise + * the buffer stays referenced after the query is freed */ + pipe_resource_reference(&query->buffer, NULL); + query->query_heap->Release(); + FREE(query); +} + +static bool +accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q, + union pipe_query_result *result, bool write) +{ + struct pipe_transfer *transfer = NULL; + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + unsigned access = PIPE_MAP_READ; + void *results; + + if (write) + access |= PIPE_MAP_WRITE; + results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset, + q->num_queries * q->query_size, + access, &transfer); + + if (results == NULL) + return false; + + uint64_t *results_u64 = (uint64_t *)results; + D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results; + D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results; + + util_query_clear_result(result, q->type); + for (int i = 0; i < q->curr_query; ++i) { + switch (q->type) { + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + result->b |= results_u64[i] != 0; + break; + + case PIPE_QUERY_OCCLUSION_COUNTER: + result->u64 += results_u64[i]; + break; + case PIPE_QUERY_TIMESTAMP: + result->u64 = results_u64[i]; + break; + + case PIPE_QUERY_PIPELINE_STATISTICS: + result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices; + result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives; + result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations; + result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations; + result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives; + result->pipeline_statistics.c_invocations += results_stats[i].CInvocations; + result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives; + result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations; + result->pipeline_statistics.hs_invocations += 
results_stats[i].HSInvocations; + result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations; + result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations; + break; + + case PIPE_QUERY_PRIMITIVES_GENERATED: + result->u64 += results_so[i].PrimitivesStorageNeeded; + break; + + case PIPE_QUERY_PRIMITIVES_EMITTED: + result->u64 += results_so[i].NumPrimitivesWritten; + break; + + case PIPE_QUERY_TIME_ELAPSED: + result->u64 += results_u64[2 * i + 1] - results_u64[2 * i]; + break; + + case PIPE_QUERY_SO_STATISTICS: + result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten; + result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded; + break; + + default: + debug_printf("unsupported query type: %s\n", + util_str_query_type(q->type, true)); + unreachable("unexpected query type"); + } + } + + if (q->subquery) { + union pipe_query_result subresult; + + accumulate_result(ctx, q->subquery, &subresult, false); + q->subquery->curr_query = 0; + if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED) + result->u64 += subresult.pipeline_statistics.ia_primitives; + } + + if (write) { + if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) { + results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices; + results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives; + results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations; + results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations; + results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives; + results_stats[0].CInvocations = result->pipeline_statistics.c_invocations; + results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives; + results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations; + results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations; + results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations; + 
results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations; + } else if (q->type == PIPE_QUERY_SO_STATISTICS) { + results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written; + results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed; + } else { + if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) { + results_u64[0] = 0; + results_u64[1] = result->u64; + } else { + results_u64[0] = result->u64; + } + } + } + + pipe_buffer_unmap(&ctx->base, transfer); + + if (q->type == PIPE_QUERY_TIME_ELAPSED || + q->type == PIPE_QUERY_TIMESTAMP) + result->u64 = static_cast(screen->timestamp_multiplier * result->u64); + + return true; +} + +static void +begin_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart) +{ + if (restart) { + q->curr_query = 0; + } else if (q->curr_query == q->num_queries) { + union pipe_query_result result; + + /* Accumulate current results and store in first slot */ + d3d12_flush_cmdlist_and_wait(ctx); + accumulate_result(ctx, q, &result, true); + q->curr_query = 1; + } + + if (q->subquery) + begin_query(ctx, q->subquery, restart); + + ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query); +} + + +static void +begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart) +{ + /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in + * EndQuery, so we need two query slots */ + unsigned query_index = 2 * q->curr_query; + + if (restart) { + q->curr_query = 0; + query_index = 0; + } else if (query_index == q->num_queries) { + union pipe_query_result result; + + /* Accumulate current results and store in first slot */ + d3d12_flush_cmdlist_and_wait(ctx); + accumulate_result(ctx, q, &result, true); + q->curr_query = 2; + } + + ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index); +} + +static bool +d3d12_begin_query(struct pipe_context *pctx, + struct pipe_query *q) +{ + struct d3d12_context *ctx = 
d3d12_context(pctx); + struct d3d12_query *query = (struct d3d12_query *)q; + + assert(query->type != PIPE_QUERY_TIMESTAMP); + + if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED)) + begin_timer_query(ctx, query, true); + else { + begin_query(ctx, query, true); + list_addtail(&query->active_list, &ctx->active_queries); + } + + return true; +} + +static void +end_query(struct d3d12_context *ctx, struct d3d12_query *q) +{ + uint64_t offset = 0; + struct d3d12_batch *batch = d3d12_current_batch(ctx); + struct d3d12_resource *res = (struct d3d12_resource *)q->buffer; + ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset); + + /* End subquery first so that we can use fence value from parent */ + if (q->subquery) + end_query(ctx, q->subquery); + + /* With QUERY_TIME_ELAPSED we have recorded one value at + * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1) + * and when resolving the query we subtract the latter from the former */ + + unsigned resolve_count = q->type == PIPE_QUERY_TIME_ELAPSED ? 
2 : 1; + unsigned resolve_index = resolve_count * q->curr_query; + unsigned end_index = resolve_index + resolve_count - 1; + + offset += q->buffer_offset + resolve_index * q->query_size; + ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index); + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST); + d3d12_apply_resource_states(ctx); + ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index, + resolve_count, d3d12_res, offset); + + d3d12_batch_reference_object(batch, q->query_heap); + d3d12_batch_reference_resource(batch, res); + + assert(q->curr_query < q->num_queries); + q->curr_query++; +} + +static bool +d3d12_end_query(struct pipe_context *pctx, + struct pipe_query *q) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_query *query = (struct d3d12_query *)q; + + end_query(ctx, query); + + if (query->type != PIPE_QUERY_TIMESTAMP && + query->type != PIPE_QUERY_TIME_ELAPSED) + list_delinit(&query->active_list); + + query->fence_value = ctx->fence_value; + return true; +} + +static bool +d3d12_get_query_result(struct pipe_context *pctx, + struct pipe_query *q, + bool wait, + union pipe_query_result *result) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_query *query = (struct d3d12_query *)q; + + if (ctx->cmdqueue_fence->GetCompletedValue() < query->fence_value) { + if (!wait) + return false; + d3d12_flush_cmdlist_and_wait(ctx); + } + + return accumulate_result(ctx, query, result, false); +} + +void +d3d12_suspend_queries(struct d3d12_context *ctx) +{ + list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) { + end_query(ctx, query); + } +} + +void +d3d12_resume_queries(struct d3d12_context *ctx) +{ + list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) { + begin_query(ctx, query, false); + } +} + +void +d3d12_validate_queries(struct d3d12_context *ctx) +{ + bool have_xfb = !!ctx->gfx_pipeline_state.num_so_targets; + + 
list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) { + if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED && !have_xfb && !query->subquery) { + struct pipe_query *subquery = d3d12_create_query(&ctx->base, PIPE_QUERY_PIPELINE_STATISTICS, 0); + query->subquery = (struct d3d12_query *)subquery; + if (!ctx->queries_disabled) + begin_query(ctx, query->subquery, true); + } + } +} + +static void +d3d12_set_active_query_state(struct pipe_context *pctx, bool enable) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + ctx->queries_disabled = !enable; + + if (enable) + d3d12_resume_queries(ctx); + else + d3d12_suspend_queries(ctx); +} + +static void +d3d12_render_condition(struct pipe_context *pctx, + struct pipe_query *pquery, + bool condition, + enum pipe_render_cond_flag mode) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_query *query = (struct d3d12_query *)pquery; + + if (query == nullptr) { + ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO); + ctx->current_predication = nullptr; + return; + } + + if (!query->predicate) + query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0, + PIPE_USAGE_DEFAULT, sizeof(uint64_t))); + + if (mode == PIPE_RENDER_COND_WAIT) { + d3d12_flush_cmdlist_and_wait(ctx); + union pipe_query_result result; + accumulate_result(ctx, (d3d12_query *)pquery, &result, true); + } + + struct d3d12_resource *res = (struct d3d12_resource *)query->buffer; + d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE); + d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST); + d3d12_apply_resource_states(ctx); + ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0, + d3d12_resource_resource(res), 0, + sizeof(uint64_t)); + + d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION); + d3d12_apply_resource_states(ctx); + + ctx->current_predication = query->predicate; 
+ /* documentation of ID3D12GraphicsCommandList::SetPredication method: + * "resource manipulation commands are _not_ actually performed + * if the resulting predicate data of the predicate is equal to + * the operation specified." + */ + ctx->cmdlist->SetPredication(d3d12_resource_resource(query->predicate), 0, + condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO : + D3D12_PREDICATION_OP_EQUAL_ZERO); +} + +void +d3d12_context_query_init(struct pipe_context *pctx) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + list_inithead(&ctx->active_queries); + + ctx->query_allocator = + u_suballocator_create(&ctx->base, 4096, 0, PIPE_USAGE_STAGING, + 0, true); + + pctx->create_query = d3d12_create_query; + pctx->destroy_query = d3d12_destroy_query; + pctx->begin_query = d3d12_begin_query; + pctx->end_query = d3d12_end_query; + pctx->get_query_result = d3d12_get_query_result; + pctx->set_active_query_state = d3d12_set_active_query_state; + pctx->render_condition = d3d12_render_condition; +} diff --git a/src/gallium/drivers/d3d12/d3d12_query.h b/src/gallium/drivers/d3d12/d3d12_query.h new file mode 100644 index 00000000000..61ce2250a4f --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_query.h @@ -0,0 +1,38 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
#ifndef D3D12_QUERY_H
#define D3D12_QUERY_H

struct d3d12_context;

/* Ends every query that is currently linked into ctx->active_queries.
 * The queries stay on the list. */
void
d3d12_suspend_queries(struct d3d12_context *ctx);

/* Begins every query on ctx->active_queries again; used together with
 * d3d12_suspend_queries(), e.g. when the active query state is toggled. */
void
d3d12_resume_queries(struct d3d12_context *ctx);

/* For each active PIPE_QUERY_PRIMITIVES_GENERATED query that has no subquery
 * yet, and while no stream-output targets are bound, creates a
 * PIPE_QUERY_PIPELINE_STATISTICS subquery and begins it unless queries are
 * currently disabled on the context. */
void
d3d12_validate_queries(struct d3d12_context *ctx);

#endif
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_resource.h" + +#include "d3d12_blit.h" +#include "d3d12_context.h" +#include "d3d12_format.h" +#include "d3d12_screen.h" +#include "d3d12_debug.h" + +#include "pipebuffer/pb_bufmgr.h" +#include "util/slab.h" +#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/format/u_format_zs.h" + +#include "frontend/sw_winsys.h" + +#include +#include + +static bool +can_map_directly(struct pipe_resource *pres) +{ + return pres->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_LINEAR) || + pres->target == PIPE_BUFFER; +} + +static void +init_valid_range(struct d3d12_resource *res) +{ + if (can_map_directly(&res->base)) + util_range_init(&res->valid_buffer_range); +} + +static void +d3d12_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource) +{ + struct d3d12_resource *resource = d3d12_resource(presource); + if (can_map_directly(presource)) + util_range_destroy(&resource->valid_buffer_range); + if (resource->bo) + d3d12_bo_unreference(resource->bo); + FREE(resource); +} + +static bool +resource_is_busy(struct d3d12_context *ctx, + struct d3d12_resource *res) +{ + bool busy = false; + + for (int i = 0; i < ARRAY_SIZE(ctx->batches); i++) + busy |= d3d12_batch_has_references(&ctx->batches[i], res->bo); + + return busy; +} + +void +d3d12_resource_wait_idle(struct d3d12_context *ctx, + struct d3d12_resource *res) +{ + if (d3d12_batch_has_references(d3d12_current_batch(ctx), res->bo)) { + d3d12_flush_cmdlist_and_wait(ctx); + } else { + d3d12_foreach_submitted_batch(ctx, batch) { + d3d12_reset_batch(ctx, batch, PIPE_TIMEOUT_INFINITE); + if (!resource_is_busy(ctx, res)) + break; + } + } +} + +void 
+d3d12_resource_release(struct d3d12_resource *resource) +{ + if (!resource->bo) + return; + d3d12_bo_unreference(resource->bo); + resource->bo = NULL; +} + +static bool +init_buffer(struct d3d12_screen *screen, + struct d3d12_resource *res, + const struct pipe_resource *templ) +{ + struct pb_desc buf_desc; + struct pb_manager *bufmgr; + struct pb_buffer *buf; + + /* Assert that we don't want to create a buffer with one of the emulated + * formats, these are (currently) only supported when passing the vertex + * element state */ + assert(templ->format == d3d12_emulated_vtx_format(templ->format)); + + /* Don't use slab buffer manager for GPU writable buffers */ + bufmgr = templ->bind & PIPE_BIND_STREAM_OUTPUT ? screen->cache_bufmgr + : screen->slab_bufmgr; + buf_desc.alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + buf_desc.usage = (pb_usage_flags)PB_USAGE_ALL; + res->dxgi_format = DXGI_FORMAT_UNKNOWN; + buf = bufmgr->create_buffer(bufmgr, templ->width0, &buf_desc); + if (!buf) + return false; + res->bo = d3d12_bo_wrap_buffer(buf); + + return true; +} + +static bool +init_texture(struct d3d12_screen *screen, + struct d3d12_resource *res, + const struct pipe_resource *templ) +{ + ID3D12Resource *d3d12_res; + + res->mip_levels = templ->last_level + 1; + res->dxgi_format = d3d12_get_format(templ->format); + + D3D12_RESOURCE_DESC desc; + desc.Format = res->dxgi_format; + desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + desc.Width = templ->width0; + desc.Height = templ->height0; + desc.DepthOrArraySize = templ->array_size; + desc.MipLevels = templ->last_level + 1; + + desc.SampleDesc.Count = MAX2(templ->nr_samples, 1); + desc.SampleDesc.Quality = 0; /* TODO: figure this one out */ + + switch (templ->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D; + break; + + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + desc.DepthOrArraySize *= 6; + /* fall-through */ + case 
PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + break; + + case PIPE_TEXTURE_3D: + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + desc.DepthOrArraySize = templ->depth0; + break; + } + + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + + if (templ->bind & PIPE_BIND_SHADER_BUFFER) + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + if (templ->bind & PIPE_BIND_RENDER_TARGET) + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + + /* Sadly, we can't set D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE in the + * case where PIPE_BIND_SAMPLER_VIEW isn't set, because that would + * prevent us from using the resource with u_blitter, which requires + * sneaking in sampler-usage throught the back-door. + */ + } + + desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + if (templ->bind & (PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED | PIPE_BIND_LINEAR)) + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + D3D12_HEAP_TYPE heap_type = D3D12_HEAP_TYPE_DEFAULT; + + if (templ->bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) + heap_type = D3D12_HEAP_TYPE_READBACK; + else if (templ->usage == PIPE_USAGE_STAGING) + heap_type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_HEAP_PROPERTIES heap_pris = screen->dev->GetCustomHeapProperties(0, heap_type); + + HRESULT hres = screen->dev->CreateCommittedResource(&heap_pris, + D3D12_HEAP_FLAG_NONE, + &desc, + D3D12_RESOURCE_STATE_COMMON, + NULL, + __uuidof(ID3D12Resource), + (void **)&d3d12_res); + if (FAILED(hres)) + return false; + + if (screen->winsys && (templ->bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED))) { + struct sw_winsys *winsys = screen->winsys; + res->dt = winsys->displaytarget_create(screen->winsys, + res->base.bind, + res->base.format, + templ->width0, + templ->height0, + 64, NULL, + &res->dt_stride); 
+ } + + res->bo = d3d12_bo_wrap_res(d3d12_res, templ->format); + + return true; +} + +static struct pipe_resource * +d3d12_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *templ) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + struct d3d12_resource *res = CALLOC_STRUCT(d3d12_resource); + bool ret; + + res->base = *templ; + + if (D3D12_DEBUG_RESOURCE & d3d12_debug) { + debug_printf("D3D12: Create %sresource %s@%d %dx%dx%d as:%d mip:%d\n", + templ->usage == PIPE_USAGE_STAGING ? "STAGING " :"", + util_format_name(templ->format), templ->nr_samples, + templ->width0, templ->height0, templ->depth0, + templ->array_size, templ->last_level, templ); + } + + pipe_reference_init(&res->base.reference, 1); + res->base.screen = pscreen; + + if (templ->target == PIPE_BUFFER) { + ret = init_buffer(screen, res, templ); + } else { + ret = init_texture(screen, res, templ); + } + + if (!ret) { + FREE(res); + return NULL; + } + + init_valid_range(res); + + return &res->base; +} + +static struct pipe_resource * +d3d12_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templ, + struct winsys_handle *handle, unsigned usage) +{ + return NULL; +} + +static bool +d3d12_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *pcontext, + struct pipe_resource *pres, + struct winsys_handle *handle, + unsigned usage) +{ + return false; +} + +void +d3d12_screen_resource_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create = d3d12_resource_create; + pscreen->resource_from_handle = d3d12_resource_from_handle; + pscreen->resource_get_handle = d3d12_resource_get_handle; + pscreen->resource_destroy = d3d12_resource_destroy; +} + +unsigned int +get_subresource_id(struct d3d12_resource *res, unsigned resid, + unsigned z, unsigned base_level) +{ + unsigned resource_stride = res->base.last_level + 1; + if (res->base.target == PIPE_TEXTURE_1D_ARRAY || + res->base.target == PIPE_TEXTURE_2D_ARRAY) + resource_stride *= 
res->base.array_size; + + if (res->base.target == PIPE_TEXTURE_CUBE) + resource_stride *= 6; + + if (res->base.target == PIPE_TEXTURE_CUBE_ARRAY) + resource_stride *= 6 * res->base.array_size; + + unsigned layer_stride = res->base.last_level + 1; + + return resid * resource_stride + z * layer_stride + + base_level; +} + +static D3D12_TEXTURE_COPY_LOCATION +fill_texture_location(struct d3d12_resource *res, + struct d3d12_transfer *trans, unsigned resid, unsigned z) +{ + D3D12_TEXTURE_COPY_LOCATION tex_loc = {0}; + int subres = get_subresource_id(res, resid, z, trans->base.level); + + tex_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + tex_loc.SubresourceIndex = subres; + tex_loc.pResource = d3d12_resource_resource(res); + return tex_loc; +} + +static D3D12_TEXTURE_COPY_LOCATION +fill_buffer_location(struct d3d12_context *ctx, + struct d3d12_resource *res, + struct d3d12_resource *staging_res, + struct d3d12_transfer *trans, + unsigned depth, + unsigned resid, unsigned z) +{ + D3D12_TEXTURE_COPY_LOCATION buf_loc = {0}; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; + uint64_t offset = 0; + auto descr = d3d12_resource_underlying(res, &offset)->GetDesc(); + ID3D12Device* dev = d3d12_screen(ctx->base.screen)->dev; + + unsigned sub_resid = get_subresource_id(res, resid, z, trans->base.level); + dev->GetCopyableFootprints(&descr, sub_resid, 1, 0, &footprint, nullptr, nullptr, nullptr); + + buf_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + buf_loc.pResource = d3d12_resource_underlying(staging_res, &offset); + buf_loc.PlacedFootprint = footprint; + buf_loc.PlacedFootprint.Offset += offset; + + buf_loc.PlacedFootprint.Footprint.Width = ALIGN(trans->base.box.width, + util_format_get_blockwidth(res->base.format)); + buf_loc.PlacedFootprint.Footprint.Height = ALIGN(trans->base.box.height, + util_format_get_blockheight(res->base.format)); + buf_loc.PlacedFootprint.Footprint.Depth = ALIGN(depth, + util_format_get_blockdepth(res->base.format)); + + 
buf_loc.PlacedFootprint.Footprint.RowPitch = trans->base.stride; + + return buf_loc; +} + +struct copy_info { + struct d3d12_resource *dst; + D3D12_TEXTURE_COPY_LOCATION dst_loc; + UINT dst_x, dst_y, dst_z; + struct d3d12_resource *src; + D3D12_TEXTURE_COPY_LOCATION src_loc; + D3D12_BOX *src_box; +}; + + +static void +copy_texture_region(struct d3d12_context *ctx, + struct copy_info& info) +{ + auto batch = d3d12_current_batch(ctx); + + d3d12_batch_reference_resource(batch, info.src); + d3d12_batch_reference_resource(batch, info.dst); + + d3d12_transition_resource_state(ctx, info.src, D3D12_RESOURCE_STATE_COPY_SOURCE); + d3d12_transition_resource_state(ctx, info.dst, D3D12_RESOURCE_STATE_COPY_DEST); + d3d12_apply_resource_states(ctx); + ctx->cmdlist->CopyTextureRegion(&info.dst_loc, info.dst_x, info.dst_y, info.dst_z, + &info.src_loc, info.src_box); +} + +static void +transfer_buf_to_image_part(struct d3d12_context *ctx, + struct d3d12_resource *res, + struct d3d12_resource *staging_res, + struct d3d12_transfer *trans, + int z, int depth, int start_z, int dest_z, + int resid) +{ + if (D3D12_DEBUG_RESOURCE & d3d12_debug) { + debug_printf("D3D12: Copy %dx%dx%d + %dx%dx%d from buffer %s to image %s\n", + trans->base.box.x, trans->base.box.y, trans->base.box.z, + trans->base.box.width, trans->base.box.height, trans->base.box.depth, + util_format_name(staging_res->base.format), + util_format_name(res->base.format)); + } + + struct copy_info copy_info; + copy_info.src = staging_res; + copy_info.src_loc = fill_buffer_location(ctx, res, staging_res, trans, depth, resid, z); + copy_info.src_loc.PlacedFootprint.Offset = (z - start_z) * trans->base.layer_stride; + copy_info.src_box = nullptr; + copy_info.dst = res; + copy_info.dst_loc = fill_texture_location(res, trans, resid, z); + copy_info.dst_x = trans->base.box.x; + copy_info.dst_y = trans->base.box.y; + copy_info.dst_z = res->base.target == PIPE_TEXTURE_CUBE ? 
0 : dest_z; + copy_info.src_box = nullptr; + + copy_texture_region(ctx, copy_info); +} + +static bool +transfer_buf_to_image(struct d3d12_context *ctx, + struct d3d12_resource *res, + struct d3d12_resource *staging_res, + struct d3d12_transfer *trans, int resid) +{ + if (res->base.target == PIPE_TEXTURE_3D) { + assert(resid == 0); + transfer_buf_to_image_part(ctx, res, staging_res, trans, + 0, trans->base.box.depth, 0, + trans->base.box.z, 0); + } else { + int num_layers = trans->base.box.depth; + int start_z = trans->base.box.z; + + for (int z = start_z; z < start_z + num_layers; ++z) { + transfer_buf_to_image_part(ctx, res, staging_res, trans, + z, 1, start_z, 0, resid); + } + } + return true; +} + +static void +transfer_image_part_to_buf(struct d3d12_context *ctx, + struct d3d12_resource *res, + struct d3d12_resource *staging_res, + struct d3d12_transfer *trans, + unsigned resid, int z, int start_layer, + int start_box_z, int depth) +{ + struct pipe_box *box = &trans->base.box; + D3D12_BOX src_box = {}; + + struct copy_info copy_info; + copy_info.src_box = nullptr; + copy_info.src = res; + copy_info.src_loc = fill_texture_location(res, trans, resid, z); + copy_info.dst = staging_res; + copy_info.dst_loc = fill_buffer_location(ctx, res, staging_res, trans, + depth, resid, z); + copy_info.dst_loc.PlacedFootprint.Offset = (z - start_layer) * trans->base.layer_stride; + copy_info.dst_x = copy_info.dst_y = copy_info.dst_z = 0; + + if (!util_texrange_covers_whole_level(&res->base, trans->base.level, + box->x, box->y, start_box_z, + box->width, box->height, depth)) { + src_box.left = box->x; + src_box.right = box->x + box->width; + src_box.top = box->y; + src_box.bottom = box->y + box->height; + src_box.front = start_box_z; + src_box.back = start_box_z + depth; + copy_info.src_box = &src_box; + } + + copy_texture_region(ctx, copy_info); +} + +static bool +transfer_image_to_buf(struct d3d12_context *ctx, + struct d3d12_resource *res, + struct d3d12_resource 
*staging_res, + struct d3d12_transfer *trans, + unsigned resid) +{ + + /* We only suppport loading from either an texture array + * or a ZS texture, so either resid is zero, or num_layers == 1) + */ + assert(resid == 0 || trans->base.box.depth == 1); + + if (D3D12_DEBUG_RESOURCE & d3d12_debug) { + debug_printf("D3D12: Copy %dx%dx%d + %dx%dx%d from %s@%d to %s\n", + trans->base.box.x, trans->base.box.y, trans->base.box.z, + trans->base.box.width, trans->base.box.height, trans->base.box.depth, + util_format_name(res->base.format), resid, + util_format_name(staging_res->base.format)); + } + + struct pipe_resource *resolved_resource = nullptr; + if (res->base.nr_samples > 1) { + struct pipe_resource tmpl = res->base; + tmpl.nr_samples = 0; + resolved_resource = d3d12_resource_create(ctx->base.screen, &tmpl); + struct pipe_blit_info resolve_info = {0}; + struct pipe_box box = {0,0,0, (int)res->base.width0, (int16_t)res->base.height0, (int16_t)res->base.depth0}; + resolve_info.dst.resource = resolved_resource; + resolve_info.dst.box = box; + resolve_info.dst.format = res->base.format; + resolve_info.src.resource = &res->base; + resolve_info.src.box = box; + resolve_info.src.format = res->base.format; + resolve_info.filter = PIPE_TEX_FILTER_NEAREST; + resolve_info.mask = util_format_get_mask(tmpl.format); + + + + d3d12_blit(&ctx->base, &resolve_info); + res = (struct d3d12_resource *)resolved_resource; + } + + + if (res->base.target == PIPE_TEXTURE_3D) { + transfer_image_part_to_buf(ctx, res, staging_res, trans, resid, + 0, 0, trans->base.box.z, trans->base.box.depth); + } else { + int start_layer = trans->base.box.z; + for (int z = start_layer; z < start_layer + trans->base.box.depth; ++z) { + transfer_image_part_to_buf(ctx, res, staging_res, trans, resid, + z, start_layer, 0, 1); + } + } + + pipe_resource_reference(&resolved_resource, NULL); + + return true; +} + +static unsigned +linear_offset(int x, int y, int z, unsigned stride, unsigned layer_stride) +{ + return x + 
+ y * stride + + z * layer_stride; +} + +static D3D12_RANGE +linear_range(const struct pipe_box *box, unsigned stride, unsigned layer_stride) +{ + D3D12_RANGE range; + + range.Begin = linear_offset(box->x, box->y, box->z, + stride, layer_stride); + range.End = linear_offset(box->x + box->width, + box->y + box->height - 1, + box->z + box->depth - 1, + stride, layer_stride); + + return range; +} + +static bool +synchronize(struct d3d12_context *ctx, + struct d3d12_resource *res, + unsigned usage, + D3D12_RANGE *range) +{ + assert(can_map_directly(&res->base)); + + /* Check whether that range contains valid data; if not, we might not need to sync */ + if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && + usage & PIPE_MAP_WRITE && + !util_ranges_intersect(&res->valid_buffer_range, range->Begin, range->End)) { + usage |= PIPE_MAP_UNSYNCHRONIZED; + } + + if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && resource_is_busy(ctx, res)) { + if (usage & PIPE_MAP_DONTBLOCK) + return false; + + d3d12_resource_wait_idle(ctx, res); + } + + if (usage & PIPE_MAP_WRITE) + util_range_add(&res->base, &res->valid_buffer_range, + range->Begin, range->End); + + return true; +} + +/* A wrapper to make sure local resources are freed and unmapped with + * any exit path */ +struct local_resource { + local_resource(pipe_screen *s, struct pipe_resource *tmpl) + { + res = d3d12_resource(d3d12_resource_create(s, tmpl)); + } + + ~local_resource() { + if (res) { + if (mapped) + d3d12_bo_unmap(res->bo, nullptr); + pipe_resource_reference((struct pipe_resource **)&res, NULL); + } + } + + void * + map() { + void *ptr; + ptr = d3d12_bo_map(res->bo, nullptr); + if (ptr) + mapped = true; + return ptr; + } + + void unmap() + { + if (mapped) + d3d12_bo_unmap(res->bo, nullptr); + mapped = false; + } + + operator struct d3d12_resource *() { + return res; + } + + bool operator !() { + return !res; + } +private: + struct d3d12_resource *res; + bool mapped; +}; + +/* Combined depth-stencil needs a special handling for reading 
back: DX handled + * depth and stencil parts as separate resources and handles copying them only + * by using seperate texture copy calls with different formats. So create two + * buffers, read back both resources and interleave the data. + */ +static void +prepare_zs_layer_strides(struct d3d12_resource *res, + const struct pipe_box *box, + struct d3d12_transfer *trans) +{ + trans->base.stride = align(util_format_get_stride(res->base.format, box->width), + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + trans->base.layer_stride = util_format_get_2d_size(res->base.format, + trans->base.stride, + box->height); +} + +static void * +read_zs_surface(struct d3d12_context *ctx, struct d3d12_resource *res, + const struct pipe_box *box, + struct d3d12_transfer *trans) +{ + pipe_screen *pscreen = ctx->base.screen; + + prepare_zs_layer_strides(res, box, trans); + + struct pipe_resource tmpl; + memset(&tmpl, 0, sizeof tmpl); + tmpl.target = PIPE_BUFFER; + tmpl.format = PIPE_FORMAT_R32_UNORM; + tmpl.bind = 0; + tmpl.usage = PIPE_USAGE_STAGING; + tmpl.flags = 0; + tmpl.width0 = trans->base.layer_stride; + tmpl.height0 = 1; + tmpl.depth0 = 1; + tmpl.array_size = 1; + + local_resource depth_buffer(pscreen, &tmpl); + if (!depth_buffer) { + debug_printf("Allocating staging buffer for depth failed\n"); + return NULL; + } + + if (!transfer_image_to_buf(ctx, res, depth_buffer, trans, 0)) + return NULL; + + tmpl.format = PIPE_FORMAT_R8_UINT; + + local_resource stencil_buffer(pscreen, &tmpl); + if (!stencil_buffer) { + debug_printf("Allocating staging buffer for stencilfailed\n"); + return NULL; + } + + if (!transfer_image_to_buf(ctx, res, stencil_buffer, trans, 1)) + return NULL; + + d3d12_flush_cmdlist_and_wait(ctx); + + void *depth_ptr = depth_buffer.map(); + if (!depth_ptr) { + debug_printf("Mapping staging depth buffer failed\n"); + return NULL; + } + + uint8_t *stencil_ptr = (uint8_t *)stencil_buffer.map(); + if (!stencil_ptr) { + debug_printf("Mapping staging stencil buffer failed\n"); + 
return NULL; + } + + uint8_t *buf = (uint8_t *)malloc(trans->base.layer_stride); + if (!buf) + return NULL; + + trans->data = buf; + + switch (res->base.format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + util_format_z24_unorm_s8_uint_pack_separate(buf, trans->base.stride, + (uint32_t *)depth_ptr, trans->base.stride, + stencil_ptr, trans->base.stride, + trans->base.box.width, trans->base.box.height); + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + util_format_z32_float_s8x24_uint_pack_z_float(buf, trans->base.stride, + (float *)depth_ptr, trans->base.stride, + trans->base.box.width, trans->base.box.height); + util_format_z32_float_s8x24_uint_pack_s_8uint(buf, trans->base.stride, + stencil_ptr, trans->base.stride, + trans->base.box.width, trans->base.box.height); + break; + default: + unreachable("Unsupported depth steancil format"); + }; + + return trans->data; +} + +static void * +prepare_write_zs_surface(struct d3d12_resource *res, + const struct pipe_box *box, + struct d3d12_transfer *trans) +{ + prepare_zs_layer_strides(res, box, trans); + uint32_t *buf = (uint32_t *)malloc(trans->base.layer_stride); + if (!buf) + return NULL; + + trans->data = buf; + return trans->data; +} + +static void +write_zs_surface(struct pipe_context *pctx, struct d3d12_resource *res, + struct d3d12_transfer *trans) +{ + struct pipe_resource tmpl; + memset(&tmpl, 0, sizeof tmpl); + tmpl.target = PIPE_BUFFER; + tmpl.format = PIPE_FORMAT_R32_UNORM; + tmpl.bind = 0; + tmpl.usage = PIPE_USAGE_STAGING; + tmpl.flags = 0; + tmpl.width0 = trans->base.layer_stride; + tmpl.height0 = 1; + tmpl.depth0 = 1; + tmpl.array_size = 1; + + local_resource depth_buffer(pctx->screen, &tmpl); + if (!depth_buffer) { + debug_printf("Allocating staging buffer for depth failed\n"); + return; + } + + local_resource stencil_buffer(pctx->screen, &tmpl); + if (!stencil_buffer) { + debug_printf("Allocating staging buffer for depth failed\n"); + return; + } + + void *depth_ptr = depth_buffer.map(); + if (!depth_ptr) 
{ + debug_printf("Mapping staging depth buffer failed\n"); + return; + } + + uint8_t *stencil_ptr = (uint8_t *)stencil_buffer.map(); + if (!stencil_ptr) { + debug_printf("Mapping staging stencil buffer failed\n"); + return; + } + + switch (res->base.format) { + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + util_format_z32_unorm_unpack_z_32unorm((uint32_t *)depth_ptr, trans->base.stride, (uint8_t*)trans->data, + trans->base.stride, trans->base.box.width, + trans->base.box.height); + util_format_z24_unorm_s8_uint_unpack_s_8uint(stencil_ptr, trans->base.stride, (uint8_t*)trans->data, + trans->base.stride, trans->base.box.width, + trans->base.box.height); + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + util_format_z32_float_s8x24_uint_unpack_z_float((float *)depth_ptr, trans->base.stride, (uint8_t*)trans->data, + trans->base.stride, trans->base.box.width, + trans->base.box.height); + util_format_z32_float_s8x24_uint_unpack_s_8uint(stencil_ptr, trans->base.stride, (uint8_t*)trans->data, + trans->base.stride, trans->base.box.width, + trans->base.box.height); + break; + default: + unreachable("Unsupported depth steancil format"); + }; + + stencil_buffer.unmap(); + depth_buffer.unmap(); + + transfer_buf_to_image(d3d12_context(pctx), res, depth_buffer, trans, 0); + transfer_buf_to_image(d3d12_context(pctx), res, stencil_buffer, trans, 1); +} + +static void * +d3d12_transfer_map(struct pipe_context *pctx, + struct pipe_resource *pres, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_resource *res = d3d12_resource(pres); + + if (usage & PIPE_MAP_DIRECTLY || !res->bo) + return NULL; + + struct d3d12_transfer *trans = (struct d3d12_transfer *)slab_alloc(&ctx->transfer_pool); + struct pipe_transfer *ptrans = &trans->base; + if (!trans) + return NULL; + + memset(trans, 0, sizeof(*trans)); + pipe_resource_reference(&ptrans->resource, pres); + + ptrans->resource = 
pres; + ptrans->level = level; + ptrans->usage = (enum pipe_map_flags)usage; + ptrans->box = *box; + + D3D12_RANGE range; + range.Begin = 0; + + void *ptr; + if (can_map_directly(&res->base)) { + if (pres->target == PIPE_BUFFER) { + ptrans->stride = 0; + ptrans->layer_stride = 0; + } else { + ptrans->stride = util_format_get_stride(pres->format, box->width); + ptrans->layer_stride = util_format_get_2d_size(pres->format, + ptrans->stride, + box->height); + } + + range = linear_range(box, ptrans->stride, ptrans->layer_stride); + if (!synchronize(ctx, res, usage, &range)) + return NULL; + ptr = d3d12_bo_map(res->bo, &range); + } else if (unlikely(pres->format == PIPE_FORMAT_Z24_UNORM_S8_UINT || + pres->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + if (usage & PIPE_MAP_READ) { + ptr = read_zs_surface(ctx, res, box, trans); + } else if (usage & PIPE_MAP_WRITE){ + ptr = prepare_write_zs_surface(res, box, trans); + } + } else { + ptrans->stride = align(util_format_get_stride(pres->format, box->width), + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + ptrans->layer_stride = util_format_get_2d_size(pres->format, + ptrans->stride, + box->height); + + if (res->base.target != PIPE_TEXTURE_3D) + ptrans->layer_stride = align(ptrans->layer_stride, + D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + + trans->staging_res = pipe_buffer_create(pctx->screen, 0, + PIPE_USAGE_STAGING, + ptrans->layer_stride * box->depth); + if (!trans->staging_res) + return NULL; + + struct d3d12_resource *staging_res = d3d12_resource(trans->staging_res); + + if (usage & PIPE_MAP_READ) { + bool ret = transfer_image_to_buf(ctx, res, staging_res, trans, 0); + if (ret == false) + return NULL; + d3d12_flush_cmdlist_and_wait(ctx); + } + + range.Begin = 0; + range.End = ptrans->layer_stride * box->depth; + + ptr = d3d12_bo_map(staging_res->bo, &range); + } + + *transfer = ptrans; + return ptr; +} + +static void +d3d12_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct d3d12_resource *res 
= d3d12_resource(ptrans->resource);
+   struct d3d12_transfer *trans = (struct d3d12_transfer *)ptrans;
+   D3D12_RANGE range = { 0, 0 };
+
+   if (trans->data != nullptr) {
+      if (trans->base.usage & PIPE_MAP_WRITE)
+         write_zs_surface(pctx, res, trans);
+      free(trans->data);
+   } else if (trans->staging_res) {
+      struct d3d12_resource *staging_res = d3d12_resource(trans->staging_res);
+
+      if (trans->base.usage & PIPE_MAP_WRITE) {
+         range.Begin = 0;
+         range.End = ptrans->layer_stride * ptrans->box.depth;
+      }
+      d3d12_bo_unmap(staging_res->bo, &range);
+
+      if (trans->base.usage & PIPE_MAP_WRITE) {
+         struct d3d12_context *ctx = d3d12_context(pctx);
+         transfer_buf_to_image(ctx, res, staging_res, trans, 0);
+      }
+
+      pipe_resource_reference(&trans->staging_res, NULL);
+   } else {
+      if (trans->base.usage & PIPE_MAP_WRITE) {
+         range.Begin = ptrans->box.x;
+         range.End = ptrans->box.x + ptrans->box.width;
+      }
+      d3d12_bo_unmap(res->bo, &range);
+   }
+
+   pipe_resource_reference(&ptrans->resource, NULL);
+   slab_free(&d3d12_context(pctx)->transfer_pool, ptrans);
+}
+
+/*
+ * Replace the BO behind a sub-allocated buffer resource with the BO of a
+ * newly created buffer of the same width.
+ *
+ * Resources without a BO, or whose BO is not sub-allocated, are left
+ * untouched.  When the resource has a non-empty valid_buffer_range, that
+ * range is copied into the new buffer before the BOs are swapped.  If the
+ * replacement buffer cannot be created the resource keeps its current BO.
+ */
+void
+d3d12_resource_make_writeable(struct pipe_context *pctx,
+                              struct pipe_resource *pres)
+{
+   struct d3d12_context *ctx = d3d12_context(pctx);
+   struct d3d12_resource *res = d3d12_resource(pres);
+   struct d3d12_resource *dup_res;
+
+   if (!res->bo || !d3d12_bo_is_suballocated(res->bo))
+      return;
+
+   /* Only the stream-output bind flag is carried over to the new buffer. */
+   dup_res = d3d12_resource(pipe_buffer_create(pres->screen,
+                                               pres->bind & PIPE_BIND_STREAM_OUTPUT,
+                                               (pipe_resource_usage) pres->usage,
+                                               pres->width0));
+
+   /* Buffer creation can fail; bail out instead of dereferencing NULL. */
+   if (!dup_res)
+      return;
+
+   if (res->valid_buffer_range.end > res->valid_buffer_range.start) {
+      struct pipe_box box;
+
+      box.x = res->valid_buffer_range.start;
+      box.y = 0;
+      box.z = 0;
+      box.width = res->valid_buffer_range.end - res->valid_buffer_range.start;
+      box.height = 1;
+      box.depth = 1;
+
+      d3d12_direct_copy(ctx, dup_res, 0, &box, res, 0, &box, PIPE_MASK_RGBAZS);
+   }
+
+   /* Move new BO to old resource */
+   d3d12_bo_unreference(res->bo);
+   res->bo = dup_res->bo;
+   d3d12_bo_reference(res->bo);
+
+   /* The temporary resource is no longer needed; res holds its own BO ref. */
+   d3d12_resource_destroy(dup_res->base.screen, &dup_res->base);
+}
+
+/* Install the transfer map/unmap hooks and the default subdata helpers. */
+void
+d3d12_context_resource_init(struct pipe_context *pctx)
+{
+   pctx->transfer_map = d3d12_transfer_map;
+   pctx->transfer_unmap = d3d12_transfer_unmap;
+
+   pctx->transfer_flush_region = u_default_transfer_flush_region;
+   pctx->buffer_subdata = u_default_buffer_subdata;
+   pctx->texture_subdata = u_default_texture_subdata;
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_resource.h b/src/gallium/drivers/d3d12/d3d12_resource.h
new file mode 100644
index 00000000000..62ecf6bb51b
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_resource.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_RESOURCE_H
+#define D3D12_RESOURCE_H
+
+struct pipe_screen;
+#include "d3d12_bufmgr.h"
+#include "util/u_range.h"
+#include "util/u_transfer.h"
+
+#include <d3d12.h>
+
+/* Driver-side resource; the gallium base object must stay the first member
+ * because d3d12_resource() casts a pipe_resource pointer to this type. */
+struct d3d12_resource {
+   struct pipe_resource base;
+   struct d3d12_bo *bo;                  /* backing buffer object, may be NULL */
+   DXGI_FORMAT dxgi_format;
+   unsigned mip_levels;
+   struct sw_displaytarget *dt;          /* winsys display target, if any */
+   unsigned dt_stride;                   /* row pitch used when reading back into dt */
+   struct util_range valid_buffer_range; /* byte range copied when the BO is replaced */
+};
+
+/* Driver-side transfer; base must stay first for the pipe_transfer cast. */
+struct d3d12_transfer {
+   struct pipe_transfer base;
+   struct pipe_resource *staging_res;    /* staging buffer used for the mapping, or NULL */
+   void *data;                           /* heap copy released with free() on unmap, or NULL */
+};
+
+/* Downcast a gallium resource to the driver resource. */
+static inline struct d3d12_resource *
+d3d12_resource(struct pipe_resource *r)
+{
+   return (struct d3d12_resource *)r;
+}
+
+/* Returns the underlying ID3D12Resource and offset for this resource.
+ * Returns NULL (and leaves *offset untouched) when the resource has no BO. */
+static inline ID3D12Resource *
+d3d12_resource_underlying(struct d3d12_resource *res, uint64_t *offset)
+{
+   if (!res->bo)
+      return NULL;
+
+   return d3d12_bo_get_base(res->bo, offset)->res;
+}
+
+/* Returns the underlying ID3D12Resource for this resource. */
+static inline ID3D12Resource *
+d3d12_resource_resource(struct d3d12_resource *res)
+{
+   ID3D12Resource *ret;
+   uint64_t offset;
+   ret = d3d12_resource_underlying(res, &offset);
+   return ret;
+}
+
+/* Returns the transition-state tracker of the base BO, or NULL without a BO. */
+static inline struct TransitionableResourceState *
+d3d12_resource_state(struct d3d12_resource *res)
+{
+   uint64_t offset;
+   if (!res->bo)
+      return NULL;
+   return d3d12_bo_get_base(res->bo, &offset)->trans_state;
+}
+
+/* Returns the GPU virtual address of this resource: the address of the base
+ * ID3D12Resource plus the resource's offset inside it.  Returns 0 when the
+ * resource has no backing BO instead of dereferencing a NULL resource. */
+static inline D3D12_GPU_VIRTUAL_ADDRESS
+d3d12_resource_gpu_virtual_address(struct d3d12_resource *res)
+{
+   uint64_t offset;
+   ID3D12Resource *base_res = d3d12_resource_underlying(res, &offset);
+   if (!base_res)
+      return 0;
+   return base_res->GetGPUVirtualAddress() + offset;
+}
+
+/* True for the cube, 1D-array and 2D-array texture targets. */
+static inline bool
+d3d12_subresource_id_uses_layer(enum pipe_texture_target target)
+{
+   return target == PIPE_TEXTURE_CUBE ||
+          target == PIPE_TEXTURE_1D_ARRAY ||
+          target == PIPE_TEXTURE_2D_ARRAY;
+}
+
+void
+d3d12_resource_release(struct d3d12_resource *res);
+
+void
+d3d12_resource_wait_idle(struct d3d12_context *ctx,
+                         struct d3d12_resource *res);
+
+void
+d3d12_resource_make_writeable(struct pipe_context *pctx,
+                              struct pipe_resource *pres);
+
+void
+d3d12_screen_resource_init(struct pipe_screen *pscreen);
+
+void
+d3d12_context_resource_init(struct pipe_context *pctx);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_root_signature.cpp b/src/gallium/drivers/d3d12/d3d12_root_signature.cpp
new file mode 100644
index 00000000000..26229f5ec81
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_root_signature.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "d3d12_root_signature.h"
+#include "d3d12_compiler.h"
+#include "d3d12_screen.h"
+
+#include "util/u_memory.h"
+
+#include <wrl/client.h>
+using Microsoft::WRL::ComPtr;
+
+/* Cache entry: the key a root signature was built from plus the D3D12
+ * object itself.  The hash table key points at the embedded 'key' member. */
+struct d3d12_root_signature {
+   struct d3d12_root_signature_key key;
+   ID3D12RootSignature *sig;
+};
+
+/* Translate a gallium graphics shader stage into the D3D12 visibility. */
+static D3D12_SHADER_VISIBILITY
+get_shader_visibility(enum pipe_shader_type stage)
+{
+   switch (stage) {
+   case PIPE_SHADER_VERTEX:
+      return D3D12_SHADER_VISIBILITY_VERTEX;
+   case PIPE_SHADER_FRAGMENT:
+      return D3D12_SHADER_VISIBILITY_PIXEL;
+   case PIPE_SHADER_GEOMETRY:
+      return D3D12_SHADER_VISIBILITY_GEOMETRY;
+   case PIPE_SHADER_TESS_CTRL:
+      return D3D12_SHADER_VISIBILITY_HULL;
+   case PIPE_SHADER_TESS_EVAL:
+      return D3D12_SHADER_VISIBILITY_DOMAIN;
+   default:
+      unreachable("unknown shader stage");
+   }
+}
+
+/* Fill 'param' as a block of 'size' 32-bit root constants bound to shader
+ * register 'reg' in register space 0. */
+static inline void
+init_constant_root_param(D3D12_ROOT_PARAMETER1 *param,
+                         unsigned reg,
+                         unsigned size,
+                         D3D12_SHADER_VISIBILITY visibility)
+{
+   param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+   param->ShaderVisibility = visibility;
+   param->Constants.RegisterSpace = 0;
+   param->Constants.ShaderRegister = reg;
+   param->Constants.Num32BitValues = size;
+}
+
+/* Fill 'param' as a descriptor table holding the single range 'range'.
+ * 'range' is referenced by pointer, so it must stay alive for as long as
+ * 'param' is in use. */
+static inline void
+init_range_root_param(D3D12_ROOT_PARAMETER1 *param,
+                      D3D12_DESCRIPTOR_RANGE1 *range,
+                      D3D12_DESCRIPTOR_RANGE_TYPE type,
+                      uint32_t num_descs,
+                      D3D12_SHADER_VISIBILITY visibility,
+                      uint32_t base_shader_register)
+{
+   range->RangeType = type;
+   range->NumDescriptors = num_descs;
+   range->BaseShaderRegister = base_shader_register;
+   range->RegisterSpace = 0;
+   /* Sampler ranges get no flags; every other range is marked data-volatile. */
+   if (type == D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER)
+      range->Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
+   else
+      range->Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
+   range->OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+
+   param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+   param->DescriptorTable.NumDescriptorRanges = 1;
+   param->DescriptorTable.pDescriptorRanges = range;
+   param->ShaderVisibility = visibility;
+}
+
+/*
+ * Build and create the root signature described by 'key'.
+ *
+ * Per stage, in this order, a root parameter is emitted for: the CBV table,
+ * the SRV table, the sampler table (sized like the SRV table) and the
+ * state-variable root constants, each only when its count/size is non-zero.
+ *
+ * Returns a new ID3D12RootSignature owned by the caller, or NULL when
+ * serialization or creation fails.
+ */
+static ID3D12RootSignature *
+create_root_signature(struct d3d12_context *ctx, struct d3d12_root_signature_key *key)
+{
+   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
+   D3D12_ROOT_PARAMETER1 root_params[D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES];
+   D3D12_DESCRIPTOR_RANGE1 desc_ranges[D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES];
+   /* Capacity of both arrays above; checked before every append. */
+   const unsigned max_params = D3D12_GFX_SHADER_STAGES * D3D12_NUM_BINDING_TYPES;
+   unsigned num_params = 0;
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      D3D12_SHADER_VISIBILITY visibility = get_shader_visibility((enum pipe_shader_type)i);
+
+      if (key->stages[i].num_cb_bindings > 0) {
+         assert(num_params < max_params);
+         /* Without a default UBO 0 the constant buffers start at register 1. */
+         init_range_root_param(&root_params[num_params],
+                               &desc_ranges[num_params],
+                               D3D12_DESCRIPTOR_RANGE_TYPE_CBV,
+                               key->stages[i].num_cb_bindings,
+                               visibility,
+                               key->stages[i].has_default_ubo0 ? 0 : 1);
+         num_params++;
+      }
+
+      if (key->stages[i].num_srv_bindings > 0) {
+         assert(num_params < max_params);
+         init_range_root_param(&root_params[num_params],
+                               &desc_ranges[num_params],
+                               D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
+                               key->stages[i].num_srv_bindings,
+                               visibility,
+                               0);
+         num_params++;
+      }
+
+      /* One sampler per SRV binding. */
+      if (key->stages[i].num_srv_bindings > 0) {
+         assert(num_params < max_params);
+         init_range_root_param(&root_params[num_params],
+                               &desc_ranges[num_params],
+                               D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
+                               key->stages[i].num_srv_bindings,
+                               visibility,
+                               0);
+         num_params++;
+      }
+
+      if (key->stages[i].state_vars_size > 0) {
+         assert(num_params < max_params);
+         /* State variables use the first register after the CBV range. */
+         init_constant_root_param(&root_params[num_params],
+            key->stages[i].num_cb_bindings + (key->stages[i].has_default_ubo0 ? 0 : 1),
+            key->stages[i].state_vars_size,
+            visibility);
+         num_params++;
+      }
+   }
+
+   D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc;
+   root_sig_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
+   root_sig_desc.Desc_1_1.NumParameters = num_params;
+   root_sig_desc.Desc_1_1.pParameters = (num_params > 0) ? root_params : NULL;
+   root_sig_desc.Desc_1_1.NumStaticSamplers = 0;
+   root_sig_desc.Desc_1_1.pStaticSamplers = NULL;
+   root_sig_desc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
+
+   /* TODO Only enable this flag when needed (optimization) */
+   root_sig_desc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
+
+   if (key->has_stream_output)
+      root_sig_desc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
+
+   /* Both blobs are released automatically when the ComPtrs go out of scope. */
+   ComPtr<ID3DBlob> sig, error;
+   if (FAILED(ctx->D3D12SerializeVersionedRootSignature(&root_sig_desc,
+                                                        &sig, &error))) {
+      debug_printf("D3D12SerializeRootSignature failed\n");
+      return NULL;
+   }
+
+   ID3D12RootSignature *ret;
+   if (FAILED(screen->dev->CreateRootSignature(0,
+                                               sig->GetBufferPointer(),
+                                               sig->GetBufferSize(),
+                                               __uuidof(ret),
+                                               (void **)&ret))) {
+      debug_printf("CreateRootSignature failed\n");
+      return NULL;
+   }
+   return ret;
+}
+
+/* Derive the cache key from the shaders currently bound in the graphics
+ * pipeline state.  The key is zeroed first because it is hashed and compared
+ * as raw bytes, padding included. */
+static void
+fill_key(struct d3d12_context *ctx, struct d3d12_root_signature_key *key)
+{
+   memset(key, 0, sizeof(struct d3d12_root_signature_key));
+
+   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
+      struct d3d12_shader *shader = ctx->gfx_pipeline_state.stages[i];
+
+      if (shader) {
+         key->stages[i].num_cb_bindings = shader->num_cb_bindings;
+         key->stages[i].num_srv_bindings = shader->num_srv_bindings;
+         key->stages[i].state_vars_size = shader->state_vars_size;
+         key->stages[i].has_default_ubo0 = shader->has_default_ubo0;
+
+         if (ctx->gfx_stages[i]->so_info.num_outputs > 0)
+            key->has_stream_output = true;
+      }
+   }
+}
+
+/*
+ * Return the root signature matching the currently bound shaders, creating
+ * and caching it on first use.
+ *
+ * The returned object is owned by the cache.  Returns NULL when allocation,
+ * root signature creation or the cache insertion fails; nothing is leaked
+ * in that case.
+ */
+ID3D12RootSignature *
+d3d12_get_root_signature(struct d3d12_context *ctx)
+{
+   struct d3d12_root_signature_key key;
+
+   fill_key(ctx, &key);
+   struct hash_entry *entry = _mesa_hash_table_search(ctx->root_signature_cache, &key);
+   if (!entry) {
+      struct d3d12_root_signature *data =
+         (struct d3d12_root_signature *)MALLOC(sizeof(struct d3d12_root_signature));
+      if (!data)
+         return NULL;
+
+      /* Copy bytewise so the zeroed padding is preserved: the cache hashes
+       * and compares keys with _mesa_hash_data()/memcmp(). */
+      memcpy(&data->key, &key, sizeof(key));
+      data->sig = create_root_signature(ctx, &key);
+      if (!data->sig) {
+         FREE(data);
+         return NULL;
+      }
+
+      entry = _mesa_hash_table_insert(ctx->root_signature_cache, &data->key, data);
+      if (!entry) {
+         data->sig->Release();
+         FREE(data);
+         return NULL;
+      }
+   }
+
+   return ((struct d3d12_root_signature *)entry->data)->sig;
+}
+
+/* Hash callback: hashes the whole key structure as raw bytes. */
+static uint32_t
+hash_root_signature_key(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct d3d12_root_signature_key));
+}
+
+/* Equality callback: bytewise comparison of two key structures. */
+static bool
+equals_root_signature_key(const void *a, const void *b)
+{
+   return memcmp(a, b, sizeof(struct d3d12_root_signature_key)) == 0;
+}
+
+/* Create the (initially empty) root signature cache of the context. */
+void
+d3d12_root_signature_cache_init(struct d3d12_context *ctx)
+{
+   ctx->root_signature_cache = _mesa_hash_table_create(NULL,
+                                                       hash_root_signature_key,
+                                                       equals_root_signature_key);
+}
+
+/* Destroy callback: release the D3D12 object and free the cache entry. */
+static void
+delete_entry(struct hash_entry *entry)
+{
+   struct d3d12_root_signature *data = (struct d3d12_root_signature *)entry->data;
+   data->sig->Release();
+   FREE(data);
+}
+
+/* Destroy the cache together with every root signature it holds. */
+void
+d3d12_root_signature_cache_destroy(struct d3d12_context *ctx)
+{
+   _mesa_hash_table_destroy(ctx->root_signature_cache, delete_entry);
+}
diff --git a/src/gallium/drivers/d3d12/d3d12_root_signature.h b/src/gallium/drivers/d3d12/d3d12_root_signature.h
new file mode 100644
index 00000000000..1a821a5b28f
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_root_signature.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef D3D12_ROOT_SIGNATURE_H
+#define D3D12_ROOT_SIGNATURE_H
+
+#include "d3d12_context.h"
+
+/* Cache key describing a root signature layout.
+ *
+ * The key is hashed and compared as raw bytes over its full size, so it must
+ * be zero-initialised (padding included) before the fields are filled in. */
+struct d3d12_root_signature_key {
+   /* Adds D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT when set. */
+   bool has_stream_output;
+   struct {
+      /* Number of descriptors in the stage's CBV table (0 = no table). */
+      unsigned num_cb_bindings;
+      /* Number of descriptors in the stage's SRV table; the sampler table
+       * is created with the same count. */
+      unsigned num_srv_bindings;
+      /* Number of 32-bit root constants for state variables (0 = none). */
+      unsigned state_vars_size;
+      /* CBV table starts at shader register 0 when set, otherwise at 1. */
+      bool has_default_ubo0;
+   } stages[D3D12_GFX_SHADER_STAGES];
+};
+
+/* Create the root signature cache of the context. */
+void
+d3d12_root_signature_cache_init(struct d3d12_context *ctx);
+
+/* Destroy the cache and release every root signature stored in it. */
+void
+d3d12_root_signature_cache_destroy(struct d3d12_context *ctx);
+
+/* Look up, or create and cache, the root signature for the shaders
+ * currently bound in the context.  Can return NULL on failure. */
+ID3D12RootSignature *
+d3d12_get_root_signature(struct d3d12_context *ctx);
+
+#endif
diff --git a/src/gallium/drivers/d3d12/d3d12_screen.cpp b/src/gallium/drivers/d3d12/d3d12_screen.cpp
new file mode 100644
index 00000000000..f273e559d87
--- /dev/null
+++ b/src/gallium/drivers/d3d12/d3d12_screen.cpp
@@ -0,0 +1,970 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_screen.h" + +#include "d3d12_bufmgr.h" +#include "d3d12_compiler.h" +#include "d3d12_context.h" +#include "d3d12_debug.h" +#include "d3d12_fence.h" +#include "d3d12_format.h" +#include "d3d12_public.h" +#include "d3d12_resource.h" +#include "d3d12_nir_passes.h" + +#include "pipebuffer/pb_bufmgr.h" +#include "util/debug.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_screen.h" + +#include "nir.h" +#include "frontend/sw_winsys.h" + +#include +#include + +static const struct debug_named_value +debug_options[] = { + { "verbose", D3D12_DEBUG_VERBOSE, NULL }, + { "blit", D3D12_DEBUG_BLIT, "Trace blit and copy resource calls" }, + { "experimental", D3D12_DEBUG_EXPERIMENTAL, "Enable experimental shader models feature" }, + { "dxil", D3D12_DEBUG_DXIL, "Dump DXIL during program compile" }, + { "disass", D3D12_DEBUG_DISASS, "Dump disassambly of created DXIL shader" }, + { "res", D3D12_DEBUG_RESOURCE, "Debug resources" }, + { "debuglayer", D3D12_DEBUG_DEBUG_LAYER, "Enable debug layer" }, + { "gpuvalidator", D3D12_DEBUG_GPU_VALIDATOR, "Enable GPU validator" }, + DEBUG_NAMED_VALUE_END +}; + +DEBUG_GET_ONCE_FLAGS_OPTION(d3d12_debug, "D3D12_DEBUG", debug_options, 0) + +uint32_t +d3d12_debug; + +enum { + HW_VENDOR_AMD = 0x1002, + HW_VENDOR_INTEL = 0x8086, + HW_VENDOR_MICROSOFT = 0x1414, + HW_VENDOR_NVIDIA = 0x10de, +}; + +static const char * +d3d12_get_vendor(struct pipe_screen *pscreen) +{ + return "Microsoft Corporation"; +} + +static const char 
* +d3d12_get_device_vendor(struct pipe_screen *pscreen) +{ + struct d3d12_screen* screen = d3d12_screen(pscreen); + + switch (screen->adapter_desc.VendorId) { + case HW_VENDOR_MICROSOFT: + return "Microsoft"; + case HW_VENDOR_AMD: + return "AMD"; + case HW_VENDOR_NVIDIA: + return "NVIDIA"; + case HW_VENDOR_INTEL: + return "Intel"; + default: + return "Unknown"; + } +} + +static const char * +d3d12_get_name(struct pipe_screen *pscreen) +{ + struct d3d12_screen* screen = d3d12_screen(pscreen); + + if (screen->adapter_desc.Description[0] == '\0') + return "D3D12 (Unknown)"; + + static char buf[1000]; + snprintf(buf, sizeof(buf), "D3D12 (%S)", screen->adapter_desc.Description); + return buf; +} + +static int +d3d12_get_video_mem(struct pipe_screen *pscreen) +{ + struct d3d12_screen* screen = d3d12_screen(pscreen); + + // Note: memory sizes in bytes, but stored in size_t, so may be capped at 4GB. + // In that case, adding before conversion to MB can easily overflow. + return (screen->adapter_desc.DedicatedVideoMemory >> 20) + + (screen->adapter_desc.DedicatedSystemMemory >> 20) + + (screen->adapter_desc.SharedSystemMemory >> 20); +} + +static int +d3d12_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + return 1; + + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + /* D3D12 only supports dual-source blending for a single + * render-target. From the D3D11 functional spec (which also defines + * this for D3D12): + * + * "When Dual Source Color Blending is enabled, the Pixel Shader must + * have only a single RenderTarget bound, at slot 0, and must output + * both o0 and o1. Writing to other outputs (o2, o3 etc.) produces + * undefined results for the corresponding RenderTargets, if bound + * illegally." 
+ * + * Source: https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#17.6%20Dual%20Source%20Color%20Blending + */ + return 1; + + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + + case PIPE_CAP_MAX_RENDER_TARGETS: + if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0) + return 8; + else if (screen->max_feature_level == D3D_FEATURE_LEVEL_9_3) + return 4; + return 1; + + case PIPE_CAP_TEXTURE_SWIZZLE: + return 1; + + case PIPE_CAP_MAX_TEXTURE_2D_SIZE: + if (screen->max_feature_level >= D3D_FEATURE_LEVEL_11_0) + return 16384; + else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0) + return 8192; + else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_9_3) + return 4096; + return 2048; + + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0) + return 11; + return 9; + + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + if (screen->max_feature_level >= D3D_FEATURE_LEVEL_11_0) + return 14; + else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0) + return 13; + else if (screen->max_feature_level == D3D_FEATURE_LEVEL_9_3) + return 12; + return 9; + + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: + case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: + case PIPE_CAP_VERTEX_SHADER_SATURATE: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: + return 1; + + /* We need to do some lowering that requires a link to the sampler */ + case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: + return 1; + + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + if (screen->max_feature_level >= D3D_FEATURE_LEVEL_11_0) + return 1 << 14; + else if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0) + return 1 << 13; + return 0; + + case PIPE_CAP_DEPTH_CLIP_DISABLE: + return 1; + + case PIPE_CAP_TGSI_TEXCOORD: + return 0; + + case 
PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return 1; + + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + return 1; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 330; + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + return 140; + +#if 0 /* TODO: Enable me */ + case PIPE_CAP_COMPUTE: + return 0; +#endif + + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 1; + +#if 0 /* TODO: Enable me */ + case PIPE_CAP_CUBE_MAP_ARRAY: + return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_1; +#endif + + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return 1; + + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; /* unsure */ + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_NATIVE; /* unsure */ + + case PIPE_CAP_MAX_VIEWPORTS: + return 1; /* probably wrong */ + + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + return 1; + +#if 0 /* TODO: Enable me. Enables ARB_texture_gather */ + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + return 4; +#endif + + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + return 1; + + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + return 1; + + case PIPE_CAP_ACCELERATED: + return 1; + + case PIPE_CAP_VIDEO_MEMORY: + return d3d12_get_video_mem(pscreen); + + case PIPE_CAP_UMA: + return screen->architecture.UMA; + + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; /* FIXME: no clue how to query this */ + + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; + + case PIPE_CAP_SHAREABLE_SHADERS: + return 1; + +#if 0 /* TODO: Enable me. 
Enables GL_ARB_shader_storage_buffer_object */ + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0; +#endif + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + return 0; /* TODO: figure these out */ + + case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + return 0; /* not sure */ + + case PIPE_CAP_FLATSHADE: + case PIPE_CAP_ALPHA_TEST: + case PIPE_CAP_TWO_SIDED_COLOR: + case PIPE_CAP_CLIP_PLANES: + return 0; + + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return screen->opts.PSSpecifiedStencilRefSupported; + + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_TGSI_TEX_TXF_LZ: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED: + case PIPE_CAP_PSIZ_CLAMPED: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + return 1; + + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 16 * 4; + + /* Geometry shader output. 
*/ + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + return 256; + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 256 * 4; + + case PIPE_CAP_MAX_VARYINGS: + return 32; + + default: + return u_pipe_screen_get_param_defaults(pscreen, param); + } +} + +static float +d3d12_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 1.0f; /* no clue */ + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return D3D12_MAX_POINT_SIZE; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0 ? 16.0f : 2.0f; + + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 15.99f; + + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; /* not implemented */ + + default: + unreachable("unknown pipe_capf"); + } + + return 0.0; +} + +static int +d3d12_get_shader_param(struct pipe_screen *pscreen, + enum pipe_shader_type shader, + enum pipe_shader_cap param) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_FRAGMENT || + shader == PIPE_SHADER_GEOMETRY) + return INT_MAX; + return 0; + + case PIPE_SHADER_CAP_MAX_INPUTS: + return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_1 ? 
32 : 16; + + case PIPE_SHADER_CAP_MAX_OUTPUTS: + if (shader == PIPE_SHADER_FRAGMENT) { + /* same as max MRTs (not sure if this is correct) */ + if (screen->max_feature_level >= D3D_FEATURE_LEVEL_10_0) + return 8; + else if (screen->max_feature_level == D3D_FEATURE_LEVEL_9_3) + return 4; + return 1; + } + return screen->max_feature_level >= D3D_FEATURE_LEVEL_10_1 ? 32 : 16; + + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + if (screen->opts.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1) + return 16; + return PIPE_MAX_SAMPLERS; + + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 65536; + + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 13; /* 15 - 2 for lowered uniforms and state vars*/ + + case PIPE_SHADER_CAP_MAX_TEMPS: + return INT_MAX; + + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; /* not implemented */ + + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + case PIPE_SHADER_CAP_INTEGERS: + return 1; + + case PIPE_SHADER_CAP_INT64_ATOMICS: + case PIPE_SHADER_CAP_FP16: + return 0; /* not implemented */ + + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; /* not implemented */ + + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + if (screen->opts.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1) + return 128; + return PIPE_MAX_SHADER_SAMPLER_VIEWS; + + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + return 0; /* not implemented */ + + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + return 0; /* no idea */ + + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; /* arbitrary */ + +#if 0 + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return 8; /* no clue */ +#endif + + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 1 << PIPE_SHADER_IR_NIR; + + case 
PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return 0; /* TODO: enable me */ + + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + return 0; /* unsure */ + + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; /* not implemented */ + } + + /* should only get here on unhandled cases */ + return 0; +} + +static bool +d3d12_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, + unsigned bind) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + + if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) + return false; + + if (target == PIPE_BUFFER) { + /* Replace emulated vertex element formats for the tests */ + format = d3d12_emulated_vtx_format(format); + } else { + /* Allow 3-comp 32 bit formats only for BOs (needed for ARB_tbo_rgb32) */ + if ((format == PIPE_FORMAT_R32G32B32_FLOAT || + format == PIPE_FORMAT_R32G32B32_SINT || + format == PIPE_FORMAT_R32G32B32_UINT)) + return false; + } + + /* Don't advertise alpha/luminance_alpha formats because they can't be used + * for render targets (except A8_UNORM) and can't be emulated by R/RG formats. + * Let the state tracker choose an RGBA format instead. 
*/ + if (format != PIPE_FORMAT_A8_UNORM && + (util_format_is_alpha(format) || + util_format_is_luminance_alpha(format))) + return false; + + DXGI_FORMAT dxgi_format = d3d12_get_format(format); + if (dxgi_format == DXGI_FORMAT_UNKNOWN) + return false; + + enum D3D12_FORMAT_SUPPORT1 dim_support = D3D12_FORMAT_SUPPORT1_NONE; + switch (target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + dim_support = D3D12_FORMAT_SUPPORT1_TEXTURE1D; + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D_ARRAY: + dim_support = D3D12_FORMAT_SUPPORT1_TEXTURE2D; + break; + case PIPE_TEXTURE_3D: + dim_support = D3D12_FORMAT_SUPPORT1_TEXTURE3D; + break; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + dim_support = D3D12_FORMAT_SUPPORT1_TEXTURECUBE; + break; + case PIPE_BUFFER: + dim_support = D3D12_FORMAT_SUPPORT1_BUFFER; + break; + default: + unreachable("Unknown target"); + } + + D3D12_FEATURE_DATA_FORMAT_SUPPORT fmt_info; + fmt_info.Format = d3d12_get_resource_rt_format(format); + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, + &fmt_info, sizeof(fmt_info)))) + return false; + + if (!(fmt_info.Support1 & dim_support)) + return false; + + if (target == PIPE_BUFFER) { + if (bind & PIPE_BIND_VERTEX_BUFFER && + !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER)) + return false; + + if (bind & PIPE_BIND_INDEX_BUFFER && + !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER)) + return false; + + if (sample_count > 0) + return false; + } else { + /* all other targets are texture-targets */ + if (bind & PIPE_BIND_RENDER_TARGET && + !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET)) + return false; + + if (bind & PIPE_BIND_BLENDABLE && + !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE)) + return false; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT fmt_info_sv; + if (util_format_is_depth_or_stencil(format)) { + fmt_info_sv.Format = d3d12_get_resource_srv_format(format, target); + if 
(FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, + &fmt_info_sv, sizeof(fmt_info_sv)))) + return false; + } else + fmt_info_sv = fmt_info; + + if (bind & PIPE_BIND_DISPLAY_TARGET && + (!(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DISPLAY) || + // Disable formats that don't support flip model + dxgi_format == DXGI_FORMAT_B8G8R8X8_UNORM || + dxgi_format == DXGI_FORMAT_B5G5R5A1_UNORM || + dxgi_format == DXGI_FORMAT_B5G6R5_UNORM || + dxgi_format == DXGI_FORMAT_B4G4R4A4_UNORM)) + return false; + + if (bind & PIPE_BIND_DEPTH_STENCIL && + !(fmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL)) + return false; + + if (sample_count > 0) { + if (!(fmt_info_sv.Support1 & D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD)) + return false; + + if (!util_is_power_of_two_nonzero(sample_count)) + return false; + + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = {}; + ms_info.Format = dxgi_format; + ms_info.SampleCount = sample_count; + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &ms_info, + sizeof(ms_info))) || + !ms_info.NumQualityLevels) + return false; + } + } + return true; +} + +static void +d3d12_destroy_screen(struct pipe_screen *pscreen) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + slab_destroy_parent(&screen->transfer_pool); + screen->slab_bufmgr->destroy(screen->slab_bufmgr); + screen->cache_bufmgr->destroy(screen->cache_bufmgr); + screen->bufmgr->destroy(screen->bufmgr); + FREE(screen); +} + +static void +d3d12_flush_frontbuffer(struct pipe_screen * pscreen, + struct pipe_resource *pres, + unsigned level, unsigned layer, + void *winsys_drawable_handle, + struct pipe_box *sub_box) +{ + struct d3d12_screen *screen = d3d12_screen(pscreen); + struct sw_winsys *winsys = screen->winsys; + struct d3d12_resource *res = d3d12_resource(pres); + ID3D12Resource *d3d12_res = d3d12_resource_resource(res); + + if (!winsys) + return; + + assert(res->dt); + void *map = winsys->displaytarget_map(winsys, 
res->dt, 0); + + if (map) { + d3d12_res->ReadFromSubresource(map, res->dt_stride, 0, 0, NULL); + winsys->displaytarget_unmap(winsys, res->dt); + } + + ID3D12SharingContract *sharing_contract; + if (SUCCEEDED(screen->cmdqueue->QueryInterface(__uuidof(sharing_contract), + (void **)&sharing_contract))) + sharing_contract->Present(d3d12_res, 0, WindowFromDC((HDC)winsys_drawable_handle)); + + winsys->displaytarget_display(winsys, res->dt, winsys_drawable_handle, sub_box); +} + +static ID3D12Debug * +get_debug_interface() +{ + typedef HRESULT(WINAPI *PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid, void **ppFactory); + PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface; + + HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL"); + if (!hD3D12Mod) { + debug_printf("D3D12: failed to load D3D12.DLL\n"); + return NULL; + } + + D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface"); + if (!D3D12GetDebugInterface) { + debug_printf("D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n"); + return NULL; + } + + ID3D12Debug *debug; + if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)&debug))) { + debug_printf("D3D12: D3D12GetDebugInterface failed\n"); + return NULL; + } + + return debug; +} + +static void +enable_d3d12_debug_layer() +{ + ID3D12Debug *debug = get_debug_interface(); + if (debug) + debug->EnableDebugLayer(); +} + +static void +enable_gpu_validation() +{ + ID3D12Debug *debug = get_debug_interface(); + ID3D12Debug3 *debug3; + if (debug && + SUCCEEDED(debug->QueryInterface(__uuidof(debug3), (void **)&debug3))) + debug3->SetEnableGPUBasedValidation(true); +} + +static IDXGIFactory4 * +get_dxgi_factory() +{ + static const GUID IID_IDXGIFactory4 = { + 0x1bc6ea02, 0xef36, 0x464f, + { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a } + }; + + typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory); + PFN_CREATE_DXGI_FACTORY CreateDXGIFactory; + + HMODULE hDXGIMod =
LoadLibrary("DXGI.DLL"); + if (!hDXGIMod) { + debug_printf("D3D12: failed to load DXGI.DLL\n"); + return NULL; + } + + CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory"); + if (!CreateDXGIFactory) { + debug_printf("D3D12: failed to load CreateDXGIFactory from DXGI.DLL\n"); + return NULL; + } + + IDXGIFactory4 *factory = NULL; + HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory); + if (FAILED(hr)) { + debug_printf("D3D12: CreateDXGIFactory failed: %08x\n", hr); + return NULL; + } + + return factory; +} + +static IDXGIAdapter1 * +choose_adapter(IDXGIFactory4 *factory, LUID *adapter) +{ + IDXGIAdapter1 *ret; + if (adapter) { + if (SUCCEEDED(factory->EnumAdapterByLuid(*adapter, + __uuidof(IDXGIAdapter1), + (void**)&ret))) + return ret; + debug_printf("D3D12: requested adapter missing, falling back to auto-detection...\n"); + } + + bool want_warp = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false); + if (want_warp) { + if (SUCCEEDED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1), + (void**)&ret))) + return ret; + debug_printf("D3D12: failed to enum warp adapter\n"); + return NULL; + } + + // The first adapter is the default + if (SUCCEEDED(factory->EnumAdapters1(0, &ret))) + return ret; + + return NULL; +} + +static ID3D12Device * +create_device(IDXGIAdapter1 *adapter) +{ + typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**); + typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID*, void*, UINT*); + PFN_D3D12CREATEDEVICE D3D12CreateDevice; + PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures; + + HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL"); + if (!hD3D12Mod) { + debug_printf("D3D12: failed to load D3D12.DLL\n"); + return NULL; + } + + if (d3d12_debug & D3D12_DEBUG_EXPERIMENTAL) { + D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures"); + 
D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL); + } + + D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice"); + if (!D3D12CreateDevice) { + debug_printf("D3D12: failed to load D3D12CreateDevice from D3D12.DLL\n"); + return NULL; + } + + ID3D12Device *dev; + if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, + __uuidof(ID3D12Device), (void **)&dev))) + return dev; + + debug_printf("D3D12: D3D12CreateDevice failed\n"); + return NULL; +} + +static bool +can_attribute_at_vertex(struct d3d12_screen *screen) +{ + switch (screen->adapter_desc.VendorId) { + case HW_VENDOR_MICROSOFT: + return true; + default: + return screen->opts3.BarycentricsSupported; + } +} + +struct pipe_screen * +d3d12_create_screen(struct sw_winsys *winsys, LUID *adapter_luid) +{ + struct d3d12_screen *screen = CALLOC_STRUCT(d3d12_screen); + if (!screen) + return NULL; + + d3d12_debug = debug_get_option_d3d12_debug(); + + screen->winsys = winsys; + + screen->base.get_name = d3d12_get_name; + screen->base.get_vendor = d3d12_get_vendor; + screen->base.get_device_vendor = d3d12_get_device_vendor; + screen->base.get_param = d3d12_get_param; + screen->base.get_paramf = d3d12_get_paramf; + screen->base.get_shader_param = d3d12_get_shader_param; + screen->base.is_format_supported = d3d12_is_format_supported; + screen->base.get_compiler_options = d3d12_get_compiler_options; + screen->base.context_create = d3d12_context_create; + screen->base.flush_frontbuffer = d3d12_flush_frontbuffer; + screen->base.destroy = d3d12_destroy_screen; + +#ifndef DEBUG + if (d3d12_debug & D3D12_DEBUG_DEBUG_LAYER) +#endif + enable_d3d12_debug_layer(); + + if (d3d12_debug & D3D12_DEBUG_GPU_VALIDATOR) + enable_gpu_validation(); + + screen->factory = get_dxgi_factory(); + if (!screen->factory) { + debug_printf("D3D12: failed to create DXGI factory\n"); + goto failed; + } + + screen->adapter = choose_adapter(screen->factory, adapter_luid); + if 
(!screen->adapter) { + debug_printf("D3D12: no suitable adapter\n"); + goto failed; + } + + if (FAILED(screen->adapter->GetDesc1(&screen->adapter_desc))) { + debug_printf("D3D12: failed to retrieve adapter description\n"); + goto failed; + } + + screen->dev = create_device(screen->adapter); + if (!screen->dev) { + debug_printf("D3D12: failed to create device\n"); + goto failed; + } + + ID3D12InfoQueue *info_queue; + if (SUCCEEDED(screen->dev->QueryInterface(__uuidof(info_queue), + (void **)&info_queue))) { + D3D12_MESSAGE_SEVERITY severities[] = { + D3D12_MESSAGE_SEVERITY_INFO, + D3D12_MESSAGE_SEVERITY_WARNING, + }; + + D3D12_MESSAGE_ID msg_ids[] = { + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + }; + + D3D12_INFO_QUEUE_FILTER NewFilter = {}; + NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities); + NewFilter.DenyList.pSeverityList = severities; + NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids); + NewFilter.DenyList.pIDList = msg_ids; + + info_queue->PushStorageFilter(&NewFilter); + } + + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, + &screen->opts, + sizeof(screen->opts)))) { + debug_printf("D3D12: failed to get device options\n"); + goto failed; + } + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, + &screen->opts2, + sizeof(screen->opts2)))) { + debug_printf("D3D12: failed to get device options\n"); + goto failed; + } + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, + &screen->opts3, + sizeof(screen->opts3)))) { + debug_printf("D3D12: failed to get device options\n"); + goto failed; + } + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4, + &screen->opts4, + sizeof(screen->opts4)))) { + debug_printf("D3D12: failed to get device options\n"); + goto failed; + } + + screen->architecture.NodeIndex = 0; + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, + &screen->architecture, + sizeof(screen->architecture)))) { +
debug_printf("D3D12: failed to get device architecture\n"); + goto failed; + } + + D3D12_FEATURE_DATA_FEATURE_LEVELS feature_levels; + static const D3D_FEATURE_LEVEL levels[] = { + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_12_1, + }; + feature_levels.NumFeatureLevels = ARRAY_SIZE(levels); + feature_levels.pFeatureLevelsRequested = levels; + if (FAILED(screen->dev->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, + &feature_levels, + sizeof(feature_levels)))) { + debug_printf("D3D12: failed to get device feature levels\n"); + goto failed; + } + screen->max_feature_level = feature_levels.MaxSupportedFeatureLevel; + + D3D12_COMMAND_QUEUE_DESC queue_desc; + queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; + queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queue_desc.NodeMask = 0; + if (FAILED(screen->dev->CreateCommandQueue(&queue_desc, + __uuidof(screen->cmdqueue), + (void **)&screen->cmdqueue))) + goto failed; + + UINT64 timestamp_freq; + if (FAILED(screen->cmdqueue->GetTimestampFrequency(&timestamp_freq))) + timestamp_freq = 10000000; + screen->timestamp_multiplier = 1000000000.0 / timestamp_freq; + + d3d12_screen_fence_init(&screen->base); + d3d12_screen_resource_init(&screen->base); + slab_create_parent(&screen->transfer_pool, sizeof(struct d3d12_transfer), 16); + + struct pb_desc desc; + desc.alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; + desc.usage = (pb_usage_flags)PB_USAGE_ALL; + + screen->bufmgr = d3d12_bufmgr_create(screen); + screen->cache_bufmgr = pb_cache_manager_create(screen->bufmgr, 0xfffff, 2, 0, 64 * 1024 * 1024); + screen->slab_bufmgr = pb_slab_range_manager_create(screen->cache_bufmgr, 16, 512, + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, + &desc); + + screen->have_load_at_vertex = can_attribute_at_vertex(screen); + return &screen->base; + +failed: + FREE(screen); + return NULL; +} diff --git
a/src/gallium/drivers/d3d12/d3d12_screen.h b/src/gallium/drivers/d3d12/d3d12_screen.h new file mode 100644 index 00000000000..837093987d5 --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_screen.h @@ -0,0 +1,71 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef D3D12_SCREEN_H +#define D3D12_SCREEN_H + +#include "pipe/p_screen.h" + +#include "util/slab.h" + +#define D3D12_IGNORE_SDK_LAYERS +#include <d3d12.h> +#include <dxgi1_4.h> + +struct pb_manager; + +struct d3d12_screen { + struct pipe_screen base; + struct sw_winsys *winsys; + + IDXGIFactory4 *factory; + IDXGIAdapter1 *adapter; + ID3D12Device *dev; + ID3D12CommandQueue *cmdqueue; + + struct slab_parent_pool transfer_pool; + struct pb_manager *bufmgr; + struct pb_manager *cache_bufmgr; + struct pb_manager *slab_bufmgr; + + /* capabilities */ + D3D_FEATURE_LEVEL max_feature_level; + D3D12_FEATURE_DATA_ARCHITECTURE architecture; + D3D12_FEATURE_DATA_D3D12_OPTIONS opts; + D3D12_FEATURE_DATA_D3D12_OPTIONS2 opts2; + D3D12_FEATURE_DATA_D3D12_OPTIONS3 opts3; + D3D12_FEATURE_DATA_D3D12_OPTIONS4 opts4; + + /* description */ + DXGI_ADAPTER_DESC1 adapter_desc; + double timestamp_multiplier; + bool have_load_at_vertex; +}; + +static inline struct d3d12_screen * +d3d12_screen(struct pipe_screen *pipe) +{ + return (struct d3d12_screen *)pipe; +} + +#endif diff --git a/src/gallium/drivers/d3d12/d3d12_surface.cpp b/src/gallium/drivers/d3d12/d3d12_surface.cpp new file mode 100644 index 00000000000..9ddbb7e59dc --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_surface.cpp @@ -0,0 +1,365 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "d3d12_context.h" +#include "d3d12_format.h" +#include "d3d12_resource.h" +#include "d3d12_screen.h" +#include "d3d12_surface.h" + +#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" + +static D3D12_DSV_DIMENSION +view_dsv_dimension(enum pipe_texture_target target, unsigned samples) +{ + switch (target) { + case PIPE_TEXTURE_1D: return D3D12_DSV_DIMENSION_TEXTURE1D; + case PIPE_TEXTURE_1D_ARRAY: return D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return samples > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMS : + D3D12_DSV_DIMENSION_TEXTURE2D; + + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + return samples > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + + default: + unreachable("unexpected target"); + } +} + +static D3D12_RTV_DIMENSION +view_rtv_dimension(enum pipe_texture_target target, unsigned samples) +{ + switch (target) { + case PIPE_BUFFER: return D3D12_RTV_DIMENSION_BUFFER; + case PIPE_TEXTURE_1D: return D3D12_RTV_DIMENSION_TEXTURE1D; + case PIPE_TEXTURE_1D_ARRAY: return D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return samples > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMS : + D3D12_RTV_DIMENSION_TEXTURE2D; + + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_CUBE: + return samples > 1 ? 
D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : + D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + + case PIPE_TEXTURE_3D: return D3D12_RTV_DIMENSION_TEXTURE3D; + + default: + unreachable("unexpected target"); + } +} + +static void +initialize_dsv(struct pipe_context *pctx, + struct pipe_resource *pres, + const struct pipe_surface *tpl, + struct d3d12_descriptor_handle *handle, + DXGI_FORMAT dxgi_format) +{ + struct d3d12_resource *res = d3d12_resource(pres); + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + + D3D12_DEPTH_STENCIL_VIEW_DESC desc; + desc.Format = dxgi_format; + desc.Flags = D3D12_DSV_FLAG_NONE; + + desc.ViewDimension = view_dsv_dimension(pres->target, pres->nr_samples); + switch (desc.ViewDimension) { + case D3D12_DSV_DIMENSION_TEXTURE1D: + if (tpl->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 1D DSV from layer %d\n", + tpl->u.tex.first_layer); + + desc.Texture1D.MipSlice = tpl->u.tex.level; + break; + + case D3D12_DSV_DIMENSION_TEXTURE1DARRAY: + desc.Texture1DArray.MipSlice = tpl->u.tex.level; + desc.Texture1DArray.FirstArraySlice = tpl->u.tex.first_layer; + desc.Texture1DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + break; + + case D3D12_DSV_DIMENSION_TEXTURE2DMS: + if (tpl->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 2DMS DSV from layer %d\n", + tpl->u.tex.first_layer); + + break; + + case D3D12_DSV_DIMENSION_TEXTURE2D: + if (tpl->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 2D DSV from layer %d\n", + tpl->u.tex.first_layer); + + desc.Texture2D.MipSlice = tpl->u.tex.level; + break; + + case D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY: + desc.Texture2DMSArray.FirstArraySlice = tpl->u.tex.first_layer; + desc.Texture2DMSArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + break; + + case D3D12_DSV_DIMENSION_TEXTURE2DARRAY: + desc.Texture2DArray.MipSlice = tpl->u.tex.level; + desc.Texture2DArray.FirstArraySlice = 
tpl->u.tex.first_layer; + desc.Texture2DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + break; + } + + d3d12_descriptor_pool_alloc_handle(ctx->dsv_pool, handle); + screen->dev->CreateDepthStencilView(d3d12_resource_resource(res), &desc, + handle->cpu_handle); +} + +static void +initialize_rtv(struct pipe_context *pctx, + struct pipe_resource *pres, + const struct pipe_surface *tpl, + struct d3d12_descriptor_handle *handle, + DXGI_FORMAT dxgi_format) +{ + struct d3d12_resource *res = d3d12_resource(pres); + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + + D3D12_RENDER_TARGET_VIEW_DESC desc; + desc.Format = dxgi_format; + + desc.ViewDimension = view_rtv_dimension(pres->target, pres->nr_samples); + switch (desc.ViewDimension) { + case D3D12_RTV_DIMENSION_BUFFER: + desc.Buffer.FirstElement = 0; + desc.Buffer.NumElements = pres->width0 / util_format_get_blocksize(tpl->format); + break; + + case D3D12_RTV_DIMENSION_TEXTURE1D: + if (tpl->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 1D RTV from layer %d\n", + tpl->u.tex.first_layer); + + desc.Texture1D.MipSlice = tpl->u.tex.level; + break; + + case D3D12_RTV_DIMENSION_TEXTURE1DARRAY: + desc.Texture1DArray.MipSlice = tpl->u.tex.level; + desc.Texture1DArray.FirstArraySlice = tpl->u.tex.first_layer; + desc.Texture1DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + break; + + case D3D12_RTV_DIMENSION_TEXTURE2DMS: + if (tpl->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 2DMS RTV from layer %d\n", + tpl->u.tex.first_layer); + break; + + case D3D12_RTV_DIMENSION_TEXTURE2D: + if (tpl->u.tex.first_layer > 0) + debug_printf("D3D12: can't create 2D RTV from layer %d\n", + tpl->u.tex.first_layer); + + desc.Texture2D.MipSlice = tpl->u.tex.level; + desc.Texture2D.PlaneSlice = 0; + break; + + case D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY: + desc.Texture2DMSArray.FirstArraySlice = tpl->u.tex.first_layer; + 
desc.Texture2DMSArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + break; + + case D3D12_RTV_DIMENSION_TEXTURE2DARRAY: + desc.Texture2DArray.MipSlice = tpl->u.tex.level; + desc.Texture2DArray.FirstArraySlice = tpl->u.tex.first_layer; + desc.Texture2DArray.ArraySize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + desc.Texture2DArray.PlaneSlice = 0; + break; + + case D3D12_RTV_DIMENSION_TEXTURE3D: + desc.Texture3D.MipSlice = tpl->u.tex.level; + desc.Texture3D.FirstWSlice = tpl->u.tex.first_layer; + desc.Texture3D.WSize = tpl->u.tex.last_layer - tpl->u.tex.first_layer + 1; + break; + } + + d3d12_descriptor_pool_alloc_handle(ctx->rtv_pool, handle); + screen->dev->CreateRenderTargetView(d3d12_resource_resource(res), &desc, + handle->cpu_handle); +} + +static struct pipe_surface * +d3d12_create_surface(struct pipe_context *pctx, + struct pipe_resource *pres, + const struct pipe_surface *tpl) +{ + struct d3d12_resource *res = d3d12_resource(pres); + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_screen *screen = d3d12_screen(pctx->screen); + + bool is_depth_or_stencil = util_format_is_depth_or_stencil(tpl->format); + unsigned bind = is_depth_or_stencil ? 
PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET; + + /* Don't bother if we don't support the requested format as RT or DS */ + if (!pctx->screen->is_format_supported(pctx->screen, tpl->format, PIPE_TEXTURE_2D, + tpl->nr_samples, tpl->nr_samples,bind)) + return NULL; + + struct d3d12_surface *surface = CALLOC_STRUCT(d3d12_surface); + if (!surface) + return NULL; + + pipe_resource_reference(&surface->base.texture, pres); + pipe_reference_init(&surface->base.reference, 1); + surface->base.context = pctx; + surface->base.format = tpl->format; + surface->base.width = u_minify(pres->width0, tpl->u.tex.level); + surface->base.height = u_minify(pres->height0, tpl->u.tex.level); + surface->base.u.tex.level = tpl->u.tex.level; + surface->base.u.tex.first_layer = tpl->u.tex.first_layer; + surface->base.u.tex.last_layer = tpl->u.tex.last_layer; + + DXGI_FORMAT dxgi_format = d3d12_get_resource_rt_format(tpl->format); + if (is_depth_or_stencil) + initialize_dsv(pctx, pres, tpl, &surface->desc_handle, dxgi_format); + else + initialize_rtv(pctx, pres, tpl, &surface->desc_handle, dxgi_format); + + return &surface->base; +} + +static void +d3d12_surface_destroy(struct pipe_context *pctx, + struct pipe_surface *psurf) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_surface *surface = (struct d3d12_surface*) psurf; + + d3d12_descriptor_handle_free(&surface->desc_handle); + if (d3d12_descriptor_handle_is_allocated(&surface->uint_rtv_handle)) + d3d12_descriptor_handle_free(&surface->uint_rtv_handle); + pipe_resource_reference(&psurf->texture, NULL); + pipe_resource_reference(&surface->rgba_texture, NULL); + FREE(surface); +} + +static void +blit_surface(struct d3d12_surface *surface, bool pre) +{ + struct pipe_blit_info info = {0}; + + info.src.resource = pre ? surface->base.texture : surface->rgba_texture; + info.dst.resource = pre ? surface->rgba_texture : surface->base.texture; + info.src.format = pre ? 
surface->base.texture->format : PIPE_FORMAT_R8G8B8A8_UNORM; + info.dst.format = pre ? PIPE_FORMAT_R8G8B8A8_UNORM : surface->base.texture->format; + info.src.level = info.dst.level = 0; + info.src.box.x = info.dst.box.x = 0; + info.src.box.y = info.dst.box.y = 0; + info.src.box.z = info.dst.box.z = 0; + info.src.box.width = info.dst.box.width = surface->base.width; + info.src.box.height = info.dst.box.height = surface->base.height; + info.src.box.depth = info.dst.box.depth = 0; + info.mask = PIPE_MASK_RGBA; + + d3d12_blit(surface->base.context, &info); +} + +enum d3d12_surface_conversion_mode +d3d12_surface_update_pre_draw(struct d3d12_surface *surface, + DXGI_FORMAT format) +{ + struct d3d12_screen *screen = d3d12_screen(surface->base.context->screen); + struct d3d12_resource *res = d3d12_resource(surface->base.texture); + DXGI_FORMAT dxgi_format = d3d12_get_resource_rt_format(surface->base.format); + enum d3d12_surface_conversion_mode mode; + + if (dxgi_format == format) + return D3D12_SURFACE_CONVERSION_NONE; + + if (dxgi_format == DXGI_FORMAT_B8G8R8A8_UNORM || + dxgi_format == DXGI_FORMAT_B8G8R8X8_UNORM) + mode = D3D12_SURFACE_CONVERSION_BGRA_UINT; + else + mode = D3D12_SURFACE_CONVERSION_RGBA_UINT; + + if (mode == D3D12_SURFACE_CONVERSION_BGRA_UINT) { + if (!surface->rgba_texture) { + struct pipe_resource templ = {{0}}; + struct pipe_resource *src = surface->base.texture; + + templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; + templ.width0 = src->width0; + templ.height0 = src->height0; + templ.depth0 = src->depth0; + templ.array_size = src->array_size; + templ.nr_samples = src->nr_samples; + templ.nr_storage_samples = src->nr_storage_samples; + templ.usage = PIPE_USAGE_DEFAULT | PIPE_USAGE_STAGING; + templ.bind = src->bind; + templ.target = src->target; + + surface->rgba_texture = screen->base.resource_create(&screen->base, &templ); + } + + blit_surface(surface, true); + res = d3d12_resource(surface->rgba_texture); + } + + if 
(!d3d12_descriptor_handle_is_allocated(&surface->uint_rtv_handle)) { + initialize_rtv(surface->base.context, &res->base, &surface->base, + &surface->uint_rtv_handle, DXGI_FORMAT_R8G8B8A8_UINT); + } + + return mode; +} + +void +d3d12_surface_update_post_draw(struct d3d12_surface *surface, + enum d3d12_surface_conversion_mode mode) +{ + if (mode == D3D12_SURFACE_CONVERSION_BGRA_UINT) + blit_surface(surface, false); +} + +D3D12_CPU_DESCRIPTOR_HANDLE +d3d12_surface_get_handle(struct d3d12_surface *surface, + enum d3d12_surface_conversion_mode mode) +{ + if (mode != D3D12_SURFACE_CONVERSION_NONE) + return surface->uint_rtv_handle.cpu_handle; + return surface->desc_handle.cpu_handle; +} + +void +d3d12_context_surface_init(struct pipe_context *context) +{ + context->create_surface = d3d12_create_surface; + context->surface_destroy = d3d12_surface_destroy; +} diff --git a/src/gallium/drivers/d3d12/d3d12_surface.h b/src/gallium/drivers/d3d12/d3d12_surface.h new file mode 100644 index 00000000000..c21933a5e5d --- /dev/null +++ b/src/gallium/drivers/d3d12/d3d12_surface.h @@ -0,0 +1,69 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef D3D12_SURFACE_H +#define D3D12_SURFACE_H + +#include "pipe/p_state.h" + +#include + +struct d3d12_descriptor_handle; +struct pipe_context; + +struct d3d12_surface { + struct pipe_surface base; + struct d3d12_descriptor_handle uint_rtv_handle; + struct pipe_resource *rgba_texture; + + struct d3d12_descriptor_handle desc_handle; +}; + +enum d3d12_surface_conversion_mode { + D3D12_SURFACE_CONVERSION_NONE, + D3D12_SURFACE_CONVERSION_RGBA_UINT, + D3D12_SURFACE_CONVERSION_BGRA_UINT, +}; + +static inline struct d3d12_surface * +d3d12_surface(struct pipe_surface *psurf) +{ + return (struct d3d12_surface *)psurf; +} + +enum d3d12_surface_conversion_mode +d3d12_surface_update_pre_draw(struct d3d12_surface *surface, + DXGI_FORMAT format); + +void +d3d12_surface_update_post_draw(struct d3d12_surface *surface, + enum d3d12_surface_conversion_mode mode); + +D3D12_CPU_DESCRIPTOR_HANDLE +d3d12_surface_get_handle(struct d3d12_surface *surface, + enum d3d12_surface_conversion_mode mode); + +void +d3d12_context_surface_init(struct pipe_context *context); + +#endif diff --git a/src/gallium/drivers/d3d12/meson.build b/src/gallium/drivers/d3d12/meson.build new file mode 100644 index 00000000000..dc2ae22aa45 --- /dev/null +++ b/src/gallium/drivers/d3d12/meson.build @@ -0,0 +1,57 @@ +# Copyright © Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# 
Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +files_libd3d12 = files( + 'd3d12_batch.cpp', + 'd3d12_blit.cpp', + 'd3d12_bufmgr.cpp', + 'd3d12_compiler.cpp', + 'd3d12_context.cpp', + 'd3d12_descriptor_pool.cpp', + 'd3d12_draw.cpp', + 'd3d12_fence.cpp', + 'd3d12_format.c', + 'd3d12_gs_variant.cpp', + 'd3d12_lower_int_cubemap_to_array.c', + 'd3d12_lower_point_sprite.c', + 'd3d12_nir_lower_texcmp.c', + 'd3d12_nir_lower_vs_vertex_conversion.c', + 'd3d12_nir_passes.c', + 'd3d12_pipeline_state.cpp', + 'd3d12_query.cpp', + 'd3d12_resource.cpp', + 'd3d12_root_signature.cpp', + 'd3d12_screen.cpp', + 'd3d12_surface.cpp', +) + +libd3d12 = static_library( + 'd3d12', + files_libd3d12, + gnu_symbol_visibility : 'hidden', + include_directories : [inc_include, inc_src, inc_mesa, inc_gallium, inc_gallium_aux], + dependencies: [idep_nir_headers, idep_libdxil_compiler, idep_libd3d12_resource_state], +) + +driver_d3d12 = declare_dependency( + compile_args : '-DGALLIUM_D3D12', + link_with : [libd3d12], +) diff --git a/src/gallium/meson.build b/src/gallium/meson.build index 7d3b6c3230d..ae0e7fcb3c3 100644 --- a/src/gallium/meson.build +++ b/src/gallium/meson.build @@ -156,13 +156,16 @@ if with_gallium_lima else driver_lima = declare_dependency() endif - if with_gallium_zink subdir('drivers/zink') else driver_zink = 
declare_dependency() endif - +if with_gallium_d3d12 + subdir('drivers/d3d12') +else + driver_d3d12 = declare_dependency() +endif if with_gallium_opencl # TODO: this isn't really clover specific, but ATM clover is the only # consumer diff --git a/src/gallium/targets/graw-gdi/meson.build b/src/gallium/targets/graw-gdi/meson.build index 782ffeaf16e..5ef0c9f137f 100644 --- a/src/gallium/targets/graw-gdi/meson.build +++ b/src/gallium/targets/graw-gdi/meson.build @@ -31,7 +31,7 @@ libgraw_gdi = shared_library( libgraw_util, libgallium, libwsgdi, ], dependencies : [ - dep_ws2_32, idep_mesautil, driver_swrast, + dep_ws2_32, idep_mesautil, driver_swrast, driver_d3d12 ], name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libgraw.dll ) diff --git a/src/gallium/targets/libgl-gdi/libgl_gdi.c b/src/gallium/targets/libgl-gdi/libgl_gdi.c index 5f4fbacc3c5..00b9de0f94f 100644 --- a/src/gallium/targets/libgl-gdi/libgl_gdi.c +++ b/src/gallium/targets/libgl-gdi/libgl_gdi.c @@ -56,6 +56,9 @@ #ifdef GALLIUM_SWR #include "swr/swr_public.h" #endif +#ifdef GALLIUM_D3D12 +#include "d3d12/d3d12_public.h" +#endif #ifdef GALLIUM_LLVMPIPE static boolean use_llvmpipe = FALSE; @@ -63,6 +66,9 @@ static boolean use_llvmpipe = FALSE; #ifdef GALLIUM_SWR static boolean use_swr = FALSE; #endif +#ifdef GALLIUM_D3D12 +static boolean use_d3d12 = FALSE; +#endif static struct pipe_screen * gdi_screen_create(void) @@ -101,6 +107,13 @@ gdi_screen_create(void) if (screen) use_swr = TRUE; } +#endif +#ifdef GALLIUM_D3D12 + if (strcmp(driver, "d3d12") == 0) { + screen = d3d12_create_screen( winsys, NULL ); + if (screen) + use_d3d12 = TRUE; + } #endif (void) driver; @@ -154,6 +167,13 @@ gdi_present(struct pipe_screen *screen, } #endif +#ifdef GALLIUM_D3D12 + if (use_d3d12) { + screen->flush_frontbuffer(screen, res, 0, 0, hDC, NULL); + return; + } +#endif + #ifdef GALLIUM_SOFTPIPE winsys = softpipe_screen(screen)->winsys, dt = softpipe_resource(res)->dt, diff --git 
a/src/gallium/targets/libgl-gdi/meson.build b/src/gallium/targets/libgl-gdi/meson.build index d7539442df4..50a308bcf50 100644 --- a/src/gallium/targets/libgl-gdi/meson.build +++ b/src/gallium/targets/libgl-gdi/meson.build @@ -39,6 +39,7 @@ libopengl32 = shared_library( ], dependencies : [ dep_ws2_32, idep_nir, idep_mesautil, driver_swrast, driver_swr, + driver_d3d12 ], name_prefix : '', # otherwise mingw will create libopengl32.dll install : true, diff --git a/src/meson.build b/src/meson.build index 895d6277dc2..b5f4933abca 100644 --- a/src/meson.build +++ b/src/meson.build @@ -91,6 +91,9 @@ endif if with_any_intel subdir('intel') endif +if with_gallium_d3d12 + subdir('microsoft') +endif subdir('mesa') subdir('loader') if with_platform_haiku