/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

#define NIR_INSTR_UNSUPPORTED(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_VERBOSE) { \
         fprintf(stderr, "Unsupported instruction:"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
      } \
   } while (0)

#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_find_msb_to_reverse = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .use_scoped_barrier = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .has_txs = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};

const nir_shader_compiler_options*
dxil_get_nir_compiler_options(void)
{
   return &nir_options;
}

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler =
      dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version =
      dxil_get_metadata_node(m, version_nodes, ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node =
      dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node =
      dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node =
      dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = {
      type_node, major_node, minor_node
   };
   const struct dxil_mdnode *dx_shader_model =
      dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel", &dx_shader_model, 1);
}
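/* The enum values below are magic numbers from the DXIL spec: the resource
 * metadata tags and the i32 opcode that is passed as the first argument of
 * every dx.op.* call (see DirectXShaderCompiler's docs/DXIL.rst, which this
 * file also references for the shader flags further down). */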
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,
   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;
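/* Every resource metadata record (SRV, UAV, CBV, sampler) starts with the
 * same six fields: [0] resource ID, [1] a pointer to the global symbol
 * (undef here), [2] name, [3] register space, [4] lower bound and
 * [5] range size. The class-specific tail is filled in by the
 * emit_*_metadata helpers below. */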
name : ""); // name fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound fields[5] = dxil_get_metadata_int32(m, layout->size); // range size } static const struct dxil_mdnode * emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type, const char *name, const resource_array_layout *layout, enum dxil_component_type comp_type, enum dxil_resource_kind res_kind) { const struct dxil_mdnode *fields[9]; const struct dxil_mdnode *metadata_tag_nodes[2]; fill_resource_metadata(m, fields, elem_type, name, layout); fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape fields[7] = dxil_get_metadata_int1(m, 0); // sample count if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) fields[8] = NULL; else unreachable("Structured buffers not supported yet"); return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); } static const struct dxil_mdnode * emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type, const char *name, const resource_array_layout *layout, enum dxil_component_type comp_type, enum dxil_resource_kind res_kind) { const struct dxil_mdnode *fields[11]; const struct dxil_mdnode *metadata_tag_nodes[2]; fill_resource_metadata(m, fields, struct_type, name, layout); fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent fields[8] = dxil_get_metadata_int1(m, false); // has counter fields[9] = dxil_get_metadata_int1(m, false); // is ROV if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER) fields[10] = NULL; else unreachable("Structured buffers not supported yet"); return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); } static const struct dxil_mdnode * emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type, const char *name, const resource_array_layout *layout, unsigned size) { const struct dxil_mdnode *fields[8]; fill_resource_metadata(m, fields, struct_type, name, layout); fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size fields[7] = NULL; // metadata return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); } static const struct dxil_mdnode * emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type, nir_variable *var, const resource_array_layout *layout) { const struct dxil_mdnode *fields[8]; const struct glsl_type *type = glsl_without_array(var->type); fill_resource_metadata(m, fields, struct_type, var->name, layout); fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ? 
static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[11];
   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] =
         dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes,
                                          ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value *sharedvars;
   const struct dxil_value *scratchvars;
   struct hash_table *consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;
};

static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr, const struct dxil_value *op0)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, unary_func_name(intr), overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, op0 };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, op0, op1 };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, op0, op1, op2 };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, op0, op1, op2, op3 };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}
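/* All the dx.op.* wrappers above and below share one calling convention:
 * the first argument is always the i32 DXIL opcode, and the overload picks
 * the concrete type the operation works on. */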
static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, comp };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, comp };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode, comp };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);
   if (!func)
      return false;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}
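/* The texture helpers below pass i32 undef for operands they do not use
 * (the LOD/sample index and the three offsets); only the coordinates
 * supplied by the caller carry meaning. */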
static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);
   if (!func)
      return false;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle,
      coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle,
      coord[0], coord[1], coord[2],
      cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value =
      dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value =
      dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value =
      dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value ||
       !resource_range_id_value || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);
   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}
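/* dx.op.createHandle identifies a resource by class (SRV/UAV/CBV/sampler),
 * the range ID it was declared with, the index inside that range, and a
 * non-uniform flag. get_resource_id below recomputes the range ID from
 * ctx->resources, which is grouped in the order CBVs, samplers, SRVs,
 * UAVs. */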
static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value =
      dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, resource_range_id,
                                 resource_range_index_value,
                                 non_uniform_resource_index);
}

static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             const resource_array_layout *layout)
{
   struct dxil_resource *resource =
      util_dynarray_grow(&ctx->resources, struct dxil_resource, 1);
   resource->resource_type = type;
   resource->space = layout->space;
   resource->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource->upper_bound = UINT_MAX;
   else
      resource->upper_bound = layout->binding + layout->size - 1;
}

static unsigned
get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
                unsigned space, unsigned binding)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   assert(offset + count <= util_dynarray_num_elements(&ctx->resources, struct dxil_resource));
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource *resource =
         util_dynarray_element(&ctx->resources, struct dxil_resource, i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return i - offset;
      }
   }

   unreachable("Resource access for undeclared range");
   return 0;
}

static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type =
      dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta =
      emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                        &layout, comp_type, res_kind);
   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type =
      dxil_module_get_res_type(&ctx->mod, DXIL_RESOURCE_KIND_RAW_BUFFER,
                               DXIL_COMP_TYPE_INVALID, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type, "globals", &layout,
                        DXIL_COMP_TYPE_INVALID, DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind,
         const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type =
      dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, res_type, name, &layout, comp_type, res_kind);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ?
                     DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding, space;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
      /* For GL, the image intrinsics are already lowered, using driver_location
       * as the 0-based image index. Use space 1 so that we can keep using these
       * NIR constants without having to remap them, and so they don't overlap
       * SSBOs, which are also 0-based UAV bindings.
       */
      binding = var->data.driver_location;
      space = 1;
   } else {
      binding = var->data.binding;
      space = var->data.descriptor_set;
   }
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
}
static void
var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
                                           const struct nir_constant *c,
                                           const struct glsl_type *type,
                                           void *const_vals,
                                           unsigned int offset)
{
   assert(glsl_type_is_vector_or_scalar(type));
   unsigned int components = glsl_get_vector_elements(type);
   unsigned bit_size = glsl_get_bit_size(type);
   unsigned int increment = bit_size / 8;

   for (unsigned int comp = 0; comp < components; comp++) {
      uint8_t *dst = (uint8_t *)const_vals + offset;

      switch (bit_size) {
      case 64:
         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
         break;
      case 32:
         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
         break;
      case 16:
         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
         break;
      case 8:
         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
         break;
      default:
         unreachable("unexpected bit-size");
      }

      offset += increment;
   }
}

static void
var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
                     const struct glsl_type *type, void *const_vals,
                     unsigned int offset)
{
   assert(!glsl_type_is_interface(type));

   if (glsl_type_is_vector_or_scalar(type)) {
      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
                                                 const_vals, offset);
   } else if (glsl_type_is_array(type)) {
      assert(!glsl_type_is_unsized_array(type));
      const struct glsl_type *without = glsl_without_array(type);
      unsigned stride = glsl_get_explicit_stride(without);

      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
         var_fill_const_array(ctx, c->elements[elt], without,
                              const_vals, offset + (elt * stride));
         offset += glsl_get_cl_size(without);
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
         unsigned field_offset = glsl_get_struct_field_offset(type, elt);

         var_fill_const_array(ctx, c->elements[elt],
                              elt_type, const_vals, offset + field_offset);
      }
   } else
      unreachable("unknown GLSL type in var_fill_const_array");
}

static bool
emit_global_consts(struct ntd_context *ctx)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
      assert(var->constant_initializer);

      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
      var_fill_const_array(ctx, var->constant_initializer, var->type,
                           const_ints, 0);
      const struct dxil_value **const_vals =
         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
      if (!const_vals)
         return false;
      for (unsigned i = 0; i < num_members; i++)
         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);

      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!elt_type)
         return false;
      const struct dxil_type *type =
         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
      if (!type)
         return false;
      const struct dxil_value *agg_vals =
         dxil_module_get_array_const(&ctx->mod, type, const_vals);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar =
         dxil_add_global_ptr_var(&ctx->mod, var->name, type,
                                 DXIL_AS_DEFAULT, 4, agg_vals);
      if (!gvar)
         return false;

      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
         return false;
   }

   return true;
}
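/* Shader-temp constant initializers are flattened with their CL sizes and
 * strides into 32-bit words and emitted as initialized globals; the
 * ctx->consts table maps each nir_variable to its global for later loads. */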
static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   assert(count != 0);

   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type =
      dxil_module_get_struct_type(&ctx->mod, name, &array_type, 1);
   // All ubo[1]s should have been lowered to ubo with static indexing
   const struct dxil_type *final_type = count != 1 ?
      dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta =
      emit_cbv_metadata(&ctx->mod, final_type, name, &layout, 4 * size);
   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, &layout);

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);

   char *name = var->name;
   char temp_name[30];
   if (name && strlen(name) == 0) {
      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
               ctx->unnamed_ubo_count++);
      name = temp_name;
   }

   const struct glsl_type *type = glsl_without_array(var->type);
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;

   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
                   dwords, count, name);
}
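/* Worked example: an interface block with an explicit size of 24 bytes is
 * padded to the 16-byte CBV row size (ALIGN_POT(24, 16) == 32) and thus
 * declared as 32 / 4 == 8 dwords. */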
static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type =
      dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);

   if (glsl_type_is_array(var->type))
      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);

   const struct dxil_mdnode *sampler_meta =
      emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, &layout);

   return true;
}

static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

   util_dynarray_foreach(&ctx->resources, struct dxil_resource, res) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
             (res->space == 1 &&
              res->resource_type != DXIL_RES_UAV_RAW &&
              ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] =
            emit_createhandle_call_const_index(ctx, res_class, id, i, false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}

static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)
{
   switch (primitive_mode) {
   case TESS_PRIMITIVE_QUADS:
      return DXIL_TESSELLATOR_DOMAIN_QUAD;
   case TESS_PRIMITIVE_TRIANGLES:
      return DXIL_TESSELLATOR_DOMAIN_TRI;
   case TESS_PRIMITIVE_ISOLINES:
      return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
   default:
      unreachable("Invalid tessellator primitive mode");
   }
}

static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   default:
   case TESS_SPACING_EQUAL:
      return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
   case TESS_SPACING_FRACTIONAL_ODD:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
   }
}

static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info *info)
{
   if (info->tess.point_mode)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
   if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
   /* Note: GL tessellation domain is inverted from D3D, which means triangle
    * winding needs to be inverted.
    */
   if (info->tess.ccw)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
   return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
}
static const struct dxil_mdnode *
emit_hs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *hs_state_nodes[7];

   hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
   hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
   hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
   hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
   hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
   hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);

   return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
}

static const struct dxil_mdnode *
emit_ds_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *ds_state_nodes[2];

   ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);

   return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static uint64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */
   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       ctx->shader->info.fs.early_fragment_tests)
      flags |= (1 << 3);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.array_layer_from_vs_or_ds)
      flags |= (1 << 9);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.uavs_at_every_stage)
      flags |= (1 << 16);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   return flags;
}

static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   char truncated_name[254] = { 0 };
   strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);

   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
   const struct dxil_mdnode *nodes[] = {
      func_md, name_md, signatures, resources, shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes, ARRAY_SIZE(nodes));
}
static const struct dxil_mdnode *
emit_resources(struct ntd_context *ctx)
{
   bool emit_resources = false;
   const struct dxil_mdnode *resources_nodes[] = {
      NULL, NULL, NULL, NULL
   };

#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)

   if (ctx->srv_metadata_nodes.size) {
      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->uav_metadata_nodes.size) {
      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->cbv_metadata_nodes.size) {
      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->sampler_metadata_nodes.size) {
      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
      emit_resources = true;
   }

#undef ARRAY_AND_SIZE

   return emit_resources ?
      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) :
      NULL;
}

static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
         const struct dxil_mdnode *value_node)
{
   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
   if (!tag_node || !value_node)
      return false;
   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;

   return true;
}
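/* emit_metadata below ties everything together into the named metadata
 * nodes the DXIL validator consumes: dx.version, dx.valver, dx.shaderModel,
 * dx.resources, dx.typeAnnotations and dx.entryPoints. */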
static bool
emit_metadata(struct ntd_context *ctx, const struct dxil_mdnode *signatures)
{
   unsigned dxilMinor = ctx->mod.minor_version;
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", 1, 4) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   const struct dxil_func_def *main_func_def = ctx->main_func_def;
   if (!main_func_def)
      return false;
   const struct dxil_func *main_func = main_func_def->func;

   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 =
      dxil_get_metadata_node(&ctx->mod, nodes_4_27_27, ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation =
      dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                             ARRAY_SIZE(main_type_annotation_nodes));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
      ctx->tess_input_control_point_count = 32;
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
            ctx->tess_input_control_point_count = glsl_array_size(var->type);
            break;
         }
      }

      if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
   }

   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }

   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
   const struct dxil_mdnode *dx_entry_point =
      emit_entrypoint(ctx, main_func, entry_func_impl->function->name,
                      signatures, resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources", &dx_resources, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}

static const struct dxil_value *
bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
               const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static const struct dxil_value *
bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
                 const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static void
store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* We pre-defined the dest value because of a phi node, so bitcast while storing if the
    * base type differs */
   if (ctx->defs[ssa->index].chans[chan]) {
      const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
      const struct dxil_type *value_type = dxil_value_get_type(value);
      if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
   }
   ctx->defs[ssa->index].chans[chan] = value;
}

static void
store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
                 const struct dxil_value *value)
{
   assert(dest->is_ssa);
   assert(value);
   store_ssa_def(ctx, &dest->ssa, chan, value);
}

static void
store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
           const struct dxil_value *value, nir_alu_type type)
{
   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_float:
      if (nir_dest_bit_size(*dest) == 64)
         ctx->mod.feats.doubles = true;
      store_dest_value(ctx, dest, chan, value);
      break;
   case nir_type_uint:
   case nir_type_int:
      if (nir_dest_bit_size(*dest) == 16)
         ctx->mod.feats.native_low_precision = true;
      if (nir_dest_bit_size(*dest) == 64)
         ctx->mod.feats.int64_ops = true;
      FALLTHROUGH;
   case nir_type_bool:
      store_dest_value(ctx, dest, chan, value);
      break;
   default:
      unreachable("unexpected nir_alu_type");
   }
}
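/* Stores double as feature bookkeeping: 16-bit integer results flag
 * native_low_precision, and 64-bit results flag int64_ops or doubles so
 * the module advertises the capabilities it actually uses. */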
static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   assert(!alu->dest.saturate);
   store_dest(ctx, &alu->dest.dest, chan, value,
              nir_op_infos[alu->op].output_type);
}

static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}

static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
   assert(src->is_ssa);
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      assert(bit_size != 64 || ctx->mod.feats.int64_ops);
      const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type))
         return value;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx, bit_size, value);
      }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
         return value;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}

static const struct dxil_type *
get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   assert(!alu->src[src].abs);
   assert(!alu->src[src].negate);
   nir_ssa_def *ssa_src = alu->src[src].src.ssa;
   unsigned chan = alu->src[src].swizzle[0];
   const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
   return dxil_value_get_type(value);
}

static const struct dxil_value *
get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   assert(!alu->src[src].abs);
   assert(!alu->src[src].negate);

   unsigned chan = alu->src[src].swizzle[0];
   return get_src(ctx, &alu->src[src].src, chan,
                  nir_op_infos[alu->op].input_types[src]);
}

static bool
emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;

   enum dxil_opt_flags flags = 0;
   if (is_float_op && !alu->exact)
      flags |= DXIL_UNSAFE_ALGEBRA;

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
   if (op0_bit_size != op1_bit_size) {
      const struct dxil_type *type =
         dxil_module_get_int_type(&ctx->mod, op0_bit_size);
      enum dxil_cast_opcode cast_op =
         op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
      op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
   }

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}
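/* DXIL requires both shift operands to have the same bit width, so the
 * shift amount above is zero-extended or truncated to the width of the
 * value being shifted. */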
static bool
emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
         enum dxil_cmp_pred pred,
         const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i16:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}

static const struct dxil_type *
get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
   case nir_type_bool:
      assert(dst_bits == 1);
      FALLTHROUGH;
   case nir_type_int:
   case nir_type_uint:
      return dxil_module_get_int_type(&ctx->mod, dst_bits);
   case nir_type_float:
      return dxil_module_get_float_type(&ctx->mod, dst_bits);
   default:
      unreachable("unknown nir_alu_type");
   }
}

static bool
is_double(nir_alu_type alu_type, unsigned bit_size)
{
   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
          bit_size == 64;
}

static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type, value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum overload_type
get_overload(nir_alu_type alu_type, unsigned bit_size)
{
   switch (nir_alu_type_get_base_type(alu_type)) {
   case nir_type_int:
   case nir_type_uint:
      switch (bit_size) {
      case 16: return DXIL_I16;
      case 32: return DXIL_I32;
      case 64: return DXIL_I64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_float:
      switch (bit_size) {
      case 16: return DXIL_F16;
      case 32: return DXIL_F32;
      case 64: return DXIL_F64;
      default:
         unreachable("unexpected bit_size");
      }
   default:
      unreachable("unexpected output type");
   }
}
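/* The overload selects the typed variant of a dx.op function; the type
 * suffix is mangled into the callee name (e.g. dx.op.unary.f32 vs.
 * dx.op.unary.f64). */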
(nir_alu_type_get_base_type(alu_type)) { case nir_type_int: case nir_type_uint: switch (bit_size) { case 16: return DXIL_I16; case 32: return DXIL_I32; case 64: return DXIL_I64; default: unreachable("unexpected bit_size"); } case nir_type_float: switch (bit_size) { case 16: return DXIL_F16; case 32: return DXIL_F32; case 64: return DXIL_F64; default: unreachable("unexpected bit_size"); } default: unreachable("unexpected output type"); } } static bool emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu, enum dxil_intr intr, const struct dxil_value *op) { const nir_op_info *info = &nir_op_infos[alu->op]; unsigned src_bits = nir_src_bit_size(alu->src[0].src); enum overload_type overload = get_overload(info->input_types[0], src_bits); const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op); if (!v) return false; store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu, enum dxil_intr intr, const struct dxil_value *op0, const struct dxil_value *op1) { const nir_op_info *info = &nir_op_infos[alu->op]; assert(info->output_type == info->input_types[0]); assert(info->output_type == info->input_types[1]); unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); assert(nir_src_bit_size(alu->src[0].src) == dst_bits); assert(nir_src_bit_size(alu->src[1].src) == dst_bits); enum overload_type overload = get_overload(info->output_type, dst_bits); const struct dxil_value *v = emit_binary_call(ctx, overload, intr, op0, op1); if (!v) return false; store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu, enum dxil_intr intr, const struct dxil_value *op0, const struct dxil_value *op1, const struct dxil_value *op2) { const nir_op_info *info = &nir_op_infos[alu->op]; unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); assert(nir_src_bit_size(alu->src[0].src) == dst_bits); assert(nir_src_bit_size(alu->src[1].src) == dst_bits); assert(nir_src_bit_size(alu->src[2].src) == dst_bits); assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits)); assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits)); assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits)); enum overload_type overload = get_overload(info->output_type, dst_bits); const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr, op0, op1, op2); if (!v) return false; store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *base, const struct dxil_value *insert, const struct dxil_value *offset, const struct dxil_value *width) { /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */ const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI, width, offset, insert, base); if (!v) return false; /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. 
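* (BFI's masks wrap when width >= 32, so the select below returns the insert value directly, matching NIR's semantics.)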
*/ const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE, width, dxil_module_get_int32_const(&ctx->mod, 32)); if (!compare_width) return false; v = dxil_emit_select(&ctx->mod, compare_width, insert, v); if (!v) return false; store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *sel, const struct dxil_value *val_true, const struct dxil_value *val_false) { assert(sel); assert(val_true); assert(val_false); const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false); if (!v) return false; store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) { assert(val); struct dxil_module *m = &ctx->mod; const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00); /* 1.0 in IEEE 754 half precision */ const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0); if (!c0 || !c1) return false; return emit_select(ctx, alu, val, c1, c0); } static bool emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) { assert(val); struct dxil_module *m = &ctx->mod; const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f); const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f); if (!c0 || !c1) return false; return emit_select(ctx, alu, val, c1, c0); } static bool emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) { assert(val); struct dxil_module *m = &ctx->mod; const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0); const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0); if (!c0 || !c1) return false; ctx->mod.feats.doubles = 1; return emit_select(ctx, alu, val, c1, c0); } static bool emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) { assert(val); const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f); return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero); } static bool emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift) { if (shift) { val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val, dxil_module_get_int32_const(&ctx->mod, 16), 0); if (!val) return false; } const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.legacyF16ToF32", DXIL_NONE); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32); if (!opcode) return false; const struct dxil_value *args[] = { opcode, val }; const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); if (!v) return false; store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.legacyF32ToF16", DXIL_NONE); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16); if (!opcode) return false; const struct dxil_value *args[] = { opcode, val0 }; const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); if (!v) return false; /* Pack the second component into the high 16 bits unless it is known to be zero. */ if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) { args[1] = val1; const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); if (!v_high) return false; v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high, dxil_module_get_int32_const(&ctx->mod, 16), 0); if 
(!v_high) return false; v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0); if (!v) return false; } store_alu_dest(ctx, alu, 0, v); return true; } static bool emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs) { const struct dxil_type *type = get_alu_src_type(ctx, alu, 0); nir_alu_type t = dxil_type_to_nir_type(type); for (unsigned i = 0; i < num_inputs; i++) { const struct dxil_value *src = get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t); if (!src) return false; store_alu_dest(ctx, alu, i, src); } return true; } static bool emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE); if (!opcode) return false; const struct dxil_value *args[3] = { opcode, get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32), get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32), }; if (!args[1] || !args[2]) return false; const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); if (!v) return false; store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64); return true; } static bool emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE); if (!opcode) return false; const struct dxil_value *args[] = { opcode, get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64) }; if (!args[1]) return false; const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); if (!v) return false; const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0); const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1); if (!hi || !lo) return false; store_dest_value(ctx, &alu->dest.dest, 0, hi); store_dest_value(ctx, &alu->dest.dest, 1, lo); return true; } static bool emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) { /* handle vec-instructions first; they are the only ones that produce * vector results. 
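* Everything else has been scalarized, so each remaining op below writes exactly one component (write_mask == 1).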
*/ switch (alu->op) { case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: case nir_op_vec8: case nir_op_vec16: return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs); case nir_op_mov: { assert(nir_dest_num_components(alu->dest.dest) == 1); store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx, alu->src->src.ssa, alu->src->swizzle[0])); return true; } case nir_op_pack_double_2x32_dxil: return emit_make_double(ctx, alu); case nir_op_unpack_double_2x32_dxil: return emit_split_double(ctx, alu); default: /* silence warnings */ ; } /* other ops should be scalar */ assert(alu->dest.write_mask == 1); const struct dxil_value *src[4]; assert(nir_op_infos[alu->op].num_inputs <= 4); for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { src[i] = get_alu_src(ctx, alu, i); if (!src[i]) return false; } switch (alu->op) { case nir_op_iadd: case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]); case nir_op_isub: case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]); case nir_op_imul: case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]); case nir_op_fdiv: if (alu->dest.dest.ssa.bit_size == 64) ctx->mod.feats.dx11_1_double_extensions = 1; FALLTHROUGH; case nir_op_idiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]); case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]); case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]); case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]); case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]); case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]); case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]); case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]); case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]); case nir_op_inot: { unsigned bit_size = alu->dest.dest.ssa.bit_size; intmax_t val = bit_size == 1 ? 
1 : -1; const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size); return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one); } case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]); case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]); case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]); case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]); case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]); case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]); case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]); case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]); case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]); case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]); case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]); case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]); case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]); case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]); case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]); case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]); case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]); case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]); case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]); case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]); case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]); case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]); case nir_op_fddx: case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]); case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]); case nir_op_fddy: case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]); case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]); case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]); case nir_op_frcp: { const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f); return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]); } case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]); case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]); case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]); case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]); case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]); case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]); case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]); case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]); case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]); case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]); case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]); case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]); case 
nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); case nir_op_ffma: if (alu->dest.dest.ssa.bit_size == 64) ctx->mod.feats.dx11_1_double_extensions = 1; return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]); case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]); case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]); case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]); case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false); case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true); case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]); case nir_op_b2i16: case nir_op_i2i16: case nir_op_f2i16: case nir_op_f2u16: case nir_op_u2u16: case nir_op_u2f16: case nir_op_i2f16: case nir_op_f2f16_rtz: case nir_op_b2i32: case nir_op_f2f32: case nir_op_f2i32: case nir_op_f2u32: case nir_op_i2f32: case nir_op_i2i32: case nir_op_u2f32: case nir_op_u2u32: case nir_op_b2i64: case nir_op_f2f64: case nir_op_f2i64: case nir_op_f2u64: case nir_op_i2f64: case nir_op_i2i64: case nir_op_u2f64: case nir_op_u2u64: return emit_cast(ctx, alu, src[0]); case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]); case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]); case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]); case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]); default: NIR_INSTR_UNSUPPORTED(&alu->instr); assert(!"Unimplemented ALU instruction"); return false; } } static const struct dxil_value * load_ubo(struct ntd_context *ctx, const struct dxil_value *handle, const struct dxil_value *offset, enum overload_type overload) { assert(handle && offset); const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY); if (!opcode) return NULL; const struct dxil_value *args[] = { opcode, handle, offset }; const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload); if (!func) return NULL; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, nir_scope execution_scope, nir_scope mem_scope) { const struct dxil_value *opcode, *mode; const struct dxil_func *func; uint32_t flags = 0; if (execution_scope == NIR_SCOPE_WORKGROUP) flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP; if (modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) { if (mem_scope > NIR_SCOPE_WORKGROUP) flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL; else flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP; } if (modes & nir_var_mem_shared) flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE; func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE); if (!func) return false; opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER); if (!opcode) return false; mode = dxil_module_get_int32_const(&ctx->mod, flags); if (!mode) return false; const struct dxil_value *args[] = { opcode, mode }; return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) { return emit_barrier_impl(ctx, nir_intrinsic_memory_modes(intr), nir_intrinsic_execution_scope(intr), nir_intrinsic_memory_scope(intr)); }
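/* The memory-barrier wrappers below all lower to dx.op.barrier with the appropriate mode flags. Memory barrier for UAVs (buffers/images) at cross-workgroup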
scope */ static bool emit_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) { return emit_barrier_impl(ctx, nir_var_mem_global, NIR_SCOPE_NONE, NIR_SCOPE_DEVICE); } /* Memory barrier for TGSM */ static bool emit_memory_barrier_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) { return emit_barrier_impl(ctx, nir_var_mem_shared, NIR_SCOPE_NONE, NIR_SCOPE_WORKGROUP); } /* Memory barrier for all intra-workgroup memory accesses (UAVs and TGSM) */ static bool emit_group_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) { return emit_barrier_impl(ctx, nir_var_mem_shared | nir_var_mem_global, NIR_SCOPE_NONE, NIR_SCOPE_WORKGROUP); } static bool emit_control_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr) { return emit_barrier_impl(ctx, nir_var_mem_shared, NIR_SCOPE_WORKGROUP, NIR_SCOPE_NONE); } static bool emit_load_global_invocation_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(intr->dest.is_ssa); nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { if (comps & (1 << i)) { const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); if (!idx) return false; const struct dxil_value *globalid = emit_threadid_call(ctx, idx); if (!globalid) return false; store_dest_value(ctx, &intr->dest, i, globalid); } } return true; } static bool emit_load_local_invocation_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(intr->dest.is_ssa); nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { if (comps & (1 << i)) { const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); if (!idx) return false; const struct dxil_value *threadidingroup = emit_threadidingroup_call(ctx, idx); if (!threadidingroup) return false; store_dest_value(ctx, &intr->dest, i, threadidingroup); } } return true; } static bool emit_load_local_invocation_index(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(intr->dest.is_ssa); const struct dxil_value *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx); if (!flattenedthreadidingroup) return false; store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup); return true; } static bool emit_load_local_workgroup_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(intr->dest.is_ssa); nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa); for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { if (comps & (1 << i)) { const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i); if (!idx) return false; const struct dxil_value *groupid = emit_groupid_call(ctx, idx); if (!groupid) return false; store_dest_value(ctx, &intr->dest, i, groupid); } } return true; } static const struct dxil_value * call_unary_external_function(struct ntd_context *ctx, const char *name, int32_t dxil_intr) { const struct dxil_func *func = dxil_get_function(&ctx->mod, name, DXIL_I32); if (!func) return NULL; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, dxil_intr); if (!opcode) return NULL; const struct dxil_value *args[] = {opcode}; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_load_unary_external_function(struct ntd_context *ctx, nir_intrinsic_instr *intr, const char *name, int32_t dxil_intr) { const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr); if (!value) return false; store_dest_value(ctx, 
&intr->dest, 0, value); return true; } static bool emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *value = call_unary_external_function(ctx, "dx.op.coverage", DXIL_INTR_COVERAGE); /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */ if (ctx->mod.info.has_per_sample_input) { value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value, dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, dxil_module_get_int32_const(&ctx->mod, 1), call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX), 0), 0); } store_dest_value(ctx, &intr->dest, 0, value); return true; } static bool emit_load_tess_coord(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION); if (!opcode) return false; unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2; for (unsigned i = 0; i < num_coords; ++i) { unsigned component_idx = i; const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx); if (!component) return false; const struct dxil_value *args[] = { opcode, component }; const struct dxil_value *value = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); store_dest_value(ctx, &intr->dest, i, value); } for (unsigned i = num_coords; i < intr->dest.ssa.num_components; ++i) { const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f); store_dest_value(ctx, &intr->dest, i, value); } return true; } static const struct dxil_value * get_int32_undef(struct dxil_module *m) { const struct dxil_type *int32_type = dxil_module_get_int_type(m, 32); if (!int32_type) return NULL; return dxil_module_get_undef(m, int32_type); } static const struct dxil_value * emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var, const struct dxil_value *index) { assert(var->data.mode == nir_var_shader_temp); struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var); assert(he != NULL); const struct dxil_value *ptr = he->data; const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return NULL; const struct dxil_value *ops[] = { ptr, zero, index }; return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); } static const struct dxil_value * get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, enum dxil_resource_kind kind) { /* This source might be one of: * 1. Constant resource index - just look it up in precomputed handle arrays * If it's null in that array, create a handle, and store the result * 2. A handle from load_vulkan_descriptor - just get the stored SSA value * 3. 
Dynamic resource index - create a handle for it here */ assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32); nir_const_value *const_block_index = nir_src_as_const_value(*src); const struct dxil_value **handle_entry = NULL; if (const_block_index) { assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN); switch (kind) { case DXIL_RESOURCE_KIND_CBUFFER: handle_entry = &ctx->cbv_handles[const_block_index->u32]; break; case DXIL_RESOURCE_KIND_RAW_BUFFER: if (class == DXIL_RESOURCE_CLASS_UAV) handle_entry = &ctx->ssbo_handles[const_block_index->u32]; else handle_entry = &ctx->srv_handles[const_block_index->u32]; break; case DXIL_RESOURCE_KIND_SAMPLER: handle_entry = &ctx->sampler_handles[const_block_index->u32]; break; default: if (class == DXIL_RESOURCE_CLASS_UAV) handle_entry = &ctx->image_handles[const_block_index->u32]; else handle_entry = &ctx->srv_handles[const_block_index->u32]; break; } } if (handle_entry && *handle_entry) return *handle_entry; const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0); if (nir_src_as_deref(*src) || ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { return value; } unsigned space = 0; if (ctx->opts->environment == DXIL_ENVIRONMENT_GL && class == DXIL_RESOURCE_CLASS_UAV) { if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER) space = 2; else space = 1; } /* The base binding here will almost always be zero. The only cases where we end * up in this type of dynamic indexing are: * 1. GL UBOs * 2. GL SSBOs * 3. CL SSBOs * In all cases except GL UBOs, the resources are a single zero-based array. * For GL UBOs the base is 1, because uniforms use binding 0 and cannot be dynamically * indexed. All other cases should either fall into static indexing (first early return), * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or * load_vulkan_descriptor handle creation. 
*/ unsigned base_binding = 0; if (ctx->opts->environment == DXIL_ENVIRONMENT_GL && class == DXIL_RESOURCE_CLASS_CBV) base_binding = 1; const struct dxil_value *handle = emit_createhandle_call(ctx, class, get_resource_id(ctx, class, space, base_binding), value, !const_block_index); if (handle_entry) *handle_entry = handle; return handle; } static bool emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); if (var && var->data.access & ACCESS_NON_WRITEABLE) class = DXIL_RESOURCE_CLASS_SRV; } const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER); const struct dxil_value *offset = get_src(ctx, &intr->src[1], 0, nir_type_uint); if (!int32_undef || !handle || !offset) return false; assert(nir_src_bit_size(intr->src[0]) == 32); assert(nir_intrinsic_dest_components(intr) <= 4); const struct dxil_value *coord[2] = { offset, int32_undef }; const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32); if (!load) return false; for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) { const struct dxil_value *val = dxil_emit_extractval(&ctx->mod, load, i); if (!val) return false; store_dest_value(ctx, &intr->dest, i, val); } return true; } static bool emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); const struct dxil_value *offset = get_src(ctx, &intr->src[2], 0, nir_type_uint); if (!handle || !offset) return false; assert(nir_src_bit_size(intr->src[0]) == 32); unsigned num_components = nir_src_num_components(intr->src[0]); assert(num_components <= 4); const struct dxil_value *value[4]; for (unsigned i = 0; i < num_components; ++i) { value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint); if (!value[i]) return false; } const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[2] = { offset, int32_undef }; for (int i = num_components; i < 4; ++i) value[i] = int32_undef; const struct dxil_value *write_mask = dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); if (!write_mask) return false; return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32); } static bool emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_uint); const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint); const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); const struct dxil_value *offset = get_src(ctx, &intr->src[3], 0, nir_type_uint); if (!value || !mask || !handle || !offset) return false; const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { offset, int32_undef, int32_undef }; return emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL && emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL; } static bool emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value 
*zero, *index; /* All shared mem accesses should have been lowered to scalar 32bit * accesses. */ assert(nir_src_bit_size(intr->src[0]) == 32); assert(nir_src_num_components(intr->src[0]) == 1); zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return false; if (intr->intrinsic == nir_intrinsic_store_shared_dxil) index = get_src(ctx, &intr->src[1], 0, nir_type_uint); else index = get_src(ctx, &intr->src[2], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; const struct dxil_value *ptr, *value; ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); if (!ptr) return false; value = get_src(ctx, &intr->src[0], 0, nir_type_uint); if (!value) return false; if (intr->intrinsic == nir_intrinsic_store_shared_dxil) return dxil_emit_store(&ctx->mod, value, ptr, 4, false); const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint); if (!mask) return false; if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false, DXIL_ATOMIC_ORDERING_ACQREL, DXIL_SYNC_SCOPE_CROSSTHREAD)) return false; if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false, DXIL_ATOMIC_ORDERING_ACQREL, DXIL_SYNC_SCOPE_CROSSTHREAD)) return false; return true; } static bool emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *zero, *index; /* All scratch mem accesses should have been lowered to scalar 32bit * accesses. */ assert(nir_src_bit_size(intr->src[0]) == 32); assert(nir_src_num_components(intr->src[0]) == 1); zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return false; index = get_src(ctx, &intr->src[1], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ops[] = { ctx->scratchvars, zero, index }; const struct dxil_value *ptr, *value; ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); if (!ptr) return false; value = get_src(ctx, &intr->src[0], 0, nir_type_uint); if (!value) return false; return dxil_emit_store(&ctx->mod, value, ptr, 4, false); } static bool emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER); if (!handle) return false; const struct dxil_value *offset; nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]); if (const_offset) { offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4); } else { const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint); const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4); if (!offset_src || !c4) return false; offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0); } const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32); if (!agg) return false; for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i); store_dest(ctx, &intr->dest, i, retval, nir_dest_bit_size(intr->dest) > 1 ? 
nir_type_float : nir_type_bool); } return true; } static bool emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(nir_dest_num_components(intr->dest) <= 4); assert(nir_dest_bit_size(intr->dest) == 32); const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER); const struct dxil_value *offset = get_src(ctx, &intr->src[1], 0, nir_type_uint); if (!handle || !offset) return false; const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32); if (!agg) return false; for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++) store_dest_value(ctx, &intr->dest, i, dxil_emit_extractval(&ctx->mod, agg, i)); return true; } /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique * between control points and patch variables in HS/DS */ static nir_variable * find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch) { nir_foreach_variable_with_modes(var, s, mode) { if (var->data.driver_location == driver_location && var->data.patch == patch) return var; } return NULL; } static bool emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(intr->intrinsic == nir_intrinsic_store_output || ctx->mod.shader_kind == DXIL_HULL_SHADER); bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output && ctx->mod.shader_kind == DXIL_HULL_SHADER; nir_alu_type out_type = nir_intrinsic_src_type(intr); enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size); const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ? "dx.op.storePatchConstant" : "dx.op.storeOutput", overload); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ? DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT); const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr)); unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2; /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature * generation, so muck with them here too. 
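* For tess levels the row selects the array element and varies per component while the column stays 0; for all other outputs the row comes from the source and the column varies.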
*/ nir_io_semantics semantics = nir_intrinsic_io_semantics(intr); bool is_tess_level = is_patch_constant && (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER || semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER); const struct dxil_value *row = NULL; const struct dxil_value *col = NULL; if (is_tess_level) col = dxil_module_get_int8_const(&ctx->mod, 0); else row = get_src(ctx, &intr->src[row_index], 0, nir_type_int); bool success = true; uint32_t writemask = nir_intrinsic_write_mask(intr); nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant); unsigned var_base_component = var->data.location_frac; unsigned base_component = nir_intrinsic_component(intr) - var_base_component; for (unsigned i = 0; i < intr->num_components && success; ++i) { if (writemask & (1 << i)) { if (is_tess_level) row = dxil_module_get_int32_const(&ctx->mod, i + base_component); else col = dxil_module_get_int8_const(&ctx->mod, i + base_component); const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type); if (!col || !row || !value) return false; const struct dxil_value *args[] = { opcode, output_id, row, col, value }; success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); } } /* Make sure all SV_Position components are written, otherwise the DXIL * validator complains. */ bool is_sv_pos = ctx->mod.shader_kind != DXIL_COMPUTE_SHADER && ctx->mod.shader_kind != DXIL_PIXEL_SHADER && var->data.location == VARYING_SLOT_POS; if (is_sv_pos) { const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32); const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type); unsigned pos_wrmask = writemask << base_component; for (unsigned i = 0; i < 4; ++i) { if (!(BITFIELD_BIT(i) & pos_wrmask)) { const struct dxil_value *args[] = { opcode, output_id, row, dxil_module_get_int8_const(&ctx->mod, i), float_undef, }; success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); } } } return success; } static bool emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) { bool attr_at_vertex = false; if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER && ctx->opts->interpolate_at_vertex && ctx->opts->provoking_vertex != 0 && (nir_intrinsic_dest_type(intr) & nir_type_float)) { nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr)); attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT; } bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER && intr->intrinsic == nir_intrinsic_load_input) || (ctx->mod.shader_kind == DXIL_HULL_SHADER && intr->intrinsic == nir_intrinsic_load_output); bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output; unsigned opcode_val; const char *func_name; if (attr_at_vertex) { opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX; func_name = "dx.op.attributeAtVertex"; } else if (is_patch_constant) { opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT; func_name = "dx.op.loadPatchConstant"; } else if (is_output_control_point) { opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT; func_name = "dx.op.loadOutputControlPoint"; } else { opcode_val = DXIL_INTR_LOAD_INPUT; func_name = "dx.op.loadInput"; } const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val); if (!opcode) return false; const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, is_patch_constant || 
is_output_control_point ? nir_intrinsic_base(intr) : ctx->mod.input_mappings[nir_intrinsic_base(intr)]); if (!input_id) return false; bool is_per_vertex = intr->intrinsic == nir_intrinsic_load_per_vertex_input || intr->intrinsic == nir_intrinsic_load_per_vertex_output; int row_index = is_per_vertex ? 1 : 0; const struct dxil_value *vertex_id = NULL; if (!is_patch_constant) { if (is_per_vertex) { vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int); } else if (attr_at_vertex) { vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex); } else { const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); if (!int32_type) return false; vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); } if (!vertex_id) return false; } /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature * generation, so muck with them here too. */ nir_io_semantics semantics = nir_intrinsic_io_semantics(intr); bool is_tess_level = is_patch_constant && (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER || semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER); const struct dxil_value *row = NULL; const struct dxil_value *comp = NULL; if (is_tess_level) comp = dxil_module_get_int8_const(&ctx->mod, 0); else row = get_src(ctx, &intr->src[row_index], 0, nir_type_int); nir_alu_type out_type = nir_intrinsic_dest_type(intr); enum overload_type overload = get_overload(out_type, intr->dest.ssa.bit_size); const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload); if (!func) return false; nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant); unsigned var_base_component = var ? var->data.location_frac : 0; unsigned base_component = nir_intrinsic_component(intr) - var_base_component; for (unsigned i = 0; i < intr->num_components; ++i) { if (is_tess_level) row = dxil_module_get_int32_const(&ctx->mod, i + base_component); else comp = dxil_module_get_int8_const(&ctx->mod, i + base_component); if (!row || !comp) return false; const struct dxil_value *args[] = { opcode, input_id, row, comp, vertex_id }; unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 
1 : 0); const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args); if (!retval) return false; store_dest(ctx, &intr->dest, i, retval, out_type); } return true; } static bool emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr) { nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]); const struct dxil_value *args[6] = { 0 }; unsigned opcode_val; const char *func_name; unsigned num_args; switch (barycentric->intrinsic) { case nir_intrinsic_load_barycentric_at_offset: opcode_val = DXIL_INTR_EVAL_SNAPPED; func_name = "dx.op.evalSnapped"; num_args = 6; for (unsigned i = 0; i < 2; ++i) { const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float); /* GLSL offsets are in [-0.5f, 0.5f); DXIL takes integer offsets in [-8, 7] in units of 1/16 pixel, hence the scale by 16 before the cast. */ const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0); args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI, dxil_module_get_int_type(&ctx->mod, 32), offset_16); } break; case nir_intrinsic_load_barycentric_pixel: opcode_val = DXIL_INTR_EVAL_SNAPPED; func_name = "dx.op.evalSnapped"; num_args = 6; args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0); break; case nir_intrinsic_load_barycentric_at_sample: opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX; func_name = "dx.op.evalSampleIndex"; num_args = 5; args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int); break; case nir_intrinsic_load_barycentric_centroid: opcode_val = DXIL_INTR_EVAL_CENTROID; func_name = "dx.op.evalCentroid"; num_args = 4; break; default: unreachable("Unsupported interpolation barycentric intrinsic"); } args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val); args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr)); args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int); const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32); if (!func) return false; for (unsigned i = 0; i < intr->num_components; ++i) { args[3] = dxil_module_get_int8_const(&ctx->mod, i + nir_intrinsic_component(intr)); const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args); if (!retval) return false; store_dest(ctx, &intr->dest, i, retval, nir_type_float); } return true; } static bool emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr) { struct nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); const struct dxil_value *index = get_src(ctx, &intr->src[1], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index); if (!ptr) return false; const struct dxil_value *retval = dxil_emit_load(&ctx->mod, ptr, 4, false); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); return true; } static bool emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *zero, *index; unsigned bit_size = nir_dest_bit_size(intr->dest); unsigned align = bit_size / 8; /* All shared mem accesses should have been lowered to scalar 32bit * accesses. 
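* Each load GEPs into the flat i32 array backing the shared region and loads a single element.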
*/ assert(bit_size == 32); assert(nir_dest_num_components(intr->dest) == 1); zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return false; index = get_src(ctx, &intr->src[0], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; const struct dxil_value *ptr, *retval; ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); if (!ptr) return false; retval = dxil_emit_load(&ctx->mod, ptr, align, false); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); return true; } static bool emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *zero, *index; unsigned bit_size = nir_dest_bit_size(intr->dest); unsigned align = bit_size / 8; /* All scratch mem accesses should have been lowered to scalar 32bit * accesses. */ assert(bit_size == 32); assert(nir_dest_num_components(intr->dest) == 1); zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return false; index = get_src(ctx, &intr->src[0], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ops[] = { ctx->scratchvars, zero, index }; const struct dxil_value *ptr, *retval; ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); if (!ptr) return false; retval = dxil_emit_load(&ctx->mod, ptr, align, false); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); return true; } static bool emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value) { const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD); if (!opcode) return false; const struct dxil_value *args[] = { opcode, value }; const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE); if (!func) return false; return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool); if (!value) return false; return emit_discard_if_with_value(ctx, value); } static bool emit_discard(struct ntd_context *ctx) { const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true); return emit_discard_if_with_value(ctx, value); } static bool emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM); const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); if (!opcode || !stream_id) return false; const struct dxil_value *args[] = { opcode, stream_id }; const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE); if (!func) return false; return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM); const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); if (!opcode || !stream_id) return false; const struct dxil_value *args[] = { opcode, stream_id }; const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE); if (!func) return false; return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const 
struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); if (!handle) return false; bool is_array = false; if (intr->intrinsic == nir_intrinsic_image_deref_store) is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); else is_array = nir_intrinsic_image_array(intr); const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ? nir_intrinsic_image_dim(intr) : glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); if (is_array) ++num_coords; assert(num_coords <= nir_src_num_components(intr->src[1])); for (unsigned i = 0; i < num_coords; ++i) { coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); if (!coord[i]) return false; } nir_alu_type in_type = nir_intrinsic_src_type(intr); enum overload_type overload = get_overload(in_type, 32); assert(nir_src_bit_size(intr->src[3]) == 32); unsigned num_components = nir_src_num_components(intr->src[3]); assert(num_components <= 4); const struct dxil_value *value[4]; for (unsigned i = 0; i < num_components; ++i) { value[i] = get_src(ctx, &intr->src[3], i, in_type); if (!value[i]) return false; } for (int i = num_components; i < 4; ++i) value[i] = int32_undef; const struct dxil_value *write_mask = dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); if (!write_mask) return false; if (image_dim == GLSL_SAMPLER_DIM_BUF) { coord[1] = int32_undef; return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload); } else return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload); } static bool emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); if (!handle) return false; bool is_array = false; if (intr->intrinsic == nir_intrinsic_image_deref_load) is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); else is_array = nir_intrinsic_image_array(intr); const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ? 
nir_intrinsic_image_dim(intr) : glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); if (is_array) ++num_coords; assert(num_coords <= nir_src_num_components(intr->src[1])); for (unsigned i = 0; i < num_coords; ++i) { coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); if (!coord[i]) return false; } nir_alu_type out_type = nir_intrinsic_dest_type(intr); enum overload_type overload = get_overload(out_type, 32); const struct dxil_value *load_result; if (image_dim == GLSL_SAMPLER_DIM_BUF) { coord[1] = int32_undef; load_result = emit_bufferload_call(ctx, handle, coord, overload); } else load_result = emit_textureload_call(ctx, handle, coord, overload); if (!load_result) return false; assert(nir_dest_bit_size(intr->dest) == 32); unsigned num_components = nir_dest_num_components(intr->dest); assert(num_components <= 4); for (unsigned i = 0; i < num_components; ++i) { const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i); if (!component) return false; store_dest(ctx, &intr->dest, i, component, out_type); } /* FIXME: This flag should be set to true when the RWTexture is attached to * a vector, and we always declare a vec4 right now, so it should always be * true. Might be worth reworking the dxil_module_get_res_type() to use a * scalar when the image only has one component. */ ctx->mod.feats.typed_uav_load_additional_formats = true; return true; } static bool emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_atomic_op op, nir_alu_type type) { const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); if (!handle) return false; bool is_array = false; nir_deref_instr *src_as_deref = nir_src_as_deref(intr->src[0]); if (src_as_deref) is_array = glsl_sampler_type_is_array(src_as_deref->type); else is_array = nir_intrinsic_image_array(intr); const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; enum glsl_sampler_dim image_dim = src_as_deref ? 
glsl_get_sampler_dim(src_as_deref->type) : nir_intrinsic_image_dim(intr); unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); if (is_array) ++num_coords; assert(num_coords <= nir_src_num_components(intr->src[1])); for (unsigned i = 0; i < num_coords; ++i) { coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); if (!coord[i]) return false; } const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type); if (!value) return false; const struct dxil_value *retval = emit_atomic_binop(ctx, handle, op, coord, value); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, type); return true; } static bool emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); if (!handle) return false; bool is_array = false; if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type); else is_array = nir_intrinsic_image_array(intr); const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_atomic_comp_swap ? nir_intrinsic_image_dim(intr) : glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type); unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); if (is_array) ++num_coords; assert(num_coords <= nir_src_num_components(intr->src[1])); for (unsigned i = 0; i < num_coords; ++i) { coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); if (!coord[i]) return false; } const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint); const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint); if (!cmpval || !newval) return false; const struct dxil_value *retval = emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); return true; } struct texop_parameters { const struct dxil_value *tex; const struct dxil_value *sampler; const struct dxil_value *bias, *lod_or_sample, *min_lod; const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; const struct dxil_value *cmp; enum overload_type overload; }; static const struct dxil_value * emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE); if (!func) return NULL; const struct dxil_value *args[] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE), params->tex, params->lod_or_sample }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static bool emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D); if (!handle) return false; const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint); if (!lod) return false; struct texop_parameters params = { .tex = handle, .lod_or_sample = lod }; const struct dxil_value *dimensions = emit_texture_size(ctx, &params); if (!dimensions) return false; for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i); store_dest(ctx, 
&intr->dest, i, retval, nir_type_uint); } return true; } static bool emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) { enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV; if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); if (var && var->data.access & ACCESS_NON_WRITEABLE) class = DXIL_RESOURCE_CLASS_SRV; } const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER); if (!handle) return false; struct texop_parameters params = { .tex = handle, .lod_or_sample = dxil_module_get_undef( &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) }; const struct dxil_value *dimensions = emit_texture_size(ctx, &params); if (!dimensions) return false; const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0); store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); return true; } static bool emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_atomic_op op, nir_alu_type type) { const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); const struct dxil_value *offset = get_src(ctx, &intr->src[1], 0, nir_type_uint); const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, type); if (!value || !handle || !offset) return false; const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { offset, int32_undef, int32_undef }; const struct dxil_value *retval = emit_atomic_binop(ctx, handle, op, coord, value); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, type); return true; } static bool emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER); const struct dxil_value *offset = get_src(ctx, &intr->src[1], 0, nir_type_uint); const struct dxil_value *cmpval = get_src(ctx, &intr->src[2], 0, nir_type_int); const struct dxil_value *newval = get_src(ctx, &intr->src[3], 0, nir_type_int); if (!cmpval || !newval || !handle || !offset) return false; const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); if (!int32_undef) return false; const struct dxil_value *coord[3] = { offset, int32_undef, int32_undef }; const struct dxil_value *retval = emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, nir_type_int); return true; } static bool emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_rmw_op op, nir_alu_type type) { const struct dxil_value *zero, *index; assert(nir_src_bit_size(intr->src[1]) == 32); zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return false; index = get_src(ctx, &intr->src[0], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; const struct dxil_value *ptr, *value, *retval; ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); if (!ptr) return false; value = get_src(ctx, &intr->src[1], 0, type); if (!value) return false; retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false, DXIL_ATOMIC_ORDERING_ACQREL, DXIL_SYNC_SCOPE_CROSSTHREAD); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, type); return true; }
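/* Compare-and-swap on shared (TGSM) memory: GEP to the 32-bit element and emit an atomic cmpxchg; the value previously held in memory is stored as the result. */ static bool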
emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *zero, *index; assert(nir_src_bit_size(intr->src[1]) == 32); zero = dxil_module_get_int32_const(&ctx->mod, 0); if (!zero) return false; index = get_src(ctx, &intr->src[0], 0, nir_type_uint); if (!index) return false; const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; const struct dxil_value *ptr, *cmpval, *newval, *retval; ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); if (!ptr) return false; cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint); newval = get_src(ctx, &intr->src[2], 0, nir_type_uint); if (!cmpval || !newval) return false; retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false, DXIL_ATOMIC_ORDERING_ACQREL, DXIL_SYNC_SCOPE_CROSSTHREAD); if (!retval) return false; store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); return true; } static bool emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr) { unsigned int binding = nir_intrinsic_binding(intr); bool const_index = nir_src_is_const(intr->src[0]); if (const_index) { binding += nir_src_as_const_value(intr->src[0])->u32; } const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding); if (!index_value) return false; if (!const_index) { const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32); if (!offset) return false; index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0); if (!index_value) return false; } store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32); store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32); return true; } static bool emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr) { nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]); /* We currently do not support reindex */ assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index); unsigned binding = nir_intrinsic_binding(index); unsigned space = nir_intrinsic_desc_set(index); /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. 
*/ assert(space < 32); nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0])); const struct dxil_value *handle = NULL; enum dxil_resource_class resource_class; switch (nir_intrinsic_desc_type(intr)) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: resource_class = DXIL_RESOURCE_CLASS_CBV; break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: if (var->data.access & ACCESS_NON_WRITEABLE) resource_class = DXIL_RESOURCE_CLASS_SRV; else resource_class = DXIL_RESOURCE_CLASS_UAV; break; default: unreachable("unknown descriptor type"); return false; } const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32); if (!index_value) return false; handle = emit_createhandle_call(ctx, resource_class, get_resource_id(ctx, resource_class, space, binding), index_value, false); store_dest_value(ctx, &intr->dest, 0, handle); store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32); return true; } static bool emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE); if (!func) return false; const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION); if (!opcode) return false; const struct dxil_value *args[] = { opcode, get_src(ctx, &intr->src[0], 0, nir_type_uint32), }; if (!args[1]) return false; const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); if (!v) return false; for (unsigned i = 0; i < 2; ++i) { /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */ const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, dxil_emit_extractval(&ctx->mod, v, i), dxil_module_get_float_const(&ctx->mod, 0.5f), 0); store_dest(ctx, &intr->dest, i, coord, nir_type_float32); } return true; } static bool emit_load_layer_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) { const struct dxil_value *layer_id = dxil_module_get_int32_const(&ctx->mod, 0); /* TODO: Properly implement this once multi-view is supported */ store_dest_value(ctx, &intr->dest, 0, layer_id); return true; } static bool emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr) { assert(ctx->mod.info.has_per_sample_input || intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample); if (ctx->mod.info.has_per_sample_input) return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX); store_dest_value(ctx, &intr->dest, 0, dxil_module_get_int32_const(&ctx->mod, 0)); return true; } static bool emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) { switch (intr->intrinsic) { case nir_intrinsic_load_global_invocation_id: case nir_intrinsic_load_global_invocation_id_zero_base: return emit_load_global_invocation_id(ctx, intr); case nir_intrinsic_load_local_invocation_id: return emit_load_local_invocation_id(ctx, intr); case nir_intrinsic_load_local_invocation_index: return emit_load_local_invocation_index(ctx, intr); case nir_intrinsic_load_workgroup_id: case nir_intrinsic_load_workgroup_id_zero_base: return emit_load_local_workgroup_id(ctx, intr); case nir_intrinsic_load_ssbo: return emit_load_ssbo(ctx, intr); case nir_intrinsic_store_ssbo: return emit_store_ssbo(ctx, intr); case nir_intrinsic_store_ssbo_masked_dxil: return emit_store_ssbo_masked(ctx, intr); case nir_intrinsic_store_shared_dxil: case nir_intrinsic_store_shared_masked_dxil: return 
emit_store_shared(ctx, intr); case nir_intrinsic_store_scratch_dxil: return emit_store_scratch(ctx, intr); case nir_intrinsic_load_ptr_dxil: return emit_load_ptr(ctx, intr); case nir_intrinsic_load_ubo: return emit_load_ubo(ctx, intr); case nir_intrinsic_load_ubo_dxil: return emit_load_ubo_dxil(ctx, intr); case nir_intrinsic_load_primitive_id: return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID", DXIL_INTR_PRIMITIVE_ID); case nir_intrinsic_load_sample_id: case nir_intrinsic_load_sample_id_no_per_sample: return emit_load_sample_id(ctx, intr); case nir_intrinsic_load_invocation_id: switch (ctx->mod.shader_kind) { case DXIL_HULL_SHADER: return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID", DXIL_INTR_OUTPUT_CONTROL_POINT_ID); case DXIL_GEOMETRY_SHADER: return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID", DXIL_INTR_GS_INSTANCE_ID); default: unreachable("Unexpected shader kind for invocation ID"); } case nir_intrinsic_load_sample_mask_in: return emit_load_sample_mask_in(ctx, intr); case nir_intrinsic_load_tess_coord: return emit_load_tess_coord(ctx, intr); case nir_intrinsic_load_shared_dxil: return emit_load_shared(ctx, intr); case nir_intrinsic_load_scratch_dxil: return emit_load_scratch(ctx, intr); case nir_intrinsic_discard_if: case nir_intrinsic_demote_if: return emit_discard_if(ctx, intr); case nir_intrinsic_discard: case nir_intrinsic_demote: return emit_discard(ctx); case nir_intrinsic_emit_vertex: return emit_emit_vertex(ctx, intr); case nir_intrinsic_end_primitive: return emit_end_primitive(ctx, intr); case nir_intrinsic_scoped_barrier: return emit_barrier(ctx, intr); case nir_intrinsic_memory_barrier: case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_image: case nir_intrinsic_memory_barrier_atomic_counter: return emit_memory_barrier(ctx, intr); case nir_intrinsic_memory_barrier_shared: return emit_memory_barrier_shared(ctx, intr); case nir_intrinsic_group_memory_barrier: return emit_group_memory_barrier(ctx, intr); case nir_intrinsic_control_barrier: return emit_control_barrier(ctx, intr); case nir_intrinsic_ssbo_atomic_add: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int); case nir_intrinsic_ssbo_atomic_imin: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int); case nir_intrinsic_ssbo_atomic_umin: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint); case nir_intrinsic_ssbo_atomic_imax: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int); case nir_intrinsic_ssbo_atomic_umax: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint); case nir_intrinsic_ssbo_atomic_and: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint); case nir_intrinsic_ssbo_atomic_or: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint); case nir_intrinsic_ssbo_atomic_xor: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint); case nir_intrinsic_ssbo_atomic_exchange: return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int); case nir_intrinsic_ssbo_atomic_comp_swap: return emit_ssbo_atomic_comp_swap(ctx, intr); case nir_intrinsic_shared_atomic_add_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int); case nir_intrinsic_shared_atomic_imin_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int); case nir_intrinsic_shared_atomic_umin_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint); case nir_intrinsic_shared_atomic_imax_dxil: return 
emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int); case nir_intrinsic_shared_atomic_umax_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint); case nir_intrinsic_shared_atomic_and_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint); case nir_intrinsic_shared_atomic_or_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint); case nir_intrinsic_shared_atomic_xor_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint); case nir_intrinsic_shared_atomic_exchange_dxil: return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int); case nir_intrinsic_shared_atomic_comp_swap_dxil: return emit_shared_atomic_comp_swap(ctx, intr); case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_image_atomic_add: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int); case nir_intrinsic_image_deref_atomic_imin: case nir_intrinsic_image_atomic_imin: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int); case nir_intrinsic_image_deref_atomic_umin: case nir_intrinsic_image_atomic_umin: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint); case nir_intrinsic_image_deref_atomic_imax: case nir_intrinsic_image_atomic_imax: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int); case nir_intrinsic_image_deref_atomic_umax: case nir_intrinsic_image_atomic_umax: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint); case nir_intrinsic_image_deref_atomic_and: case nir_intrinsic_image_atomic_and: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint); case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_image_atomic_or: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint); case nir_intrinsic_image_deref_atomic_xor: case nir_intrinsic_image_atomic_xor: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint); case nir_intrinsic_image_deref_atomic_exchange: case nir_intrinsic_image_atomic_exchange: return emit_image_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_uint); case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_atomic_comp_swap: return emit_image_atomic_comp_swap(ctx, intr); case nir_intrinsic_image_store: case nir_intrinsic_image_deref_store: return emit_image_store(ctx, intr); case nir_intrinsic_image_load: case nir_intrinsic_image_deref_load: return emit_image_load(ctx, intr); case nir_intrinsic_image_size: case nir_intrinsic_image_deref_size: return emit_image_size(ctx, intr); case nir_intrinsic_get_ssbo_size: return emit_get_ssbo_size(ctx, intr); case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: return emit_load_input_via_intrinsic(ctx, intr); case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: return emit_store_output_via_intrinsic(ctx, intr); case nir_intrinsic_load_barycentric_at_offset: case nir_intrinsic_load_barycentric_at_sample: case nir_intrinsic_load_barycentric_centroid: case nir_intrinsic_load_barycentric_pixel: /* Emit nothing, we only support these as inputs to load_interpolated_input */ return true; case nir_intrinsic_load_interpolated_input: return emit_load_interpolated_input(ctx, intr); case nir_intrinsic_vulkan_resource_index: return emit_vulkan_resource_index(ctx, intr); case nir_intrinsic_load_vulkan_descriptor: return emit_load_vulkan_descriptor(ctx, intr); case nir_intrinsic_load_layer_id: return emit_load_layer_id(ctx,
intr); case nir_intrinsic_load_sample_pos_from_id: return emit_load_sample_pos_from_id(ctx, intr); case nir_intrinsic_load_num_workgroups: case nir_intrinsic_load_workgroup_size: default: NIR_INSTR_UNSUPPORTED(&intr->instr); unreachable("Unimplemented intrinsic instruction"); return false; } } static bool emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const) { for (int i = 0; i < load_const->def.num_components; ++i) { const struct dxil_value *value; switch (load_const->def.bit_size) { case 1: value = dxil_module_get_int1_const(&ctx->mod, load_const->value[i].b); break; case 16: ctx->mod.feats.native_low_precision = true; value = dxil_module_get_int16_const(&ctx->mod, load_const->value[i].u16); break; case 32: value = dxil_module_get_int32_const(&ctx->mod, load_const->value[i].u32); break; case 64: ctx->mod.feats.int64_ops = true; value = dxil_module_get_int64_const(&ctx->mod, load_const->value[i].u64); break; default: unreachable("unexpected bit_size"); } if (!value) return false; store_ssa_def(ctx, &load_const->def, i, value); } return true; } static bool emit_deref(struct ntd_context* ctx, nir_deref_instr* instr) { assert(instr->deref_type == nir_deref_type_var || instr->deref_type == nir_deref_type_array); /* In the CL environment, there's nothing to emit. Any references to * derefs will emit the necessary logic to handle scratch/shared GEP addressing */ if (ctx->opts->environment == DXIL_ENVIRONMENT_CL) return true; /* In the Vulkan environment, we don't have cached handles for textures or * samplers, so let's use the opportunity of walking through the derefs to * emit those. */ nir_variable *var = nir_deref_instr_get_variable(instr); assert(var); if (!glsl_type_is_sampler(glsl_without_array(var->type)) && !glsl_type_is_image(glsl_without_array(var->type)) && !glsl_type_is_texture(glsl_without_array(var->type))) return true; const struct glsl_type *type = instr->type; const struct dxil_value *binding; unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ? var->data.driver_location : var->data.binding; if (instr->deref_type == nir_deref_type_var) { binding = dxil_module_get_int32_const(&ctx->mod, binding_val); } else { const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32); const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32); if (!base || !offset) return false; if (glsl_type_is_array(instr->type)) { offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset, dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0); if (!offset) return false; } binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0); } if (!binding) return false; /* Haven't finished chasing the deref chain yet, just store the value */ if (glsl_type_is_array(type)) { store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32); return true; } assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type)); enum dxil_resource_class res_class; if (glsl_type_is_image(type)) { if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN && (var->data.access & ACCESS_NON_WRITEABLE)) res_class = DXIL_RESOURCE_CLASS_SRV; else res_class = DXIL_RESOURCE_CLASS_UAV; } else if (glsl_type_is_sampler(type)) { res_class = DXIL_RESOURCE_CLASS_SAMPLER; } else { res_class = DXIL_RESOURCE_CLASS_SRV; } unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ? var->data.descriptor_set : (glsl_type_is_image(type) ? 
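/* outside Vulkan there are no descriptor sets, so a fixed register
 * space is picked instead: images go to space 1, textures and
 * samplers stay in space 0 */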
1 : 0); const struct dxil_value *handle = emit_createhandle_call(ctx, res_class, get_resource_id(ctx, res_class, descriptor_set, binding_val), binding, false); if (!handle) return false; store_dest_value(ctx, &instr->dest, 0, handle); return true; } static bool emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond, int true_block, int false_block) { assert(cond); assert(true_block >= 0); assert(false_block >= 0); return dxil_emit_branch(&ctx->mod, cond, true_block, false_block); } static bool emit_branch(struct ntd_context *ctx, int block) { assert(block >= 0); return dxil_emit_branch(&ctx->mod, NULL, block, -1); } static bool emit_jump(struct ntd_context *ctx, nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: case nir_jump_continue: assert(instr->instr.block->successors[0]); assert(!instr->instr.block->successors[1]); return emit_branch(ctx, instr->instr.block->successors[0]->index); default: unreachable("Unsupported jump type\n"); } } struct phi_block { unsigned num_components; struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS]; }; static bool emit_phi(struct ntd_context *ctx, nir_phi_instr *instr) { unsigned bit_size = nir_dest_bit_size(instr->dest); const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size); struct phi_block *vphi = ralloc(ctx->phis, struct phi_block); vphi->num_components = nir_dest_num_components(instr->dest); for (unsigned i = 0; i < vphi->num_components; ++i) { struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type); if (!phi) return false; store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi)); } _mesa_hash_table_insert(ctx->phis, instr, vphi); return true; } static bool fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr, struct phi_block *vphi) { const struct dxil_value *values[16]; unsigned blocks[16]; for (unsigned i = 0; i < vphi->num_components; ++i) { size_t num_incoming = 0; nir_foreach_phi_src(src, instr) { assert(src->src.is_ssa); const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i); values[num_incoming] = val; blocks[num_incoming] = src->pred->index; ++num_incoming; if (num_incoming == ARRAY_SIZE(values)) { if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks, num_incoming)) return false; num_incoming = 0; } } if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values, blocks, num_incoming)) return false; } return true; } static unsigned get_n_src(struct ntd_context *ctx, const struct dxil_value **values, unsigned max_components, nir_tex_src *src, nir_alu_type type) { unsigned num_components = nir_src_num_components(src->src); unsigned i = 0; assert(num_components <= max_components); for (i = 0; i < num_components; ++i) { values[i] = get_src(ctx, &src->src, i, type); if (!values[i]) return 0; } return num_components; } #define PAD_SRC(ctx, array, components, undef) \ for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \ array[i] = undef; \ } static const struct dxil_value * emit_sample(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload); if (!func) return NULL; const struct dxil_value *args[11] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], params->coord[3], params->offset[0], params->offset[1], params->offset[2], params->min_lod }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static const struct dxil_value * 
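/* dx.op.sampleBias: same operand layout as dx.op.sample (opcode,
 * resource and sampler handles, four coords, three offsets) with the
 * bias value inserted before the min-LOD clamp, twelve operands total. */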
emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload); if (!func) return NULL; assert(params->bias != NULL); const struct dxil_value *args[12] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], params->coord[3], params->offset[0], params->offset[1], params->offset[2], params->bias, params->min_lod }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static const struct dxil_value * emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload); if (!func) return NULL; assert(params->lod_or_sample != NULL); const struct dxil_value *args[11] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], params->coord[3], params->offset[0], params->offset[1], params->offset[2], params->lod_or_sample }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static const struct dxil_value * emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func; enum dxil_intr opcode; int numparam; if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32); opcode = DXIL_INTR_SAMPLE_CMP; numparam = 12; } else { func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32); opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO; numparam = 11; } if (!func) return NULL; const struct dxil_value *args[12] = { dxil_module_get_int32_const(&ctx->mod, opcode), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], params->coord[3], params->offset[0], params->offset[1], params->offset[2], params->cmp, params->min_lod }; return dxil_emit_call(&ctx->mod, func, args, numparam); } static const struct dxil_value * emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload); if (!func) return false; const struct dxil_value *args[17] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], params->coord[3], params->offset[0], params->offset[1], params->offset[2], params->dx[0], params->dx[1], params->dx[2], params->dy[0], params->dy[1], params->dy[2], params->min_lod }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static const struct dxil_value * emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params) { const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload); if (!func) return false; if (!params->lod_or_sample) params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)); const struct dxil_value *args[] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD), params->tex, params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2], params->offset[0], params->offset[1], params->offset[2] }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static const struct dxil_value * emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped) { const struct dxil_func *func = 
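/* dx.op.calculateLOD returns the level of detail a sample would use;
 * the trailing i1 operand selects the clamped vs. unclamped result */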
dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32); if (!func) return false; const struct dxil_value *args[] = { dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0) }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); } static const struct dxil_value * emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component) { const struct dxil_func *func = dxil_get_function(&ctx->mod, params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload); if (!func) return false; const struct dxil_value *args[] = { dxil_module_get_int32_const(&ctx->mod, params->cmp ? DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER), params->tex, params->sampler, params->coord[0], params->coord[1], params->coord[2], params->coord[3], params->offset[0], params->offset[1], dxil_module_get_int32_const(&ctx->mod, component), params->cmp }; return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1)); } static bool emit_tex(struct ntd_context *ctx, nir_tex_instr *instr) { struct texop_parameters params; memset(&params, 0, sizeof(struct texop_parameters)); if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) { params.tex = ctx->srv_handles[instr->texture_index]; params.sampler = ctx->sampler_handles[instr->sampler_index]; } const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32); const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type); unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0; params.overload = get_overload(instr->dest_type, 32); for (unsigned i = 0; i < instr->num_srcs; i++) { nir_alu_type type = nir_tex_instr_src_type(instr, i); switch (instr->src[i].src_type) { case nir_tex_src_coord: coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord), &instr->src[i], type); if (!coord_components) return false; break; case nir_tex_src_offset: offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset), &instr->src[i], nir_type_int); if (!offset_components) return false; break; case nir_tex_src_bias: assert(instr->op == nir_texop_txb); assert(nir_src_num_components(instr->src[i].src) == 1); params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float); if (!params.bias) return false; break; case nir_tex_src_lod: assert(nir_src_num_components(instr->src[i].src) == 1); if (instr->op == nir_texop_txf_ms) { assert(nir_src_as_int(instr->src[i].src) == 0); break; } /* Buffers don't have a LOD */ if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type); else params.lod_or_sample = int_undef; if (!params.lod_or_sample) return false; break; case nir_tex_src_min_lod: assert(nir_src_num_components(instr->src[i].src) == 1); params.min_lod = get_src(ctx, &instr->src[i].src, 0, type); if (!params.min_lod) return false; break; case nir_tex_src_comparator: assert(nir_src_num_components(instr->src[i].src) == 1); params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float); if (!params.cmp) return false; break; case nir_tex_src_ddx: dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx), &instr->src[i], nir_type_float); if (!dx_components) return
false; break; case nir_tex_src_ddy: dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy), &instr->src[i], nir_type_float); if (!dy_components) return false; break; case nir_tex_src_ms_index: params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int); if (!params.lod_or_sample) return false; break; case nir_tex_src_texture_deref: assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN); params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0); break; case nir_tex_src_sampler_deref: assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN); params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0); break; case nir_tex_src_texture_offset: params.tex = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SRV, get_resource_id(ctx, DXIL_RESOURCE_CLASS_SRV, 0, instr->texture_index), dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, get_src_ssa(ctx, instr->src[i].src.ssa, 0), dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0), instr->texture_non_uniform); break; case nir_tex_src_sampler_offset: if (nir_tex_instr_need_sampler(instr)) { params.sampler = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SAMPLER, get_resource_id(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 0, instr->sampler_index), dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, get_src_ssa(ctx, instr->src[i].src.ssa, 0), dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0), instr->sampler_non_uniform); } break; case nir_tex_src_projector: unreachable("Texture projector should have been lowered"); default: fprintf(stderr, "texture source: %d\n", instr->src[i].src_type); unreachable("unknown texture source"); } } assert(params.tex != NULL); assert(instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms || nir_tex_instr_is_query(instr) || params.sampler != NULL); PAD_SRC(ctx, params.coord, coord_components, float_undef); PAD_SRC(ctx, params.offset, offset_components, int_undef); if (!params.min_lod) params.min_lod = float_undef; const struct dxil_value *sample = NULL; switch (instr->op) { case nir_texop_txb: sample = emit_sample_bias(ctx, &params); break; case nir_texop_tex: if (params.cmp != NULL) { sample = emit_sample_cmp(ctx, &params); break; } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { sample = emit_sample(ctx, &params); break; } params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0); FALLTHROUGH; case nir_texop_txl: sample = emit_sample_level(ctx, &params); break; case nir_texop_txd: PAD_SRC(ctx, params.dx, dx_components, float_undef); PAD_SRC(ctx, params.dy, dy_components, float_undef); sample = emit_sample_grad(ctx, &params); break; case nir_texop_txf: case nir_texop_txf_ms: if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { params.coord[1] = int_undef; sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload); } else { PAD_SRC(ctx, params.coord, coord_components, int_undef); sample = emit_texel_fetch(ctx, &params); } break; case nir_texop_txs: sample = emit_texture_size(ctx, &params); break; case nir_texop_tg4: sample = emit_texture_gather(ctx, &params, instr->component); break; case nir_texop_lod: sample = emit_texture_lod(ctx, &params, true); store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type)); sample = emit_texture_lod(ctx, &params, false); store_dest(ctx, &instr->dest, 1, sample, nir_alu_type_get_base_type(instr->dest_type)); return true; case nir_texop_query_levels: params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32); sample = emit_texture_size(ctx, &params); const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3); store_dest(ctx,
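/* getDimensions packs (width, height, depth/layers, mip count), so
 * element 3 extracted above is the mip count query_levels wants */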
&instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type)); return true; default: fprintf(stderr, "texture op: %d\n", instr->op); unreachable("unknown texture op"); } if (!sample) return false; for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i); store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type)); } return true; } static bool emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef) { for (unsigned i = 0; i < undef->def.num_components; ++i) store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0)); return true; } static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr) { switch (instr->type) { case nir_instr_type_alu: return emit_alu(ctx, nir_instr_as_alu(instr)); case nir_instr_type_intrinsic: return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); case nir_instr_type_load_const: return emit_load_const(ctx, nir_instr_as_load_const(instr)); case nir_instr_type_deref: return emit_deref(ctx, nir_instr_as_deref(instr)); case nir_instr_type_jump: return emit_jump(ctx, nir_instr_as_jump(instr)); case nir_instr_type_phi: return emit_phi(ctx, nir_instr_as_phi(instr)); case nir_instr_type_tex: return emit_tex(ctx, nir_instr_as_tex(instr)); case nir_instr_type_ssa_undef: return emit_undefined(ctx, nir_instr_as_ssa_undef(instr)); default: NIR_INSTR_UNSUPPORTED(instr); unreachable("Unimplemented instruction type"); return false; } } static bool emit_block(struct ntd_context *ctx, struct nir_block *block) { assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids); ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block; nir_foreach_instr(instr, block) { TRACE_CONVERSION(instr); if (!emit_instr(ctx, instr)) { return false; } } return true; } static bool emit_cf_list(struct ntd_context *ctx, struct exec_list *list); static bool emit_if(struct ntd_context *ctx, struct nir_if *if_stmt) { assert(nir_src_num_components(if_stmt->condition) == 1); const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0, nir_type_bool); if (!cond) return false; /* prepare blocks */ nir_block *then_block = nir_if_first_then_block(if_stmt); assert(nir_if_last_then_block(if_stmt)->successors[0]); assert(!nir_if_last_then_block(if_stmt)->successors[1]); int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index; nir_block *else_block = NULL; int else_succ = -1; if (!exec_list_is_empty(&if_stmt->else_list)) { else_block = nir_if_first_else_block(if_stmt); assert(nir_if_last_else_block(if_stmt)->successors[0]); assert(!nir_if_last_else_block(if_stmt)->successors[1]); else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index; } if (!emit_cond_branch(ctx, cond, then_block->index, else_block ? 
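/* no else-list: the false edge falls through to the then-branch successor */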
else_block->index : then_succ)) return false; /* handle then-block */ if (!emit_cf_list(ctx, &if_stmt->then_list) || (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) && !emit_branch(ctx, then_succ))) return false; if (else_block) { /* handle else-block */ if (!emit_cf_list(ctx, &if_stmt->else_list) || (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) && !emit_branch(ctx, else_succ))) return false; } return true; } static bool emit_loop(struct ntd_context *ctx, nir_loop *loop) { nir_block *first_block = nir_loop_first_block(loop); assert(nir_loop_last_block(loop)->successors[0]); assert(!nir_loop_last_block(loop)->successors[1]); if (!emit_branch(ctx, first_block->index)) return false; if (!emit_cf_list(ctx, &loop->body)) return false; if (!emit_branch(ctx, first_block->index)) return false; return true; } static bool emit_cf_list(struct ntd_context *ctx, struct exec_list *list) { foreach_list_typed(nir_cf_node, node, node, list) { switch (node->type) { case nir_cf_node_block: if (!emit_block(ctx, nir_cf_node_as_block(node))) return false; break; case nir_cf_node_if: if (!emit_if(ctx, nir_cf_node_as_if(node))) return false; break; case nir_cf_node_loop: if (!emit_loop(ctx, nir_cf_node_as_loop(node))) return false; break; default: unreachable("unsupported cf-list node"); break; } } return true; } static void insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var) { nir_foreach_variable_in_list(var, var_list) { if (var->data.binding > new_var->data.binding) { exec_node_insert_node_before(&var->node, &new_var->node); return; } } exec_list_push_tail(var_list, &new_var->node); } static void sort_uniforms_by_binding_and_remove_structs(nir_shader *s) { struct exec_list new_list; exec_list_make_empty(&new_list); nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) { exec_node_remove(&var->node); const struct glsl_type *type = glsl_without_array(var->type); if (!glsl_type_is_struct(type)) insert_sorted_by_binding(&new_list, var); } exec_list_append(&s->variables, &new_list); } static void prepare_phi_values(struct ntd_context *ctx, nir_function_impl *impl) { /* PHI nodes are difficult to get right when tracking the types: * Since the incoming sources are linked to blocks, we can't bitcast * on the fly while loading. So scan the shader and insert a typed dummy * value for each phi source, and when storing we convert if the incoming * value has a different type than the one expected by the phi node. * We choose int as default, because it supports more bit sizes. */ nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { if (instr->type == nir_instr_type_phi) { nir_phi_instr *ir = nir_instr_as_phi(instr); unsigned bitsize = nir_dest_bit_size(ir->dest); const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize); nir_foreach_phi_src(src, ir) { for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i) store_ssa_def(ctx, src->src.ssa, i, dummy); } } } } } static bool emit_cbvs(struct ntd_context *ctx) { if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) { nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) { if (!emit_ubo_var(ctx, var)) return false; } } else { if (ctx->shader->info.num_ubos) { const unsigned ubo_size = 16384 /*4096 vec4's*/; bool has_ubo0 = !ctx->opts->no_ubo0; bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed; unsigned ubo1_array_size = ctx->shader->info.num_ubos - (has_state_vars ?
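/* subtract ubo0 and, when the last UBO holds state vars, that one as
 * well; the UBOs in between are bound as the single __ubos array */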
2 : 1); if (has_ubo0 && !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms")) return false; if (ubo1_array_size && !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos")) return false; if (has_state_vars && !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars")) return false; } } return true; } static bool emit_scratch(struct ntd_context *ctx) { if (ctx->shader->scratch_size) { /* * We always allocate a u32 array, no matter the actual variable types. * According to the DXIL spec, the minimum load/store granularity is * 32-bit; anything smaller requires using a read-extract/read-modify-write * approach. */ unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t)); const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32); const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t)); if (!int32 || !array_length) return false; const struct dxil_type *type = dxil_module_get_array_type( &ctx->mod, int32, size / sizeof(uint32_t)); if (!type) return false; ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4); if (!ctx->scratchvars) return false; } return true; } /* The validator complains about a global variable that no ops reference, so only emit the groupshared array when such ops exist. */ static bool shader_has_shared_ops(struct nir_shader *s) { nir_foreach_function(func, s) { if (!func->impl) continue; nir_foreach_block(block, func->impl) { nir_foreach_instr(instr, block) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { case nir_intrinsic_load_shared_dxil: case nir_intrinsic_store_shared_dxil: case nir_intrinsic_shared_atomic_add_dxil: case nir_intrinsic_shared_atomic_and_dxil: case nir_intrinsic_shared_atomic_comp_swap_dxil: case nir_intrinsic_shared_atomic_exchange_dxil: case nir_intrinsic_shared_atomic_imax_dxil: case nir_intrinsic_shared_atomic_imin_dxil: case nir_intrinsic_shared_atomic_or_dxil: case nir_intrinsic_shared_atomic_umax_dxil: case nir_intrinsic_shared_atomic_umin_dxil: case nir_intrinsic_shared_atomic_xor_dxil: return true; default: break; } } } } return false; } static bool emit_function(struct ntd_context *ctx, nir_function *func) { assert(func->num_params == 0); nir_function_impl *impl = func->impl; if (!impl) return true; nir_metadata_require(impl, nir_metadata_block_index); const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod); const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0); struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks); if (!func_def) return false; if (func->is_entrypoint) ctx->main_func_def = func_def; else if (func == ctx->tess_ctrl_patch_constant_func) ctx->tess_ctrl_patch_constant_func_def = func_def; ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc); if (!ctx->defs) return false; ctx->num_defs = impl->ssa_alloc; ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); if (!ctx->phis) return false; prepare_phi_values(ctx, impl); if (!emit_scratch(ctx)) return false; if (!emit_static_indexing_handles(ctx)) return false; if (!emit_cf_list(ctx, &impl->body)) return false; hash_table_foreach(ctx->phis, entry) { if (!fixup_phi(ctx, (nir_phi_instr *)entry->key, (struct phi_block *)entry->data)) return false; } if (!dxil_emit_ret_void(&ctx->mod)) return false; ralloc_free(ctx->defs); ctx->defs = NULL; _mesa_hash_table_destroy(ctx->phis,
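/* NULL: no per-entry destructor; the phi_block payloads are
 * ralloc'ed against ctx->phis and are freed along with it */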
NULL); return true; } static bool emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts) { /* The validator forces us to emit resources in a specific order: * CBVs, Samplers, SRVs, UAVs. While we are at it also remove * stale struct uniforms; they are lowered but might not have been removed */ sort_uniforms_by_binding_and_remove_structs(ctx->shader); /* CBVs */ if (!emit_cbvs(ctx)) return false; /* Samplers */ nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { unsigned count = glsl_type_get_sampler_count(var->type); assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type))); if (count > 0 && !emit_sampler(ctx, var, count)) return false; } /* SRVs */ nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) { if (glsl_type_is_texture(glsl_without_array(var->type)) && !emit_srv(ctx, var, glsl_type_get_texture_count(var->type))) return false; } if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { nir_foreach_image_variable(var, ctx->shader) { if ((var->data.access & ACCESS_NON_WRITEABLE) && !emit_srv(ctx, var, glsl_type_get_image_count(var->type))) return false; } } /* Handle read-only SSBOs as SRVs */ if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) { unsigned count = 1; if (glsl_type_is_array(var->type)) count = glsl_get_length(var->type); if (!emit_srv(ctx, var, count)) return false; } } } if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) { const struct dxil_type *type; unsigned size; /* * We always allocate a u32 array, no matter the actual variable types. * According to the DXIL spec, the minimum load/store granularity is * 32-bit; anything smaller requires using a read-extract/read-modify-write * approach. Non-atomic 64-bit accesses are allowed, but the * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *) * sequences don't seem to be accepted by the DXIL validator when the * pointer is in the groupshared address space, making the 32-bit -> 64-bit * pointer cast impossible. */ size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t)); type = dxil_module_get_array_type(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32), size / sizeof(uint32_t)); ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type, DXIL_AS_GROUPSHARED, ffs(sizeof(uint64_t)), NULL); } /* UAVs */ if (ctx->shader->info.stage == MESA_SHADER_KERNEL) { if (!emit_globals(ctx, opts->num_kernel_globals)) return false; ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); if (!ctx->consts) return false; if (!emit_global_consts(ctx)) return false; } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) { /* Handle read/write SSBOs as UAVs */ nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) { if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) { unsigned count = 1; if (glsl_type_is_array(var->type)) count = glsl_get_length(var->type); if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set, count, DXIL_COMP_TYPE_INVALID, DXIL_RESOURCE_KIND_RAW_BUFFER, var->name)) return false; } } } else { for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) { char name[64]; snprintf(name, sizeof(name), "__ssbo%d", i); if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, DXIL_RESOURCE_KIND_RAW_BUFFER, name)) return false; } /* To work around a WARP bug, bind these descriptors a second time in descriptor * space 2.
Space 0 will be used for static indexing, while space 2 will be used * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while * space 2 will be a single array. */ if (ctx->shader->info.num_ssbos && !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, DXIL_RESOURCE_KIND_RAW_BUFFER, "__ssbo_dynamic")) return false; } nir_foreach_image_variable(var, ctx->shader) { if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN && var && (var->data.access & ACCESS_NON_WRITEABLE)) continue; // already handled in SRV if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type))) return false; } ctx->mod.info.has_per_sample_input = BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID); if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) { if (var->data.sample) { ctx->mod.info.has_per_sample_input = true; break; } } } unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ? ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size; const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, ctx->shader, input_clip_size); nir_foreach_function(func, ctx->shader) { if (!emit_function(ctx, func)) return false; } if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) { if (var->data.location == FRAG_RESULT_STENCIL) { ctx->mod.feats.stencil_ref = true; } } } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX || ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) { if (ctx->shader->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER)) ctx->mod.feats.array_layer_from_vs_or_ds = true; } if (ctx->mod.feats.native_low_precision) ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2); return emit_metadata(ctx, signatures) && dxil_emit_module(&ctx->mod); } static unsigned int get_dxil_shader_kind(struct nir_shader *s) { switch (s->info.stage) { case MESA_SHADER_VERTEX: return DXIL_VERTEX_SHADER; case MESA_SHADER_TESS_CTRL: return DXIL_HULL_SHADER; case MESA_SHADER_TESS_EVAL: return DXIL_DOMAIN_SHADER; case MESA_SHADER_GEOMETRY: return DXIL_GEOMETRY_SHADER; case MESA_SHADER_FRAGMENT: return DXIL_PIXEL_SHADER; case MESA_SHADER_KERNEL: case MESA_SHADER_COMPUTE: return DXIL_COMPUTE_SHADER; default: unreachable("unknown shader stage in nir_to_dxil"); return DXIL_COMPUTE_SHADER; } } static unsigned lower_bit_size_callback(const nir_instr* instr, void *data) { if (instr->type != nir_instr_type_alu) return 0; const nir_alu_instr *alu = nir_instr_as_alu(instr); if (nir_op_infos[alu->op].is_conversion) return 0; unsigned num_inputs = nir_op_infos[alu->op].num_inputs; const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data; unsigned min_bit_size = opts->lower_int16 ? 
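/* when 16-bit types must be lowered, every ALU op narrower than 32
 * bits (bools excepted) gets promoted to 32; otherwise 16 bits is the
 * floor, since DXIL has no 8-bit ALU ops */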
32 : 16; unsigned ret = 0; for (unsigned i = 0; i < num_inputs; i++) { unsigned bit_size = nir_src_bit_size(alu->src[i].src); if (bit_size != 1 && bit_size < min_bit_size) ret = min_bit_size; } return ret; } static void optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts) { bool progress; do { progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX); NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_copy_prop_vars); NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts); NIR_PASS(progress, s, dxil_nir_lower_8bit_conv); if (opts->lower_int16) NIR_PASS(progress, s, dxil_nir_lower_16bit_conv); NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_if, true); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, dxil_nir_lower_x2b); if (s->options->lower_int64_options) NIR_PASS(progress, s, nir_lower_int64); NIR_PASS(progress, s, nir_lower_alu); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_lower_undef_to_zero); NIR_PASS(progress, s, nir_opt_deref); NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16); NIR_PASS(progress, s, nir_lower_64bit_phis); NIR_PASS_V(s, nir_lower_system_values); } while (progress); do { progress = false; NIR_PASS(progress, s, nir_opt_algebraic_late); } while (progress); } static void dxil_fill_validation_state(struct ntd_context *ctx, struct dxil_validation_state *state) { state->num_resources = util_dynarray_num_elements(&ctx->resources, struct dxil_resource); state->resources = (struct dxil_resource*)ctx->resources.data; state->state.psv0.max_expected_wave_lane_count = UINT_MAX; state->state.shader_stage = (uint8_t)ctx->mod.shader_kind; state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs; state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs; state->state.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts; switch (ctx->mod.shader_kind) { case DXIL_VERTEX_SHADER: state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position; break; case DXIL_PIXEL_SHADER: /* TODO: handle depth outputs */ state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth; state->state.psv0.ps.sample_frequency = ctx->mod.info.has_per_sample_input; break; case DXIL_COMPUTE_SHADER: break; case DXIL_GEOMETRY_SHADER: state->state.max_vertex_count = ctx->shader->info.gs.vertices_out; state->state.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive); state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive); state->state.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1); state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position; break; case DXIL_HULL_SHADER: state->state.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count; state->state.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out; state->state.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode); state->state.psv0.hs.tessellator_output_primitive = 
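/* derived from the NIR tess state (primitive mode, winding, point
 * mode) rather than read from a single field */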
get_tessellator_output_primitive(&ctx->shader->info); state->state.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts; break; case DXIL_DOMAIN_SHADER: state->state.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out; state->state.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode); state->state.psv0.ds.output_position_present = ctx->mod.info.has_out_position; state->state.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts; break; default: assert(0 && "Shader type not (yet) supported"); } } static nir_variable * add_sysvalue(struct ntd_context *ctx, uint8_t value, char *name, int driver_location) { nir_variable *var = rzalloc(ctx->shader, nir_variable); if (!var) return NULL; var->data.driver_location = driver_location; var->data.location = value; var->type = glsl_uint_type(); var->name = name; var->data.mode = nir_var_system_value; var->data.interpolation = INTERP_MODE_FLAT; return var; } static bool append_input_or_sysvalue(struct ntd_context *ctx, int input_loc, int sv_slot, char *name, int driver_location) { if (input_loc >= 0) { /* Check the inputs for a variable that corresponds * to the sysvalue */ nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { if (var->data.location == input_loc) { ctx->system_value[sv_slot] = var; return true; } } } ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location); if (!ctx->system_value[sv_slot]) return false; nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]); return true; } struct sysvalue_name { gl_system_value value; int slot; char *name; gl_shader_stage only_in_shader; } possible_sysvalues[] = { {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE}, {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE}, {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE}, {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY}, {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE}, }; static bool allocate_sysvalues(struct ntd_context *ctx) { unsigned driver_location = 0; nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) driver_location++; nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value) driver_location++; if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT && ctx->shader->info.inputs_read && !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) { bool need_sample_id = true; /* "var->data.sample = true" sometimes just means, "I want per-sample * shading", which explains why we can end up with vars having flat * interpolation with the per-sample bit set. If there are only * such variables, we need to tell DXIL that we read SV_SampleIndex * to make DXIL validation happy.
*/ nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) { if (!var->data.sample || var->data.interpolation != INTERP_MODE_FLAT) { need_sample_id = false; break; } } if (need_sample_id) BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID); } for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) { struct sysvalue_name *info = &possible_sysvalues[i]; if (info->only_in_shader != MESA_SHADER_NONE && info->only_in_shader != ctx->shader->info.stage) continue; if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) { if (!append_input_or_sysvalue(ctx, info->slot, info->value, info->name, driver_location++)) return false; } } return true; } static int type_size_vec4(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } bool nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts, struct blob *blob) { assert(opts); bool retval = true; debug_dxil = (int)debug_get_option_debug_dxil(); blob_init(blob); struct ntd_context *ctx = calloc(1, sizeof(*ctx)); if (!ctx) return false; ctx->opts = opts; ctx->shader = s; ctx->ralloc_ctx = ralloc_context(NULL); if (!ctx->ralloc_ctx) { retval = false; goto out; } util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx); util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx); util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx); util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx); util_dynarray_init(&ctx->resources, ctx->ralloc_ctx); dxil_module_init(&ctx->mod, ctx->ralloc_ctx); ctx->mod.shader_kind = get_dxil_shader_kind(s); ctx->mod.major_version = 6; ctx->mod.minor_version = 1; if (s->info.stage <= MESA_SHADER_FRAGMENT) { uint64_t in_mask = s->info.stage == MESA_SHADER_VERTEX ? 0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER); uint64_t out_mask = s->info.stage == MESA_SHADER_FRAGMENT ? 
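/* outputs that must keep a uint type: stencil ref and sample mask for
 * fragment shaders; primitive id, viewport and layer for the other stages */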
((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER); NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask); } NIR_PASS_V(s, dxil_nir_lower_fquantize2f16); NIR_PASS_V(s, nir_lower_frexp); NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true); NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32); NIR_PASS_V(s, nir_lower_pack); NIR_PASS_V(s, dxil_nir_lower_system_values); if (ctx->mod.shader_kind == DXIL_HULL_SHADER) NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func); if (ctx->mod.shader_kind == DXIL_HULL_SHADER || ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) { /* Make sure any derefs are gone after lower_io before updating tess level vars */ NIR_PASS_V(s, nir_opt_dce); NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain); } optimize_nir(s, opts); NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_temp, NULL); if (!allocate_sysvalues(ctx)) return false; NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value); NIR_PASS_V(s, nir_opt_dce); if (debug_dxil & DXIL_DEBUG_VERBOSE) nir_print_shader(s, stderr); if (!emit_module(ctx, opts)) { debug_printf("D3D12: emit_module failed\n"); retval = false; goto out; } if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) { struct dxil_dumper *dumper = dxil_dump_create(); dxil_dump_module(dumper, &ctx->mod); fprintf(stderr, "\n"); dxil_dump_buf_to_file(dumper, stderr); fprintf(stderr, "\n\n"); dxil_dump_free(dumper); } struct dxil_container container; dxil_container_init(&container); if (!dxil_container_add_features(&container, &ctx->mod.feats)) { debug_printf("D3D12: dxil_container_add_features failed\n"); retval = false; goto out; } if (!dxil_container_add_io_signature(&container, DXIL_ISG1, ctx->mod.num_sig_inputs, ctx->mod.inputs)) { debug_printf("D3D12: failed to write input signature\n"); retval = false; goto out; } if (!dxil_container_add_io_signature(&container, DXIL_OSG1, ctx->mod.num_sig_outputs, ctx->mod.outputs)) { debug_printf("D3D12: failed to write output signature\n"); retval = false; goto out; } if ((ctx->mod.shader_kind == DXIL_HULL_SHADER || ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) && !dxil_container_add_io_signature(&container, DXIL_PSG1, ctx->mod.num_sig_patch_consts, ctx->mod.patch_consts)) { debug_printf("D3D12: failed to write patch constant signature\n"); retval = false; goto out; } struct dxil_validation_state validation_state; memset(&validation_state, 0, sizeof(validation_state)); dxil_fill_validation_state(ctx, &validation_state); if (!dxil_container_add_state_validation(&container, &ctx->mod, &validation_state)) { debug_printf("D3D12: failed to write state-validation\n"); retval = false; goto out; } if (!dxil_container_add_module(&container, &ctx->mod)) { debug_printf("D3D12: failed to write module\n"); retval = false; goto out; } if (!dxil_container_write(&container, blob)) { debug_printf("D3D12: dxil_container_write failed\n"); retval = false; goto out; } dxil_container_finish(&container); if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) { static int shader_id = 0; char buffer[64]; snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob", get_shader_kind_str(ctx->mod.shader_kind), shader_id++); debug_printf("Try to write blob to %s\n", buffer); FILE *f = fopen(buffer, "wb"); if (f) { fwrite(blob->data, 1, blob->size, f); fclose(f); } } out: dxil_module_release(&ctx->mod); ralloc_free(ctx->ralloc_ctx); free(ctx); return
retval; } enum dxil_sysvalue_type nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask) { switch (var->data.location) { case VARYING_SLOT_FACE: return DXIL_GENERATED_SYSVALUE; case VARYING_SLOT_POS: case VARYING_SLOT_PRIMITIVE_ID: case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST1: case VARYING_SLOT_PSIZ: case VARYING_SLOT_TESS_LEVEL_INNER: case VARYING_SLOT_TESS_LEVEL_OUTER: case VARYING_SLOT_VIEWPORT: case VARYING_SLOT_LAYER: if (!((1ull << var->data.location) & other_stage_mask)) return DXIL_SYSVALUE; FALLTHROUGH; default: return DXIL_NO_SYSVALUE; } }
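/* Summary of the classification above: VARYING_SLOT_FACE is always
 * re-generated (DXIL_GENERATED_SYSVALUE); the listed varyings count as
 * system values only when the adjacent stage does not also use them,
 * per other_stage_mask; everything else is an ordinary varying
 * (DXIL_NO_SYSVALUE). */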