microsoft: Add CLC frontend and kernel/compute support to DXIL converter

This adds a standalone library which can convert through the pipeline of
OpenCL C -> SPIR -> SPIR-V -> NIR -> DXIL. It can add in the libclc
implementations of various library functions in the NIR phase, and
also massages the NIR to shift it more towards graphics-style compute.

This is leveraged by the out-of-tree OpenCLOn12 runtime
(https://github.com/microsoft/OpenCLOn12).

This is the combination of a lot of commits from our development branch,
containing code by several authors.

Co-authored-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-authored-by: Daniel Stone <daniels@collabora.com>
Co-authored-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7565>
This commit is contained in:
Jesse Natalie 2020-11-06 17:09:30 +01:00 committed by Marge Bot
parent 1885e356e6
commit ff05da7f8d
21 changed files with 9215 additions and 11 deletions

View File

@ -299,6 +299,26 @@ if with_aco_tests and not with_amd_vk
error('ACO tests require Radv')
endif
_microsoft_clc = get_option('microsoft-clc')
if _microsoft_clc == 'auto'
with_microsoft_clc = false
else
with_microsoft_clc = _microsoft_clc == 'true'
endif
if with_microsoft_clc
with_clc = true
dep_clang = dependency(
'clang',
method: 'cmake',
static: true,
modules: [
'clangBasic', 'clangCodeGen', 'clangDriver', 'clangFrontend', 'clangFrontendTool',
'clangHandleCXX', 'clangHandleLLVM',
],
)
endif
if host_machine.system() == 'darwin'
with_dri_platform = 'apple'
pre_args += '-DBUILDING_MESA'
@ -1470,8 +1490,13 @@ if with_gallium_opencl
'lto', 'option', 'objcarcopts', 'profiledata',
]
endif
if with_microsoft_clc
llvm_modules += ['target', 'linker', 'irreader', 'option', 'libdriver']
endif
if with_amd_vk or with_gallium_radeonsi or with_gallium_opencl
if with_microsoft_clc
_llvm_version = '>= 10.0.0'
elif with_amd_vk or with_gallium_radeonsi or with_gallium_opencl
_llvm_version = '>= 8.0.0'
elif with_gallium_swr
_llvm_version = '>= 6.0.0'
@ -1521,7 +1546,7 @@ if _llvm != 'disabled'
optional_modules : llvm_optional_modules,
required : (
with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
with_gallium_opencl or _llvm == 'enabled'
with_gallium_opencl or with_microsoft_clc or _llvm == 'enabled'
),
static : not _shared_llvm,
method : _llvm_method,
@ -1564,9 +1589,11 @@ elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of these is enabled, but LLVM is disabled.')
elif with_gallium_opencl
error('The OpenCL "Clover" state tracker requires LLVM, but LLVM is disabled.')
elif with_microsoft_clc
error('The Microsoft CLC compiler requires LLVM, but LLVM is disabled.')
endif
with_opencl_spirv = _opencl != 'disabled' and get_option('opencl-spirv')
with_opencl_spirv = (_opencl != 'disabled' and get_option('opencl-spirv')) or with_microsoft_clc
if with_opencl_spirv
chosen_llvm_version_array = dep_llvm.version().split('.')
chosen_llvm_version_major = chosen_llvm_version_array[0].to_int()

View File

@ -261,6 +261,13 @@ option(
value : false,
description : 'Enable GLVND support.'
)
option(
'microsoft-clc',
type : 'combo',
value : 'auto',
choices : ['auto', 'true', 'false'],
description : 'Build support for the Microsoft CLC to DXIL compiler'
)
option(
'glx-read-only-text',
type : 'boolean',

View File

@ -946,9 +946,45 @@ load("global_ir3", [2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN
intrinsic("bindless_resource_ir3", [1], dest_comp=1, indices=[DESC_SET], flags=[CAN_ELIMINATE, CAN_REORDER])
# DXIL specific intrinsics
# src[] = { value, mask, index, offset }.
intrinsic("store_ssbo_masked_dxil", [1, 1, 1, 1])
# src[] = { value, index }.
intrinsic("store_shared_dxil", [1, 1])
# src[] = { value, mask, index }.
intrinsic("store_shared_masked_dxil", [1, 1, 1])
# src[] = { value, index }.
intrinsic("store_scratch_dxil", [1, 1])
# src[] = { index }.
load("shared_dxil", [1], [], [CAN_ELIMINATE])
# src[] = { index }.
load("scratch_dxil", [1], [], [CAN_ELIMINATE])
# src[] = { deref_var, offset }
load("ptr_dxil", [1, 1], [], [])
# src[] = { index, 16-byte-based-offset }
load("ubo_dxil", [1, 1], [], [CAN_ELIMINATE])
# DXIL Shared atomic intrinsics
#
# All of the shared variable atomic memory operations read a value from
# memory, compute a new value using one of the operations below, write the
# new value to memory, and return the original value read.
#
# All operations take 2 sources:
#
# 0: The index in the i32 array for by the shared memory region
# 1: The data parameter to the atomic function (i.e. the value to add
# in shared_atomic_add, etc).
intrinsic("shared_atomic_add_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_imin_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_umin_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_imax_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_umax_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_and_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_or_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_xor_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_exchange_dxil", src_comp=[1, 1], dest_comp=1)
intrinsic("shared_atomic_comp_swap_dxil", src_comp=[1, 1, 1], dest_comp=1)
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
# within a blend shader to read/write the raw value from the tile buffer,
# without applying any format conversion in the process. If the shader needs

View File

@ -91,7 +91,7 @@ endif
if with_any_intel
subdir('intel')
endif
if with_gallium_d3d12
if with_microsoft_clc or with_gallium_d3d12
subdir('microsoft')
endif
subdir('mesa')

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,266 @@
/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CLC_COMPILER_H
#define CLC_COMPILER_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
#include <stdint.h>
struct clc_named_value {
const char *name;
const char *value;
};
struct clc_compile_args {
const struct clc_named_value *headers;
unsigned num_headers;
struct clc_named_value source;
const char * const *args;
unsigned num_args;
};
struct clc_linker_args {
const struct clc_object * const *in_objs;
unsigned num_in_objs;
unsigned create_library;
};
typedef void (*clc_msg_callback)(void *priv, const char *msg);
struct clc_logger {
void *priv;
clc_msg_callback error;
clc_msg_callback warning;
};
struct spirv_binary {
uint32_t *data;
size_t size;
};
enum clc_kernel_arg_type_qualifier {
CLC_KERNEL_ARG_TYPE_CONST = 1 << 0,
CLC_KERNEL_ARG_TYPE_RESTRICT = 1 << 1,
CLC_KERNEL_ARG_TYPE_VOLATILE = 1 << 2,
};
enum clc_kernel_arg_access_qualifier {
CLC_KERNEL_ARG_ACCESS_READ = 1 << 0,
CLC_KERNEL_ARG_ACCESS_WRITE = 1 << 1,
};
enum clc_kernel_arg_address_qualifier {
CLC_KERNEL_ARG_ADDRESS_PRIVATE,
CLC_KERNEL_ARG_ADDRESS_CONSTANT,
CLC_KERNEL_ARG_ADDRESS_LOCAL,
CLC_KERNEL_ARG_ADDRESS_GLOBAL,
};
struct clc_kernel_arg {
const char *name;
const char *type_name;
unsigned type_qualifier;
unsigned access_qualifier;
enum clc_kernel_arg_address_qualifier address_qualifier;
};
enum clc_vec_hint_type {
CLC_VEC_HINT_TYPE_CHAR = 0,
CLC_VEC_HINT_TYPE_SHORT = 1,
CLC_VEC_HINT_TYPE_INT = 2,
CLC_VEC_HINT_TYPE_LONG = 3,
CLC_VEC_HINT_TYPE_HALF = 4,
CLC_VEC_HINT_TYPE_FLOAT = 5,
CLC_VEC_HINT_TYPE_DOUBLE = 6
};
struct clc_kernel_info {
const char *name;
size_t num_args;
const struct clc_kernel_arg *args;
unsigned vec_hint_size;
enum clc_vec_hint_type vec_hint_type;
};
struct clc_object {
struct spirv_binary spvbin;
const struct clc_kernel_info *kernels;
unsigned num_kernels;
};
#define CLC_MAX_CONSTS 32
#define CLC_MAX_BINDINGS_PER_ARG 3
#define CLC_MAX_SAMPLERS 16
struct clc_dxil_metadata {
struct {
unsigned offset;
unsigned size;
union {
struct {
unsigned buf_ids[CLC_MAX_BINDINGS_PER_ARG];
unsigned num_buf_ids;
} image;
struct {
unsigned sampler_id;
} sampler;
struct {
unsigned buf_id;
} globconstptr;
struct {
unsigned sharedmem_offset;
} localptr;
};
} *args;
unsigned kernel_inputs_cbv_id;
unsigned kernel_inputs_buf_size;
unsigned work_properties_cbv_id;
size_t num_uavs;
size_t num_srvs;
size_t num_samplers;
struct {
void *data;
size_t size;
unsigned uav_id;
} consts[CLC_MAX_CONSTS];
size_t num_consts;
struct {
unsigned sampler_id;
unsigned addressing_mode;
unsigned normalized_coords;
unsigned filter_mode;
} const_samplers[CLC_MAX_SAMPLERS];
size_t num_const_samplers;
size_t local_mem_size;
size_t priv_mem_size;
uint16_t local_size[3];
uint16_t local_size_hint[3];
int printf_uav_id;
};
struct clc_dxil_object {
const struct clc_kernel_info *kernel;
struct clc_dxil_metadata metadata;
struct {
void *data;
size_t size;
} binary;
};
struct clc_context {
const void *libclc_nir;
};
struct clc_context_options {
unsigned optimize;
};
struct clc_context *clc_context_new(const struct clc_logger *logger, const struct clc_context_options *options);
void clc_free_context(struct clc_context *ctx);
void clc_context_serialize(struct clc_context *ctx, void **serialized, size_t *size);
void clc_context_free_serialized(void *serialized);
struct clc_context *clc_context_deserialize(void *serialized, size_t size);
struct clc_object *
clc_compile(struct clc_context *ctx,
const struct clc_compile_args *args,
const struct clc_logger *logger);
struct clc_object *
clc_link(struct clc_context *ctx,
const struct clc_linker_args *args,
const struct clc_logger *logger);
void clc_free_object(struct clc_object *obj);
struct clc_runtime_arg_info {
union {
struct {
unsigned size;
} localptr;
struct {
unsigned normalized_coords;
unsigned addressing_mode; /* See SPIR-V spec for value meanings */
unsigned linear_filtering;
} sampler;
};
};
struct clc_runtime_kernel_conf {
uint16_t local_size[3];
struct clc_runtime_arg_info *args;
unsigned lower_bit_size;
unsigned support_global_work_id_offsets;
unsigned support_work_group_id_offsets;
};
struct clc_dxil_object *
clc_to_dxil(struct clc_context *ctx,
const struct clc_object *obj,
const char *entrypoint,
const struct clc_runtime_kernel_conf *conf,
const struct clc_logger *logger);
void clc_free_dxil_object(struct clc_dxil_object *dxil);
/* This struct describes the layout of data expected in the CB bound at global_work_offset_cbv_id */
struct clc_work_properties_data {
/* Returned from get_global_offset(), and added into get_global_id() */
unsigned global_offset_x;
unsigned global_offset_y;
unsigned global_offset_z;
/* Returned from get_work_dim() */
unsigned work_dim;
/* The number of work groups being launched (i.e. the parameters to Dispatch).
* If the requested global size doesn't fit in a single Dispatch, these values should
* indicate the total number of groups that *should* have been launched. */
unsigned group_count_total_x;
unsigned group_count_total_y;
unsigned group_count_total_z;
unsigned padding;
/* If the requested global size doesn't fit in a single Dispatch, subsequent dispatches
* should fill out these offsets to indicate how many groups have already been launched */
unsigned group_id_offset_x;
unsigned group_id_offset_y;
unsigned group_id_offset_z;
};
uint64_t clc_compiler_get_version();
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,811 @@
//
// Copyright 2012-2016 Francisco Jerez
// Copyright 2012-2016 Advanced Micro Devices, Inc.
// Copyright 2014-2016 Jan Vesely
// Copyright 2014-2015 Serge Martin
// Copyright 2015 Zoltan Gilian
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <sstream>
#include <llvm/ADT/ArrayRef.h>
#include <llvm/IR/DiagnosticPrinter.h>
#include <llvm/IR/DiagnosticInfo.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Type.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm-c/Core.h>
#include <llvm-c/Target.h>
#include <LLVMSPIRVLib/LLVMSPIRVLib.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Lex/PreprocessorOptions.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/TextDiagnosticBuffer.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Basic/TargetInfo.h>
#include <spirv-tools/libspirv.hpp>
#include <spirv-tools/linker.hpp>
#include "util/macros.h"
#include "glsl_types.h"
#include "nir.h"
#include "nir_types.h"
#include "clc_helpers.h"
#include "spirv.h"
#include "opencl-c.h.h"
#include "opencl-c-base.h.h"
using ::llvm::Function;
using ::llvm::LLVMContext;
using ::llvm::Module;
using ::llvm::raw_string_ostream;
static void
llvm_log_handler(const ::llvm::DiagnosticInfo &di, void *data) {
raw_string_ostream os { *reinterpret_cast<std::string *>(data) };
::llvm::DiagnosticPrinterRawOStream printer { os };
di.print(printer);
}
class SPIRVKernelArg {
public:
SPIRVKernelArg(uint32_t id, uint32_t typeId) : id(id), typeId(typeId),
addrQualifier(CLC_KERNEL_ARG_ADDRESS_PRIVATE),
accessQualifier(0),
typeQualifier(0) { }
~SPIRVKernelArg() { }
uint32_t id;
uint32_t typeId;
std::string name;
std::string typeName;
enum clc_kernel_arg_address_qualifier addrQualifier;
unsigned accessQualifier;
unsigned typeQualifier;
};
class SPIRVKernelInfo {
public:
SPIRVKernelInfo(uint32_t fid, const char *nm) : funcId(fid), name(nm), vecHint(0) { }
~SPIRVKernelInfo() { }
uint32_t funcId;
std::string name;
std::vector<SPIRVKernelArg> args;
unsigned vecHint;
};
class SPIRVKernelParser {
public:
SPIRVKernelParser() : curKernel(NULL)
{
ctx = spvContextCreate(SPV_ENV_UNIVERSAL_1_0);
}
~SPIRVKernelParser()
{
spvContextDestroy(ctx);
}
void parseEntryPoint(const spv_parsed_instruction_t *ins)
{
assert(ins->num_operands >= 3);
const spv_parsed_operand_t *op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_ID);
uint32_t funcId = ins->words[op->offset];
for (auto &iter : kernels) {
if (funcId == iter.funcId)
return;
}
op = &ins->operands[2];
assert(op->type == SPV_OPERAND_TYPE_LITERAL_STRING);
const char *name = reinterpret_cast<const char *>(ins->words + op->offset);
kernels.push_back(SPIRVKernelInfo(funcId, name));
}
void parseFunction(const spv_parsed_instruction_t *ins)
{
assert(ins->num_operands == 4);
const spv_parsed_operand_t *op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
uint32_t funcId = ins->words[op->offset];
SPIRVKernelInfo *kernel = NULL;
for (auto &kernel : kernels) {
if (funcId == kernel.funcId && !kernel.args.size()) {
curKernel = &kernel;
return;
}
}
}
void parseFunctionParam(const spv_parsed_instruction_t *ins)
{
const spv_parsed_operand_t *op;
uint32_t id, typeId;
if (!curKernel)
return;
assert(ins->num_operands == 2);
op = &ins->operands[0];
assert(op->type == SPV_OPERAND_TYPE_TYPE_ID);
typeId = ins->words[op->offset];
op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
id = ins->words[op->offset];
curKernel->args.push_back(SPIRVKernelArg(id, typeId));
}
void parseName(const spv_parsed_instruction_t *ins)
{
const spv_parsed_operand_t *op;
const char *name;
uint32_t id;
assert(ins->num_operands == 2);
op = &ins->operands[0];
assert(op->type == SPV_OPERAND_TYPE_ID);
id = ins->words[op->offset];
op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_LITERAL_STRING);
name = reinterpret_cast<const char *>(ins->words + op->offset);
for (auto &kernel : kernels) {
for (auto &arg : kernel.args) {
if (arg.id == id && arg.name.empty()) {
arg.name = name;
break;
}
}
}
}
void parseTypePointer(const spv_parsed_instruction_t *ins)
{
enum clc_kernel_arg_address_qualifier addrQualifier;
uint32_t typeId, targetTypeId, storageClass;
const spv_parsed_operand_t *op;
const char *typeName;
assert(ins->num_operands == 3);
op = &ins->operands[0];
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
typeId = ins->words[op->offset];
op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_STORAGE_CLASS);
storageClass = ins->words[op->offset];
switch (storageClass) {
case SpvStorageClassCrossWorkgroup:
addrQualifier = CLC_KERNEL_ARG_ADDRESS_GLOBAL;
break;
case SpvStorageClassWorkgroup:
addrQualifier = CLC_KERNEL_ARG_ADDRESS_LOCAL;
break;
case SpvStorageClassUniformConstant:
addrQualifier = CLC_KERNEL_ARG_ADDRESS_CONSTANT;
break;
default:
addrQualifier = CLC_KERNEL_ARG_ADDRESS_PRIVATE;
break;
}
for (auto &kernel : kernels) {
for (auto &arg : kernel.args) {
if (arg.typeId == typeId)
arg.addrQualifier = addrQualifier;
}
}
}
void parseOpString(const spv_parsed_instruction_t *ins)
{
const spv_parsed_operand_t *op;
std::string str;
assert(ins->num_operands == 2);
op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_LITERAL_STRING);
str = reinterpret_cast<const char *>(ins->words + op->offset);
if (str.find("kernel_arg_type.") != 0)
return;
size_t start = sizeof("kernel_arg_type.") - 1;
for (auto &kernel : kernels) {
size_t pos;
pos = str.find(kernel.name, start);
if (pos == std::string::npos ||
pos != start || str[start + kernel.name.size()] != '.')
continue;
pos = start + kernel.name.size();
if (str[pos++] != '.')
continue;
for (auto &arg : kernel.args) {
if (arg.name.empty())
break;
size_t typeEnd = str.find(',', pos);
if (typeEnd == std::string::npos)
break;
arg.typeName = str.substr(pos, typeEnd - pos);
pos = typeEnd + 1;
}
}
}
void applyDecoration(uint32_t id, const spv_parsed_instruction_t *ins)
{
auto iter = decorationGroups.find(id);
if (iter != decorationGroups.end()) {
for (uint32_t entry : iter->second)
applyDecoration(entry, ins);
return;
}
const spv_parsed_operand_t *op;
uint32_t decoration;
assert(ins->num_operands >= 2);
op = &ins->operands[1];
assert(op->type == SPV_OPERAND_TYPE_DECORATION);
decoration = ins->words[op->offset];
for (auto &kernel : kernels) {
for (auto &arg : kernel.args) {
if (arg.id == id) {
switch (decoration) {
case SpvDecorationVolatile:
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_VOLATILE;
break;
case SpvDecorationConstant:
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_CONST;
break;
case SpvDecorationRestrict:
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_RESTRICT;
break;
case SpvDecorationFuncParamAttr:
op = &ins->operands[2];
assert(op->type == SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE);
switch (ins->words[op->offset]) {
case SpvFunctionParameterAttributeNoAlias:
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_RESTRICT;
break;
case SpvFunctionParameterAttributeNoWrite:
arg.typeQualifier |= CLC_KERNEL_ARG_TYPE_CONST;
break;
}
break;
}
}
}
}
}
void parseOpDecorate(const spv_parsed_instruction_t *ins)
{
const spv_parsed_operand_t *op;
uint32_t id, decoration;
assert(ins->num_operands >= 2);
op = &ins->operands[0];
assert(op->type == SPV_OPERAND_TYPE_ID);
id = ins->words[op->offset];
applyDecoration(id, ins);
}
void parseOpGroupDecorate(const spv_parsed_instruction_t *ins)
{
assert(ins->num_operands >= 2);
const spv_parsed_operand_t *op = &ins->operands[0];
assert(op->type == SPV_OPERAND_TYPE_ID);
uint32_t groupId = ins->words[op->offset];
auto lowerBound = decorationGroups.lower_bound(groupId);
if (lowerBound != decorationGroups.end() &&
lowerBound->first == groupId)
// Group already filled out
return;
auto iter = decorationGroups.emplace_hint(lowerBound, groupId, std::vector<uint32_t>{});
auto& vec = iter->second;
vec.reserve(ins->num_operands - 1);
for (uint32_t i = 1; i < ins->num_operands; ++i) {
op = &ins->operands[i];
assert(op->type == SPV_OPERAND_TYPE_ID);
vec.push_back(ins->words[op->offset]);
}
}
void parseOpTypeImage(const spv_parsed_instruction_t *ins)
{
const spv_parsed_operand_t *op;
uint32_t typeId;
unsigned accessQualifier = CLC_KERNEL_ARG_ACCESS_READ;
op = &ins->operands[0];
assert(op->type == SPV_OPERAND_TYPE_RESULT_ID);
typeId = ins->words[op->offset];
if (ins->num_operands >= 9) {
op = &ins->operands[8];
assert(op->type == SPV_OPERAND_TYPE_ACCESS_QUALIFIER);
switch (ins->words[op->offset]) {
case SpvAccessQualifierReadOnly:
accessQualifier = CLC_KERNEL_ARG_ACCESS_READ;
break;
case SpvAccessQualifierWriteOnly:
accessQualifier = CLC_KERNEL_ARG_ACCESS_WRITE;
break;
case SpvAccessQualifierReadWrite:
accessQualifier = CLC_KERNEL_ARG_ACCESS_WRITE |
CLC_KERNEL_ARG_ACCESS_READ;
break;
}
}
for (auto &kernel : kernels) {
for (auto &arg : kernel.args) {
if (arg.typeId == typeId) {
arg.accessQualifier = accessQualifier;
arg.addrQualifier = CLC_KERNEL_ARG_ADDRESS_GLOBAL;
}
}
}
}
void parseExecutionMode(const spv_parsed_instruction_t *ins)
{
uint32_t executionMode = ins->words[ins->operands[1].offset];
if (executionMode != SpvExecutionModeVecTypeHint)
return;
uint32_t funcId = ins->words[ins->operands[0].offset];
uint32_t vecHint = ins->words[ins->operands[2].offset];
for (auto& kernel : kernels) {
if (kernel.funcId == funcId)
kernel.vecHint = vecHint;
}
}
static spv_result_t
parseInstruction(void *data, const spv_parsed_instruction_t *ins)
{
SPIRVKernelParser *parser = reinterpret_cast<SPIRVKernelParser *>(data);
switch (ins->opcode) {
case SpvOpName:
parser->parseName(ins);
break;
case SpvOpEntryPoint:
parser->parseEntryPoint(ins);
break;
case SpvOpFunction:
parser->parseFunction(ins);
break;
case SpvOpFunctionParameter:
parser->parseFunctionParam(ins);
break;
case SpvOpFunctionEnd:
case SpvOpLabel:
parser->curKernel = NULL;
break;
case SpvOpTypePointer:
parser->parseTypePointer(ins);
break;
case SpvOpTypeImage:
parser->parseOpTypeImage(ins);
break;
case SpvOpString:
parser->parseOpString(ins);
break;
case SpvOpDecorate:
parser->parseOpDecorate(ins);
break;
case SpvOpGroupDecorate:
parser->parseOpGroupDecorate(ins);
break;
case SpvOpExecutionMode:
parser->parseExecutionMode(ins);
break;
default:
break;
}
return SPV_SUCCESS;
}
bool parsingComplete()
{
for (auto &kernel : kernels) {
if (kernel.name.empty())
return false;
for (auto &arg : kernel.args) {
if (arg.name.empty() || arg.typeName.empty())
return false;
}
}
return true;
}
void parseBinary(const struct spirv_binary &spvbin)
{
/* 3 passes should be enough to retrieve all kernel information:
* 1st pass: all entry point name and number of args
* 2nd pass: argument names and type names
* 3rd pass: pointer type names
*/
for (unsigned pass = 0; pass < 3; pass++) {
spvBinaryParse(ctx, reinterpret_cast<void *>(this),
spvbin.data, spvbin.size / 4,
NULL, parseInstruction, NULL);
if (parsingComplete())
return;
}
assert(0);
}
std::vector<SPIRVKernelInfo> kernels;
std::map<uint32_t, std::vector<uint32_t>> decorationGroups;
SPIRVKernelInfo *curKernel;
spv_context ctx;
};
const struct clc_kernel_info *
clc_spirv_get_kernels_info(const struct spirv_binary *spvbin,
unsigned *num_kernels)
{
struct clc_kernel_info *kernels;
SPIRVKernelParser parser;
parser.parseBinary(*spvbin);
*num_kernels = parser.kernels.size();
if (!*num_kernels)
return NULL;
kernels = reinterpret_cast<struct clc_kernel_info *>(calloc(*num_kernels,
sizeof(*kernels)));
assert(kernels);
for (unsigned i = 0; i < parser.kernels.size(); i++) {
kernels[i].name = strdup(parser.kernels[i].name.c_str());
kernels[i].num_args = parser.kernels[i].args.size();
kernels[i].vec_hint_size = parser.kernels[i].vecHint >> 16;
kernels[i].vec_hint_type = (enum clc_vec_hint_type)(parser.kernels[i].vecHint & 0xFFFF);
if (!kernels[i].num_args)
continue;
struct clc_kernel_arg *args;
args = reinterpret_cast<struct clc_kernel_arg *>(calloc(kernels[i].num_args,
sizeof(*kernels->args)));
kernels[i].args = args;
assert(args);
for (unsigned j = 0; j < kernels[i].num_args; j++) {
if (!parser.kernels[i].args[j].name.empty())
args[j].name = strdup(parser.kernels[i].args[j].name.c_str());
args[j].type_name = strdup(parser.kernels[i].args[j].typeName.c_str());
args[j].address_qualifier = parser.kernels[i].args[j].addrQualifier;
args[j].type_qualifier = parser.kernels[i].args[j].typeQualifier;
args[j].access_qualifier = parser.kernels[i].args[j].accessQualifier;
}
}
return kernels;
}
void
clc_free_kernels_info(const struct clc_kernel_info *kernels,
unsigned num_kernels)
{
if (!kernels)
return;
for (unsigned i = 0; i < num_kernels; i++) {
if (kernels[i].args) {
for (unsigned j = 0; j < kernels[i].num_args; j++) {
free((void *)kernels[i].args[j].name);
free((void *)kernels[i].args[j].type_name);
}
}
free((void *)kernels[i].name);
}
free((void *)kernels);
}
int
clc_to_spirv(const struct clc_compile_args *args,
struct spirv_binary *spvbin,
const struct clc_logger *logger)
{
LLVMInitializeAllTargets();
LLVMInitializeAllTargetInfos();
LLVMInitializeAllTargetMCs();
LLVMInitializeAllAsmPrinters();
std::string log;
std::unique_ptr<LLVMContext> llvm_ctx { new LLVMContext };
llvm_ctx->setDiagnosticHandlerCallBack(llvm_log_handler, &log);
std::unique_ptr<clang::CompilerInstance> c { new clang::CompilerInstance };
clang::DiagnosticsEngine diag { new clang::DiagnosticIDs,
new clang::DiagnosticOptions,
new clang::TextDiagnosticPrinter(*new raw_string_ostream(log),
&c->getDiagnosticOpts(), true)};
std::vector<const char *> clang_opts = {
args->source.name,
"-triple", "spir64-unknown-unknown",
// By default, clang prefers to use modules to pull in the default headers,
// which doesn't work with our technique of embedding the headers in our binary
"-finclude-default-header",
// Add a default CL compiler version. Clang will pick the last one specified
// on the command line, so the app can override this one.
"-cl-std=cl1.2",
// The LLVM-SPIRV-Translator doesn't support memset with variable size
"-fno-builtin-memset",
// LLVM's optimizations can produce code that the translator can't translate
"-O0",
};
// We assume there's appropriate defines for __OPENCL_VERSION__ and __IMAGE_SUPPORT__
// being provided by the caller here.
clang_opts.insert(clang_opts.end(), args->args, args->args + args->num_args);
if (!clang::CompilerInvocation::CreateFromArgs(c->getInvocation(),
#if LLVM_VERSION_MAJOR >= 10
clang_opts,
#else
clang_opts.data(),
clang_opts.data() + clang_opts.size(),
#endif
diag)) {
log += "Couldn't create Clang invocation.\n";
clc_error(logger, log.c_str());
return -1;
}
if (diag.hasErrorOccurred()) {
log += "Errors occurred during Clang invocation.\n";
clc_error(logger, log.c_str());
return -1;
}
// This is a workaround for a Clang bug which causes the number
// of warnings and errors to be printed to stderr.
// http://www.llvm.org/bugs/show_bug.cgi?id=19735
c->getDiagnosticOpts().ShowCarets = false;
c->createDiagnostics(new clang::TextDiagnosticPrinter(
*new raw_string_ostream(log),
&c->getDiagnosticOpts(), true));
c->setTarget(clang::TargetInfo::CreateTargetInfo(
c->getDiagnostics(), c->getInvocation().TargetOpts));
c->getFrontendOpts().ProgramAction = clang::frontend::EmitLLVMOnly;
c->getHeaderSearchOpts().UseBuiltinIncludes = false;
c->getHeaderSearchOpts().UseStandardSystemIncludes = false;
// Add opencl-c generic search path
{
::llvm::SmallString<128> system_header_path;
::llvm::sys::path::system_temp_directory(true, system_header_path);
::llvm::sys::path::append(system_header_path, "openclon12");
c->getHeaderSearchOpts().AddPath(system_header_path.str(),
clang::frontend::Angled,
false, false);
::llvm::sys::path::append(system_header_path, "opencl-c.h");
c->getPreprocessorOpts().addRemappedFile(system_header_path.str(),
::llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(opencl_c_source, _countof(opencl_c_source) - 1)).release());
::llvm::sys::path::remove_filename(system_header_path);
::llvm::sys::path::append(system_header_path, "opencl-c-base.h");
c->getPreprocessorOpts().addRemappedFile(system_header_path.str(),
::llvm::MemoryBuffer::getMemBuffer(llvm::StringRef(opencl_c_base_source, _countof(opencl_c_base_source) - 1)).release());
}
if (args->num_headers) {
::llvm::SmallString<128> tmp_header_path;
::llvm::sys::path::system_temp_directory(true, tmp_header_path);
::llvm::sys::path::append(tmp_header_path, "openclon12");
c->getHeaderSearchOpts().AddPath(tmp_header_path.str(),
clang::frontend::Quoted,
false, false);
for (size_t i = 0; i < args->num_headers; i++) {
auto path_copy = tmp_header_path;
::llvm::sys::path::append(path_copy, ::llvm::sys::path::convert_to_slash(args->headers[i].name));
c->getPreprocessorOpts().addRemappedFile(path_copy.str(),
::llvm::MemoryBuffer::getMemBufferCopy(args->headers[i].value).release());
}
}
c->getPreprocessorOpts().addRemappedFile(
args->source.name,
::llvm::MemoryBuffer::getMemBufferCopy(std::string(args->source.value)).release());
// Compile the code
clang::EmitLLVMOnlyAction act(llvm_ctx.get());
if (!c->ExecuteAction(act)) {
log += "Error executing LLVM compilation action.\n";
clc_error(logger, log.c_str());
return -1;
}
auto mod = act.takeModule();
std::ostringstream spv_stream;
if (!::llvm::writeSpirv(mod.get(), spv_stream, log)) {
log += "Translation from LLVM IR to SPIR-V failed.\n";
clc_error(logger, log.c_str());
return -1;
}
const std::string spv_out = spv_stream.str();
spvbin->size = spv_out.size();
spvbin->data = static_cast<uint32_t *>(malloc(spvbin->size));
memcpy(spvbin->data, spv_out.data(), spvbin->size);
return 0;
}
static const char *
spv_result_to_str(spv_result_t res)
{
switch (res) {
case SPV_SUCCESS: return "success";
case SPV_UNSUPPORTED: return "unsupported";
case SPV_END_OF_STREAM: return "end of stream";
case SPV_WARNING: return "warning";
case SPV_FAILED_MATCH: return "failed match";
case SPV_REQUESTED_TERMINATION: return "requested termination";
case SPV_ERROR_INTERNAL: return "internal error";
case SPV_ERROR_OUT_OF_MEMORY: return "out of memory";
case SPV_ERROR_INVALID_POINTER: return "invalid pointer";
case SPV_ERROR_INVALID_BINARY: return "invalid binary";
case SPV_ERROR_INVALID_TEXT: return "invalid text";
case SPV_ERROR_INVALID_TABLE: return "invalid table";
case SPV_ERROR_INVALID_VALUE: return "invalid value";
case SPV_ERROR_INVALID_DIAGNOSTIC: return "invalid diagnostic";
case SPV_ERROR_INVALID_LOOKUP: return "invalid lookup";
case SPV_ERROR_INVALID_ID: return "invalid id";
case SPV_ERROR_INVALID_CFG: return "invalid config";
case SPV_ERROR_INVALID_LAYOUT: return "invalid layout";
case SPV_ERROR_INVALID_CAPABILITY: return "invalid capability";
case SPV_ERROR_INVALID_DATA: return "invalid data";
case SPV_ERROR_MISSING_EXTENSION: return "missing extension";
case SPV_ERROR_WRONG_VERSION: return "wrong version";
default: return "unknown error";
}
}
class SPIRVMessageConsumer {
public:
SPIRVMessageConsumer(const struct clc_logger *logger): logger(logger) {}
void operator()(spv_message_level_t level, const char *src,
const spv_position_t &pos, const char *msg)
{
switch(level) {
case SPV_MSG_FATAL:
case SPV_MSG_INTERNAL_ERROR:
case SPV_MSG_ERROR:
clc_error(logger, "(file=%s,line=%ld,column=%ld,index=%ld): %s",
src, pos.line, pos.column, pos.index, msg);
break;
case SPV_MSG_WARNING:
clc_warning(logger, "(file=%s,line=%ld,column=%ld,index=%ld): %s",
src, pos.line, pos.column, pos.index, msg);
break;
default:
break;
}
}
private:
const struct clc_logger *logger;
};
int
clc_link_spirv_binaries(const struct clc_linker_args *args,
struct spirv_binary *dst_bin,
const struct clc_logger *logger)
{
std::vector<std::vector<uint32_t>> binaries;
for (unsigned i = 0; i < args->num_in_objs; i++) {
std::vector<uint32_t> bin(args->in_objs[i]->spvbin.data,
args->in_objs[i]->spvbin.data +
(args->in_objs[i]->spvbin.size / 4));
binaries.push_back(bin);
}
SPIRVMessageConsumer msgconsumer(logger);
spvtools::Context context(SPV_ENV_UNIVERSAL_1_0);
context.SetMessageConsumer(msgconsumer);
spvtools::LinkerOptions options;
options.SetAllowPartialLinkage(args->create_library);
options.SetCreateLibrary(args->create_library);
std::vector<uint32_t> linkingResult;
spv_result_t status = spvtools::Link(context, binaries, &linkingResult, options);
if (status != SPV_SUCCESS) {
return -1;
}
dst_bin->size = linkingResult.size() * 4;
dst_bin->data = static_cast<uint32_t *>(malloc(dst_bin->size));
memcpy(dst_bin->data, linkingResult.data(), dst_bin->size);
return 0;
}
void
clc_dump_spirv(const struct spirv_binary *spvbin, FILE *f)
{
spvtools::SpirvTools tools(SPV_ENV_UNIVERSAL_1_0);
std::vector<uint32_t> bin(spvbin->data, spvbin->data + (spvbin->size / 4));
std::string out;
tools.Disassemble(bin, &out,
SPV_BINARY_TO_TEXT_OPTION_INDENT |
SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES);
fwrite(out.c_str(), out.size(), 1, f);
}
void
clc_free_spirv_binary(struct spirv_binary *spvbin)
{
free(spvbin->data);
}

View File

@ -0,0 +1,81 @@
/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CLC_TO_NIR_H
#define CLC_TO_NIR_H
#ifdef __cplusplus
extern "C" {
#endif
#include "nir_types.h"
#include "clc_compiler.h"
#include "util/u_string.h"
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdint.h>
const struct clc_kernel_info *
clc_spirv_get_kernels_info(const struct spirv_binary *spvbin,
unsigned *num_kernels);
void
clc_free_kernels_info(const struct clc_kernel_info *kernels,
unsigned num_kernels);
int
clc_to_spirv(const struct clc_compile_args *args,
struct spirv_binary *spvbin,
const struct clc_logger *logger);
int
clc_link_spirv_binaries(const struct clc_linker_args *args,
struct spirv_binary *dst_bin,
const struct clc_logger *logger);
void
clc_dump_spirv(const struct spirv_binary *spvbin, FILE *f);
void
clc_free_spirv_binary(struct spirv_binary *spvbin);
#define clc_log(logger, level, fmt, ...) do { \
if (!logger || !logger->level) break; \
char *msg = NULL; \
asprintf(&msg, fmt, __VA_ARGS__); \
assert(msg); \
logger->level(logger->priv, msg); \
free(msg); \
} while (0)
#define clc_error(logger, fmt, ...) clc_log(logger, error, fmt, __VA_ARGS__)
#define clc_warning(logger, fmt, ...) clc_log(logger, warning, fmt, __VA_ARGS__)
#ifdef __cplusplus
}
#endif
#endif

388
src/microsoft/clc/clc_nir.c Normal file
View File

@ -0,0 +1,388 @@
/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "u_math.h"
#include "nir.h"
#include "glsl_types.h"
#include "nir_types.h"
#include "nir_builder.h"
#include "clc_nir.h"
#include "clc_compiler.h"
#include "../compiler/dxil_nir.h"
static bool
lower_load_base_global_invocation_id(nir_builder *b, nir_intrinsic_instr *intr,
nir_variable *var)
{
b->cursor = nir_after_instr(&intr->instr);
nir_ssa_def *offset =
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
nir_imm_int(b,
offsetof(struct clc_work_properties_data,
global_offset_x)),
nir_dest_num_components(intr->dest),
nir_dest_bit_size(intr->dest));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(offset));
nir_instr_remove(&intr->instr);
return true;
}
static bool
lower_load_work_dim(nir_builder *b, nir_intrinsic_instr *intr,
nir_variable *var)
{
b->cursor = nir_after_instr(&intr->instr);
nir_ssa_def *dim =
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
nir_imm_int(b,
offsetof(struct clc_work_properties_data,
work_dim)),
nir_dest_num_components(intr->dest),
nir_dest_bit_size(intr->dest));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(dim));
nir_instr_remove(&intr->instr);
return true;
}
static bool
lower_load_local_group_size(nir_builder *b, nir_intrinsic_instr *intr)
{
b->cursor = nir_after_instr(&intr->instr);
nir_const_value v[3] = {
nir_const_value_for_int(b->shader->info.cs.local_size[0], 32),
nir_const_value_for_int(b->shader->info.cs.local_size[1], 32),
nir_const_value_for_int(b->shader->info.cs.local_size[2], 32)
};
nir_ssa_def *size = nir_build_imm(b, 3, 32, v);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(size));
nir_instr_remove(&intr->instr);
return true;
}
static bool
lower_load_num_work_groups(nir_builder *b, nir_intrinsic_instr *intr,
nir_variable *var)
{
b->cursor = nir_after_instr(&intr->instr);
nir_ssa_def *count =
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
nir_imm_int(b,
offsetof(struct clc_work_properties_data,
group_count_total_x)),
nir_dest_num_components(intr->dest),
nir_dest_bit_size(intr->dest));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(count));
nir_instr_remove(&intr->instr);
return true;
}
static bool
lower_load_base_work_group_id(nir_builder *b, nir_intrinsic_instr *intr,
nir_variable *var)
{
b->cursor = nir_after_instr(&intr->instr);
nir_ssa_def *offset =
build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
nir_imm_int(b,
offsetof(struct clc_work_properties_data,
group_id_offset_x)),
nir_dest_num_components(intr->dest),
nir_dest_bit_size(intr->dest));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(offset));
nir_instr_remove(&intr->instr);
return true;
}
bool
clc_nir_lower_system_values(nir_shader *nir, nir_variable *var)
{
bool progress = false;
foreach_list_typed(nir_function, func, node, &nir->functions) {
if (!func->is_entrypoint)
continue;
assert(func->impl);
nir_builder b;
nir_builder_init(&b, func->impl);
nir_foreach_block(block, func->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_base_global_invocation_id:
progress |= lower_load_base_global_invocation_id(&b, intr, var);
break;
case nir_intrinsic_load_work_dim:
progress |= lower_load_work_dim(&b, intr, var);
break;
case nir_intrinsic_load_local_group_size:
lower_load_local_group_size(&b, intr);
break;
case nir_intrinsic_load_num_work_groups:
lower_load_num_work_groups(&b, intr, var);
break;
case nir_intrinsic_load_base_work_group_id:
lower_load_base_work_group_id(&b, intr, var);
break;
default: break;
}
}
}
}
return progress;
}
static bool
lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr,
nir_variable *var)
{
nir_intrinsic_instr *load;
b->cursor = nir_before_instr(&intr->instr);
unsigned bit_size = nir_dest_bit_size(intr->dest);
enum glsl_base_type base_type;
switch (bit_size) {
case 64:
base_type = GLSL_TYPE_UINT64;
break;
case 32:
base_type = GLSL_TYPE_UINT;
break;
case 16:
base_type = GLSL_TYPE_UINT16;
break;
case 8:
base_type = GLSL_TYPE_UINT8;
break;
}
const struct glsl_type *type =
glsl_vector_type(base_type, nir_dest_num_components(intr->dest));
nir_ssa_def *ptr = nir_vec2(b, nir_imm_int(b, var->data.binding),
nir_u2u(b, intr->src[0].ssa, 32));
nir_deref_instr *deref = nir_build_deref_cast(b, ptr, nir_var_mem_ubo, type,
bit_size / 8);
deref->cast.align_mul = nir_intrinsic_align_mul(intr);
deref->cast.align_offset = nir_intrinsic_align_offset(intr);
nir_ssa_def *result =
nir_load_deref(b, deref);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(result));
nir_instr_remove(&intr->instr);
return true;
}
bool
clc_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var)
{
bool progress = false;
foreach_list_typed(nir_function, func, node, &nir->functions) {
if (!func->is_entrypoint)
continue;
assert(func->impl);
nir_builder b;
nir_builder_init(&b, func->impl);
nir_foreach_block(block, func->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic == nir_intrinsic_load_kernel_input)
progress |= lower_load_kernel_input(&b, intr, var);
}
}
}
return progress;
}
static nir_variable *
add_printf_var(struct nir_shader *nir, unsigned uav_id)
{
/* This size is arbitrary. Minimum required per spec is 1MB */
const unsigned max_printf_size = 1 * 1024 * 1024;
const unsigned printf_array_size = max_printf_size / sizeof(unsigned);
nir_variable *var =
nir_variable_create(nir, nir_var_mem_ssbo,
glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)),
"kernel_work_properies");
var->data.binding = uav_id;
return var;
}
static void
lower_printf_impl(nir_builder *b, nir_intrinsic_instr *instr, nir_variable *var)
{
/* Atomic add a buffer size counter to determine where to write.
* If overflowed, return -1, otherwise, store the arguments and return 0.
*/
b->cursor = nir_before_instr(&instr->instr);
nir_deref_instr *ssbo_deref = nir_build_deref_var(b, var);
nir_deref_instr *counter_deref = nir_build_deref_array_imm(b, ssbo_deref, 0);
nir_deref_instr *struct_deref = nir_instr_as_deref(instr->src[1].ssa->parent_instr);
nir_variable *struct_var = nir_deref_instr_get_variable(struct_deref);
const struct glsl_type *struct_type = struct_var->type;
/* Align the struct size to 4 for natural SSBO alignment */
int struct_size = align(glsl_get_cl_size(struct_type), 4);
/* Hardcoding 64bit pointers to simplify some code below */
assert(instr->src[0].ssa->num_components == 1 && instr->src[0].ssa->bit_size == 64);
nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, nir_intrinsic_deref_atomic_add);
nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
atomic->src[0] = nir_src_for_ssa(&counter_deref->dest.ssa);
atomic->src[1] = nir_src_for_ssa(nir_imm_int(b, struct_size + sizeof(uint64_t)));
nir_builder_instr_insert(b, &atomic->instr);
int max_valid_offset =
glsl_get_cl_size(var->type) - /* buffer size */
struct_size - /* printf args size */
sizeof(uint64_t) - /* format string */
sizeof(int); /* the first int in the buffer is for the counter */
nir_push_if(b, nir_ilt(b, &atomic->dest.ssa, nir_imm_int(b, max_valid_offset)));
nir_ssa_def *printf_succ_val = nir_imm_int(b, 0);
nir_ssa_def *start_offset = nir_u2u64(b, nir_iadd(b, &atomic->dest.ssa, nir_imm_int(b, sizeof(int))));
nir_deref_instr *as_byte_array = nir_build_deref_cast(b, &ssbo_deref->dest.ssa, nir_var_mem_ssbo, glsl_uint8_t_type(), 1);
nir_deref_instr *as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, start_offset);
nir_deref_instr *format_string_write_deref =
nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, glsl_uint64_t_type(), 8);
nir_store_deref(b, format_string_write_deref, instr->src[0].ssa, ~0);
for (unsigned i = 0; i < glsl_get_length(struct_type); ++i) {
nir_ssa_def *field_offset_from_start = nir_imm_int64(b, glsl_get_struct_field_offset(struct_type, i) + sizeof(uint64_t));
nir_ssa_def *field_offset = nir_iadd(b, start_offset, field_offset_from_start);
const struct glsl_type *field_type = glsl_get_struct_field(struct_type, i);
nir_deref_instr *field_read_deref = nir_build_deref_struct(b, struct_deref, i);
nir_ssa_def *field_value = nir_load_deref(b, field_read_deref);
/* Clang does promotion of arguments to their "native" size. That means that any floats
* have been converted to doubles for the call to printf. Since we don't support doubles,
* convert them back here; copy-prop and other optimizations should remove all hint of doubles.
*/
if (glsl_get_base_type(field_type) == GLSL_TYPE_DOUBLE) {
field_value = nir_f2f32(b, field_value);
field_type = glsl_float_type();
}
as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, field_offset);
nir_deref_instr *field_write_deref =
nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, field_type, glsl_get_cl_size(field_type));
nir_store_deref(b, field_write_deref, field_value, ~0);
}
nir_push_else(b, NULL);
nir_ssa_def *printf_fail_val = nir_imm_int(b, -1);
nir_pop_if(b, NULL);
nir_ssa_def *return_value = nir_if_phi(b, printf_succ_val, printf_fail_val);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(return_value));
nir_instr_remove(&instr->instr);
}
static nir_variable *
find_identical_const_sampler(nir_shader *nir, nir_variable *sampler)
{
nir_foreach_variable_with_modes(uniform, nir, nir_var_uniform) {
if (!glsl_type_is_sampler(uniform->type) || !uniform->data.sampler.is_inline_sampler)
continue;
if (uniform->data.sampler.addressing_mode == sampler->data.sampler.addressing_mode &&
uniform->data.sampler.normalized_coordinates == sampler->data.sampler.normalized_coordinates &&
uniform->data.sampler.filter_mode == sampler->data.sampler.filter_mode)
return uniform;
}
unreachable("Should have at least found the input sampler");
}
bool
clc_nir_dedupe_const_samplers(nir_shader *nir)
{
bool progress = false;
nir_foreach_function(func, nir) {
if (!func->impl)
continue;
nir_builder b;
nir_builder_init(&b, func->impl);
nir_foreach_block(block, func->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_tex)
continue;
nir_tex_instr *tex = nir_instr_as_tex(instr);
int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
if (sampler_idx == -1)
continue;
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_idx].src);
nir_variable *sampler = nir_deref_instr_get_variable(deref);
if (!sampler)
continue;
assert(sampler->data.mode == nir_var_uniform);
if (!sampler->data.sampler.is_inline_sampler)
continue;
nir_variable *replacement = find_identical_const_sampler(nir, sampler);
if (replacement == sampler)
continue;
b.cursor = nir_before_instr(&tex->instr);
nir_deref_instr *replacement_deref = nir_build_deref_var(&b, replacement);
nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_idx].src,
nir_src_for_ssa(&replacement_deref->dest.ssa));
nir_deref_instr_remove_if_unused(deref);
progress = true;
}
}
if (progress) {
nir_metadata_preserve(func->impl, nir_metadata_block_index | nir_metadata_dominance);
}
}
return progress;
}

View File

@ -0,0 +1,40 @@
/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CLC_NIR_H
#define CLC_NIR_H
#include <stdbool.h>
#include "nir.h"
bool
clc_nir_lower_system_values(nir_shader *nir, nir_variable *var);
bool dxil_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var);
bool
clc_nir_lower_printf(nir_shader *nir, unsigned uav_id);
bool
clc_nir_dedupe_const_samplers(nir_shader *nir);
#endif

View File

@ -0,0 +1,12 @@
EXPORTS
clc_context_new
clc_free_context
clc_context_serialize
clc_context_free_serialized
clc_context_deserialize
clc_compile
clc_link
clc_free_object
clc_to_dxil
clc_free_dxil_object
clc_compiler_get_version

View File

@ -0,0 +1,880 @@
/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdexcept>
#include <d3d12.h>
#include <dxgi1_4.h>
#include <gtest/gtest.h>
#include <wrl.h>
#include "util/u_debug.h"
#include "clc_compiler.h"
#include "compute_test.h"
#include "dxcapi.h"
using std::runtime_error;
using Microsoft::WRL::ComPtr;
enum compute_test_debug_flags {
COMPUTE_DEBUG_EXPERIMENTAL_SHADERS = 1 << 0,
COMPUTE_DEBUG_USE_HW_D3D = 1 << 1,
COMPUTE_DEBUG_OPTIMIZE_LIBCLC = 1 << 2,
COMPUTE_DEBUG_SERIALIZE_LIBCLC = 1 << 3,
};
static const struct debug_named_value debug_options[] = {
{ "experimental_shaders", COMPUTE_DEBUG_EXPERIMENTAL_SHADERS, "Enable experimental shaders" },
{ "use_hw_d3d", COMPUTE_DEBUG_USE_HW_D3D, "Use a hardware D3D device" },
{ "optimize_libclc", COMPUTE_DEBUG_OPTIMIZE_LIBCLC, "Optimize the clc_context before using it" },
{ "serialize_libclc", COMPUTE_DEBUG_SERIALIZE_LIBCLC, "Serialize and deserialize the clc_context" },
DEBUG_NAMED_VALUE_END
};
DEBUG_GET_ONCE_FLAGS_OPTION(debug_compute, "COMPUTE_TEST_DEBUG", debug_options, 0)
static void warning_callback(void *priv, const char *msg)
{
fprintf(stderr, "WARNING: %s\n", msg);
}
static void error_callback(void *priv, const char *msg)
{
fprintf(stderr, "ERROR: %s\n", msg);
}
static const struct clc_logger logger = {
NULL,
error_callback,
warning_callback,
};
void
ComputeTest::enable_d3d12_debug_layer()
{
HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
if (!hD3D12Mod) {
fprintf(stderr, "D3D12: failed to load D3D12.DLL\n");
return;
}
typedef HRESULT(WINAPI * PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid,
void **ppFactory);
PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
if (!D3D12GetDebugInterface) {
fprintf(stderr, "D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
return;
}
ID3D12Debug *debug;
if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)& debug))) {
fprintf(stderr, "D3D12: D3D12GetDebugInterface failed\n");
return;
}
debug->EnableDebugLayer();
}
IDXGIFactory4 *
ComputeTest::get_dxgi_factory()
{
static const GUID IID_IDXGIFactory4 = {
0x1bc6ea02, 0xef36, 0x464f,
{ 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
};
typedef HRESULT(WINAPI * PFN_CREATE_DXGI_FACTORY)(REFIID riid,
void **ppFactory);
PFN_CREATE_DXGI_FACTORY CreateDXGIFactory;
HMODULE hDXGIMod = LoadLibrary("DXGI.DLL");
if (!hDXGIMod)
throw runtime_error("Failed to load DXGI.DLL");
CreateDXGIFactory = (PFN_CREATE_DXGI_FACTORY)GetProcAddress(hDXGIMod, "CreateDXGIFactory");
if (!CreateDXGIFactory)
throw runtime_error("Failed to load CreateDXGIFactory from DXGI.DLL");
IDXGIFactory4 *factory = NULL;
HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory4, (void **)&factory);
if (FAILED(hr))
throw runtime_error("CreateDXGIFactory failed");
return factory;
}
IDXGIAdapter1 *
ComputeTest::choose_adapter(IDXGIFactory4 *factory)
{
IDXGIAdapter1 *ret;
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_USE_HW_D3D) {
for (unsigned i = 0; SUCCEEDED(factory->EnumAdapters1(i, &ret)); i++) {
DXGI_ADAPTER_DESC1 desc;
ret->GetDesc1(&desc);
if (!(desc.Flags & D3D_DRIVER_TYPE_SOFTWARE))
return ret;
}
throw runtime_error("Failed to enum hardware adapter");
} else {
if (FAILED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
(void **)& ret)))
throw runtime_error("Failed to enum warp adapter");
return ret;
}
}
ID3D12Device *
ComputeTest::create_device(IDXGIAdapter1 *adapter)
{
typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown *, D3D_FEATURE_LEVEL, REFIID, void **);
PFN_D3D12CREATEDEVICE D3D12CreateDevice;
HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
if (!hD3D12Mod)
throw runtime_error("failed to load D3D12.DLL");
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS) {
typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID *, void *, UINT *);
PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)
GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL)))
throw runtime_error("failed to enable experimental shader models");
}
D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
if (!D3D12CreateDevice)
throw runtime_error("failed to load D3D12CreateDevice from D3D12.DLL");
ID3D12Device *dev;
if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_0,
__uuidof(ID3D12Device), (void **)& dev)))
throw runtime_error("D3D12CreateDevice failed");
return dev;
}
ComPtr<ID3D12RootSignature>
ComputeTest::create_root_signature(const ComputeTest::Resources &resources)
{
D3D12_ROOT_PARAMETER1 root_param;
root_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_param.DescriptorTable.NumDescriptorRanges = resources.ranges.size();
root_param.DescriptorTable.pDescriptorRanges = resources.ranges.data();
root_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC1 root_sig_desc;
root_sig_desc.NumParameters = 1;
root_sig_desc.pParameters = &root_param;
root_sig_desc.NumStaticSamplers = 0;
root_sig_desc.pStaticSamplers = NULL;
root_sig_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc;
versioned_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
versioned_desc.Desc_1_1 = root_sig_desc;
ID3DBlob *sig, *error;
if (FAILED(D3D12SerializeVersionedRootSignature(&versioned_desc,
&sig, &error)))
throw runtime_error("D3D12SerializeVersionedRootSignature failed");
ComPtr<ID3D12RootSignature> ret;
if (FAILED(dev->CreateRootSignature(0,
sig->GetBufferPointer(),
sig->GetBufferSize(),
__uuidof(ret),
(void **)& ret)))
throw runtime_error("CreateRootSignature failed");
return ret;
}
ComPtr<ID3D12PipelineState>
ComputeTest::create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
const struct clc_dxil_object &dxil)
{
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc = { root_sig.Get() };
pipeline_desc.CS.pShaderBytecode = dxil.binary.data;
pipeline_desc.CS.BytecodeLength = dxil.binary.size;
ComPtr<ID3D12PipelineState> pipeline_state;
if (FAILED(dev->CreateComputePipelineState(&pipeline_desc,
__uuidof(pipeline_state),
(void **)& pipeline_state)))
throw runtime_error("Failed to create pipeline state");
return pipeline_state;
}
ComPtr<ID3D12Resource>
ComputeTest::create_buffer(int size, D3D12_HEAP_TYPE heap_type)
{
D3D12_RESOURCE_DESC desc;
desc.Format = DXGI_FORMAT_UNKNOWN;
desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
desc.Width = size;
desc.Height = 1;
desc.DepthOrArraySize = 1;
desc.MipLevels = 1;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
desc.Flags = heap_type == D3D12_HEAP_TYPE_DEFAULT ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE;
desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
D3D12_HEAP_PROPERTIES heap_pris = dev->GetCustomHeapProperties(0, heap_type);
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COMMON;
switch (heap_type) {
case D3D12_HEAP_TYPE_UPLOAD:
initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
break;
case D3D12_HEAP_TYPE_READBACK:
initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
break;
}
ComPtr<ID3D12Resource> res;
if (FAILED(dev->CreateCommittedResource(&heap_pris,
D3D12_HEAP_FLAG_NONE, &desc, initial_state,
NULL, __uuidof(ID3D12Resource), (void **)&res)))
throw runtime_error("CreateCommittedResource failed");
return res;
}
ComPtr<ID3D12Resource>
ComputeTest::create_upload_buffer_with_data(const void *data, size_t size)
{
auto upload_res = create_buffer(size, D3D12_HEAP_TYPE_UPLOAD);
void *ptr = NULL;
D3D12_RANGE res_range = { 0, (SIZE_T)size };
if (FAILED(upload_res->Map(0, &res_range, (void **)&ptr)))
throw runtime_error("Failed to map upload-buffer");
assert(ptr);
memcpy(ptr, data, size);
upload_res->Unmap(0, &res_range);
return upload_res;
}
ComPtr<ID3D12Resource>
ComputeTest::create_sized_buffer_with_data(size_t buffer_size,
const void *data,
size_t data_size)
{
auto upload_res = create_upload_buffer_with_data(data, data_size);
auto res = create_buffer(buffer_size, D3D12_HEAP_TYPE_DEFAULT);
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
cmdlist->CopyBufferRegion(res.Get(), 0, upload_res.Get(), 0, data_size);
resource_barrier(res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON);
execute_cmdlist();
return res;
}
void
ComputeTest::get_buffer_data(ComPtr<ID3D12Resource> res,
void *buf, size_t size)
{
auto readback_res = create_buffer(align(size, 4), D3D12_HEAP_TYPE_READBACK);
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
cmdlist->CopyResource(readback_res.Get(), res.Get());
resource_barrier(res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COMMON);
execute_cmdlist();
void *ptr = NULL;
D3D12_RANGE res_range = { 0, size };
if (FAILED(readback_res->Map(0, &res_range, &ptr)))
throw runtime_error("Failed to map readback-buffer");
memcpy(buf, ptr, size);
D3D12_RANGE empty_range = { 0, 0 };
readback_res->Unmap(0, &empty_range);
}
void
ComputeTest::resource_barrier(ComPtr<ID3D12Resource> &res,
D3D12_RESOURCE_STATES state_before,
D3D12_RESOURCE_STATES state_after)
{
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = res.Get();
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = state_before;
barrier.Transition.StateAfter = state_after;
cmdlist->ResourceBarrier(1, &barrier);
}
void
ComputeTest::execute_cmdlist()
{
if (FAILED(cmdlist->Close()))
throw runtime_error("Closing ID3D12GraphicsCommandList failed");
ID3D12CommandList *cmdlists[] = { cmdlist };
cmdqueue->ExecuteCommandLists(1, cmdlists);
cmdqueue_fence->SetEventOnCompletion(fence_value, event);
cmdqueue->Signal(cmdqueue_fence, fence_value);
fence_value++;
WaitForSingleObject(event, INFINITE);
if (FAILED(cmdalloc->Reset()))
throw runtime_error("resetting ID3D12CommandAllocator failed");
if (FAILED(cmdlist->Reset(cmdalloc, NULL)))
throw runtime_error("resetting ID3D12GraphicsCommandList failed");
}
void
ComputeTest::create_uav_buffer(ComPtr<ID3D12Resource> res,
size_t width, size_t byte_stride,
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
{
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer.FirstElement = 0;
uav_desc.Buffer.NumElements = DIV_ROUND_UP(width * byte_stride, 4);
uav_desc.Buffer.StructureByteStride = 0;
uav_desc.Buffer.CounterOffsetInBytes = 0;
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
dev->CreateUnorderedAccessView(res.Get(), NULL, &uav_desc, cpu_handle);
}
void
ComputeTest::create_cbv(ComPtr<ID3D12Resource> res, size_t size,
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
{
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
cbv_desc.BufferLocation = res ? res->GetGPUVirtualAddress() : 0;
cbv_desc.SizeInBytes = size;
dev->CreateConstantBufferView(&cbv_desc, cpu_handle);
}
ComPtr<ID3D12Resource>
ComputeTest::add_uav_resource(ComputeTest::Resources &resources,
unsigned spaceid, unsigned resid,
const void *data, size_t num_elems,
size_t elem_size)
{
size_t size = align(elem_size * num_elems, 4);
D3D12_CPU_DESCRIPTOR_HANDLE handle;
ComPtr<ID3D12Resource> res;
handle = uav_heap->GetCPUDescriptorHandleForHeapStart();
handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
if (size) {
if (data)
res = create_buffer_with_data(data, size);
else
res = create_buffer(size, D3D12_HEAP_TYPE_DEFAULT);
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
create_uav_buffer(res, num_elems, elem_size, handle);
resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_UAV, spaceid, resid);
return res;
}
ComPtr<ID3D12Resource>
ComputeTest::add_cbv_resource(ComputeTest::Resources &resources,
unsigned spaceid, unsigned resid,
const void *data, size_t size)
{
unsigned aligned_size = align(size, 256);
D3D12_CPU_DESCRIPTOR_HANDLE handle;
ComPtr<ID3D12Resource> res;
handle = uav_heap->GetCPUDescriptorHandleForHeapStart();
handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);
if (size) {
assert(data);
res = create_sized_buffer_with_data(aligned_size, data, size);
}
create_cbv(res, aligned_size, handle);
resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, spaceid, resid);
return res;
}
void
ComputeTest::run_shader_with_raw_args(Shader shader,
const CompileArgs &compile_args,
const std::vector<RawShaderArg *> &args)
{
if (args.size() < 1)
throw runtime_error("no inputs");
static HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
if (!hD3D12Mod)
throw runtime_error("Failed to load D3D12.DLL");
D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
if (args.size() != shader.dxil->kernel->num_args)
throw runtime_error("incorrect number of inputs");
struct clc_runtime_kernel_conf conf = { 0 };
// Older WARP and some hardware doesn't support int64, so for these tests, unconditionally lower away int64
// A more complex runtime can be smarter about detecting when this needs to be done
conf.lower_bit_size = 64;
if (!shader.dxil->metadata.local_size[0])
conf.local_size[0] = compile_args.x;
else
conf.local_size[0] = shader.dxil->metadata.local_size[0];
if (!shader.dxil->metadata.local_size[1])
conf.local_size[1] = compile_args.y;
else
conf.local_size[1] = shader.dxil->metadata.local_size[1];
if (!shader.dxil->metadata.local_size[2])
conf.local_size[2] = compile_args.z;
else
conf.local_size[2] = shader.dxil->metadata.local_size[2];
if (compile_args.x % conf.local_size[0] ||
compile_args.y % conf.local_size[1] ||
compile_args.z % conf.local_size[2])
throw runtime_error("invalid global size must be a multiple of local size");
std::vector<struct clc_runtime_arg_info> argsinfo(args.size());
conf.args = argsinfo.data();
conf.support_global_work_id_offsets =
compile_args.work_props.global_offset_x != 0 ||
compile_args.work_props.global_offset_y != 0 ||
compile_args.work_props.global_offset_z != 0;
conf.support_work_group_id_offsets =
compile_args.work_props.group_id_offset_x != 0 ||
compile_args.work_props.group_id_offset_y != 0 ||
compile_args.work_props.group_id_offset_z != 0;
for (unsigned i = 0; i < shader.dxil->kernel->num_args; ++i) {
RawShaderArg *arg = args[i];
size_t size = arg->get_elem_size() * arg->get_num_elems();
switch (shader.dxil->kernel->args[i].address_qualifier) {
case CLC_KERNEL_ARG_ADDRESS_LOCAL:
argsinfo[i].localptr.size = size;
break;
default:
break;
}
}
configure(shader, &conf);
validate(shader);
std::shared_ptr<struct clc_dxil_object> &dxil = shader.dxil;
std::vector<uint8_t> argsbuf(dxil->metadata.kernel_inputs_buf_size);
std::vector<ComPtr<ID3D12Resource>> argres(shader.dxil->kernel->num_args);
clc_work_properties_data work_props = compile_args.work_props;
if (!conf.support_work_group_id_offsets) {
work_props.group_count_total_x = compile_args.x / conf.local_size[0];
work_props.group_count_total_y = compile_args.y / conf.local_size[1];
work_props.group_count_total_z = compile_args.z / conf.local_size[2];
}
if (work_props.work_dim == 0)
work_props.work_dim = 3;
Resources resources;
for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
RawShaderArg *arg = args[i];
size_t size = arg->get_elem_size() * arg->get_num_elems();
void *slot = argsbuf.data() + dxil->metadata.args[i].offset;
switch (dxil->kernel->args[i].address_qualifier) {
case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
case CLC_KERNEL_ARG_ADDRESS_GLOBAL: {
assert(dxil->metadata.args[i].size == sizeof(uint64_t));
uint64_t *ptr_slot = (uint64_t *)slot;
if (arg->get_data())
*ptr_slot = (uint64_t)dxil->metadata.args[i].globconstptr.buf_id << 32;
else
*ptr_slot = ~0ull;
break;
}
case CLC_KERNEL_ARG_ADDRESS_LOCAL: {
assert(dxil->metadata.args[i].size == sizeof(uint64_t));
uint64_t *ptr_slot = (uint64_t *)slot;
*ptr_slot = dxil->metadata.args[i].localptr.sharedmem_offset;
break;
}
case CLC_KERNEL_ARG_ADDRESS_PRIVATE: {
assert(size == dxil->metadata.args[i].size);
memcpy(slot, arg->get_data(), size);
break;
}
default:
assert(0);
}
}
for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
RawShaderArg *arg = args[i];
if (dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
argres[i] = add_uav_resource(resources, 0,
dxil->metadata.args[i].globconstptr.buf_id,
arg->get_data(), arg->get_num_elems(),
arg->get_elem_size());
}
}
if (dxil->metadata.printf_uav_id > 0)
add_uav_resource(resources, 0, dxil->metadata.printf_uav_id, NULL, 1024 * 1024 / 4, 4);
for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,
dxil->metadata.consts[i].data,
dxil->metadata.consts[i].size / 4, 4);
if (argsbuf.size())
add_cbv_resource(resources, 0, dxil->metadata.kernel_inputs_cbv_id,
argsbuf.data(), argsbuf.size());
add_cbv_resource(resources, 0, dxil->metadata.work_properties_cbv_id,
&work_props, sizeof(work_props));
auto root_sig = create_root_signature(resources);
auto pipeline_state = create_pipeline_state(root_sig, *dxil);
cmdlist->SetDescriptorHeaps(1, &uav_heap);
cmdlist->SetComputeRootSignature(root_sig.Get());
cmdlist->SetComputeRootDescriptorTable(0, uav_heap->GetGPUDescriptorHandleForHeapStart());
cmdlist->SetPipelineState(pipeline_state.Get());
cmdlist->Dispatch(compile_args.x / conf.local_size[0],
compile_args.y / conf.local_size[1],
compile_args.z / conf.local_size[2]);
for (auto &range : resources.ranges) {
if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
for (unsigned i = range.OffsetInDescriptorsFromTableStart;
i < range.NumDescriptors; i++) {
if (!resources.descs[i].Get())
continue;
resource_barrier(resources.descs[i],
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
D3D12_RESOURCE_STATE_COMMON);
}
}
}
execute_cmdlist();
for (unsigned i = 0; i < args.size(); i++) {
if (!(args[i]->get_direction() & SHADER_ARG_OUTPUT))
continue;
assert(dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL);
get_buffer_data(argres[i], args[i]->get_data(),
args[i]->get_elem_size() * args[i]->get_num_elems());
}
ComPtr<ID3D12InfoQueue> info_queue;
dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
if (info_queue)
{
EXPECT_EQ(0, info_queue->GetNumStoredMessages());
for (unsigned i = 0; i < info_queue->GetNumStoredMessages(); ++i) {
SIZE_T message_size = 0;
info_queue->GetMessageA(i, nullptr, &message_size);
D3D12_MESSAGE* message = (D3D12_MESSAGE*)malloc(message_size);
info_queue->GetMessageA(i, message, &message_size);
FAIL() << message->pDescription;
free(message);
}
}
}
void
ComputeTest::SetUp()
{
static struct clc_context *compiler_ctx_g = nullptr;
if (!compiler_ctx_g) {
clc_context_options options = { };
options.optimize = (debug_get_option_debug_compute() & COMPUTE_DEBUG_OPTIMIZE_LIBCLC) != 0;
compiler_ctx_g = clc_context_new(&logger, &options);
if (!compiler_ctx_g)
throw runtime_error("failed to create CLC compiler context");
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_SERIALIZE_LIBCLC) {
void *serialized = nullptr;
size_t serialized_size = 0;
clc_context_serialize(compiler_ctx_g, &serialized, &serialized_size);
if (!serialized)
throw runtime_error("failed to serialize CLC compiler context");
clc_free_context(compiler_ctx_g);
compiler_ctx_g = nullptr;
compiler_ctx_g = clc_context_deserialize(serialized, serialized_size);
if (!compiler_ctx_g)
throw runtime_error("failed to deserialize CLC compiler context");
clc_context_free_serialized(serialized);
}
}
compiler_ctx = compiler_ctx_g;
enable_d3d12_debug_layer();
factory = get_dxgi_factory();
if (!factory)
throw runtime_error("failed to create DXGI factory");
adapter = choose_adapter(factory);
if (!adapter)
throw runtime_error("failed to choose adapter");
dev = create_device(adapter);
if (!dev)
throw runtime_error("failed to create device");
if (FAILED(dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
__uuidof(cmdqueue_fence),
(void **)&cmdqueue_fence)))
throw runtime_error("failed to create fence\n");
D3D12_COMMAND_QUEUE_DESC queue_desc;
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
queue_desc.NodeMask = 0;
if (FAILED(dev->CreateCommandQueue(&queue_desc,
__uuidof(cmdqueue),
(void **)&cmdqueue)))
throw runtime_error("failed to create command queue");
if (FAILED(dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE,
__uuidof(cmdalloc), (void **)&cmdalloc)))
throw runtime_error("failed to create command allocator");
if (FAILED(dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
cmdalloc, NULL, __uuidof(cmdlist), (void **)&cmdlist)))
throw runtime_error("failed to create command list");
D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
heap_desc.NumDescriptors = 1000;
heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
heap_desc.NodeMask = 0;
if (FAILED(dev->CreateDescriptorHeap(&heap_desc,
__uuidof(uav_heap), (void **)&uav_heap)))
throw runtime_error("failed to create descriptor heap");
uav_heap_incr = dev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
event = CreateEvent(NULL, FALSE, FALSE, NULL);
if (!event)
throw runtime_error("Failed to create event");
fence_value = 1;
}
void
ComputeTest::TearDown()
{
CloseHandle(event);
uav_heap->Release();
cmdlist->Release();
cmdalloc->Release();
cmdqueue->Release();
cmdqueue_fence->Release();
dev->Release();
adapter->Release();
factory->Release();
}
PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE ComputeTest::D3D12SerializeVersionedRootSignature;
bool
validate_module(const struct clc_dxil_object &dxil)
{
static HMODULE hmod = LoadLibrary("DXIL.DLL");
if (!hmod) {
/* Enabling experimental shaders allows us to run unsigned shader code,
* such as when under the debugger where we can't run the validator. */
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS)
return true;
else
throw runtime_error("failed to load DXIL.DLL");
}
DxcCreateInstanceProc pfnDxcCreateInstance =
(DxcCreateInstanceProc)GetProcAddress(hmod, "DxcCreateInstance");
if (!pfnDxcCreateInstance)
throw runtime_error("failed to load DxcCreateInstance");
struct shader_blob : public IDxcBlob {
shader_blob(void *data, size_t size) : data(data), size(size) {}
LPVOID STDMETHODCALLTYPE GetBufferPointer() override { return data; }
SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return size; }
HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void **) override { return E_NOINTERFACE; }
ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
ULONG STDMETHODCALLTYPE Release() override { return 0; }
void *data;
size_t size;
} blob(dxil.binary.data, dxil.binary.size);
IDxcValidator *validator;
if (FAILED(pfnDxcCreateInstance(CLSID_DxcValidator, __uuidof(IDxcValidator),
(void **)&validator)))
throw runtime_error("failed to create IDxcValidator");
IDxcOperationResult *result;
if (FAILED(validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit,
&result)))
throw runtime_error("Validate failed");
HRESULT hr;
if (FAILED(result->GetStatus(&hr)) ||
FAILED(hr)) {
IDxcBlobEncoding *message;
result->GetErrorBuffer(&message);
fprintf(stderr, "D3D12: validation failed: %*s\n",
(int)message->GetBufferSize(),
(char *)message->GetBufferPointer());
message->Release();
validator->Release();
result->Release();
return false;
}
validator->Release();
result->Release();
return true;
}
static void
dump_blob(const char *path, const struct clc_dxil_object &dxil)
{
FILE *fp = fopen(path, "wb");
if (fp) {
fwrite(dxil.binary.data, 1, dxil.binary.size, fp);
fclose(fp);
printf("D3D12: wrote '%s'...\n", path);
}
}
ComputeTest::Shader
ComputeTest::compile(const std::vector<const char *> &sources,
const std::vector<const char *> &compile_args,
bool create_library)
{
struct clc_compile_args args = { 0 };
args.args = compile_args.data();
args.num_args = (unsigned)compile_args.size();
struct clc_dxil_object *dxil;
ComputeTest::Shader shader;
std::vector<Shader> shaders;
args.source.name = "obj.cl";
for (unsigned i = 0; i < sources.size(); i++) {
args.source.value = sources[i];
auto obj = clc_compile(compiler_ctx, &args, &logger);
if (!obj)
throw runtime_error("failed to compile object!");
Shader shader;
shader.obj = std::shared_ptr<struct clc_object>(obj, clc_free_object);
shaders.push_back(shader);
}
if (shaders.size() == 1 && create_library)
return shaders[0];
return link(shaders, create_library);
}
ComputeTest::Shader
ComputeTest::link(const std::vector<Shader> &sources,
bool create_library)
{
std::vector<const clc_object*> objs;
for (auto& source : sources)
objs.push_back(&*source.obj);
struct clc_linker_args link_args = {};
link_args.in_objs = objs.data();
link_args.num_in_objs = (unsigned)objs.size();
link_args.create_library = create_library;
struct clc_object *obj = clc_link(compiler_ctx,
&link_args,
&logger);
if (!obj)
throw runtime_error("failed to link objects!");
ComputeTest::Shader shader;
shader.obj = std::shared_ptr<struct clc_object>(obj, clc_free_object);
if (!link_args.create_library)
configure(shader, NULL);
return shader;
}
void
ComputeTest::configure(Shader &shader,
const struct clc_runtime_kernel_conf *conf)
{
struct clc_dxil_object *dxil;
dxil = clc_to_dxil(compiler_ctx, shader.obj.get(), "main_test", conf, &logger);
if (!dxil)
throw runtime_error("failed to compile kernel!");
shader.dxil = std::shared_ptr<struct clc_dxil_object>(dxil, clc_free_dxil_object);
}
void
ComputeTest::validate(ComputeTest::Shader &shader)
{
dump_blob("unsigned.cso", *shader.dxil);
if (!validate_module(*shader.dxil))
throw runtime_error("failed to validate module!");
dump_blob("signed.cso", *shader.dxil);
}

View File

@ -0,0 +1,324 @@
/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdexcept>
#include <d3d12.h>
#include <dxgi1_4.h>
#include <gtest/gtest.h>
#include <wrl.h>
#include "clc_compiler.h"
using std::runtime_error;
using Microsoft::WRL::ComPtr;
inline D3D12_CPU_DESCRIPTOR_HANDLE
offset_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE handle, UINT offset)
{
handle.ptr += offset;
return handle;
}
inline size_t
align(size_t value, unsigned alignment)
{
assert(alignment > 0);
return ((value + (alignment - 1)) / alignment) * alignment;
}
class ComputeTest : public ::testing::Test {
protected:
struct Shader {
std::shared_ptr<struct clc_object> obj;
std::shared_ptr<struct clc_dxil_object> dxil;
};
static void
enable_d3d12_debug_layer();
static IDXGIFactory4 *
get_dxgi_factory();
static IDXGIAdapter1 *
choose_adapter(IDXGIFactory4 *factory);
static ID3D12Device *
create_device(IDXGIAdapter1 *adapter);
struct Resources {
void add(ComPtr<ID3D12Resource> res,
D3D12_DESCRIPTOR_RANGE_TYPE type,
unsigned spaceid,
unsigned resid)
{
descs.push_back(res);
if(!ranges.empty() &&
ranges.back().RangeType == type &&
ranges.back().RegisterSpace == spaceid &&
ranges.back().BaseShaderRegister + ranges.back().NumDescriptors == resid) {
ranges.back().NumDescriptors++;
return;
}
D3D12_DESCRIPTOR_RANGE1 range;
range.RangeType = type;
range.NumDescriptors = 1;
range.BaseShaderRegister = resid;
range.RegisterSpace = spaceid;
range.OffsetInDescriptorsFromTableStart = descs.size() - 1;
range.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS;
ranges.push_back(range);
}
std::vector<D3D12_DESCRIPTOR_RANGE1> ranges;
std::vector<ComPtr<ID3D12Resource>> descs;
};
ComPtr<ID3D12RootSignature>
create_root_signature(const Resources &resources);
ComPtr<ID3D12PipelineState>
create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
const struct clc_dxil_object &dxil);
ComPtr<ID3D12Resource>
create_buffer(int size, D3D12_HEAP_TYPE heap_type);
ComPtr<ID3D12Resource>
create_upload_buffer_with_data(const void *data, size_t size);
ComPtr<ID3D12Resource>
create_sized_buffer_with_data(size_t buffer_size, const void *data,
size_t data_size);
ComPtr<ID3D12Resource>
create_buffer_with_data(const void *data, size_t size)
{
return create_sized_buffer_with_data(size, data, size);
}
void
get_buffer_data(ComPtr<ID3D12Resource> res,
void *buf, size_t size);
void
resource_barrier(ComPtr<ID3D12Resource> &res,
D3D12_RESOURCE_STATES state_before,
D3D12_RESOURCE_STATES state_after);
void
execute_cmdlist();
void
create_uav_buffer(ComPtr<ID3D12Resource> res,
size_t width, size_t byte_stride,
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle);
void create_cbv(ComPtr<ID3D12Resource> res, size_t size,
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle);
ComPtr<ID3D12Resource>
add_uav_resource(Resources &resources, unsigned spaceid, unsigned resid,
const void *data = NULL, size_t num_elems = 0,
size_t elem_size = 0);
ComPtr<ID3D12Resource>
add_cbv_resource(Resources &resources, unsigned spaceid, unsigned resid,
const void *data, size_t size);
void
SetUp() override;
void
TearDown() override;
Shader
compile(const std::vector<const char *> &sources,
const std::vector<const char *> &compile_args = {},
bool create_library = false);
Shader
link(const std::vector<Shader> &sources,
bool create_library = false);
void
configure(Shader &shader,
const struct clc_runtime_kernel_conf *conf);
void
validate(Shader &shader);
enum ShaderArgDirection {
SHADER_ARG_INPUT = 1,
SHADER_ARG_OUTPUT = 2,
SHADER_ARG_INOUT = SHADER_ARG_INPUT | SHADER_ARG_OUTPUT,
};
class RawShaderArg {
public:
RawShaderArg(enum ShaderArgDirection dir) : dir(dir) { }
virtual size_t get_elem_size() const = 0;
virtual size_t get_num_elems() const = 0;
virtual const void *get_data() const = 0;
virtual void *get_data() = 0;
enum ShaderArgDirection get_direction() { return dir; }
private:
enum ShaderArgDirection dir;
};
class NullShaderArg : public RawShaderArg {
public:
NullShaderArg() : RawShaderArg(SHADER_ARG_INPUT) { }
size_t get_elem_size() const override { return 0; }
size_t get_num_elems() const override { return 0; }
const void *get_data() const override { return NULL; }
void *get_data() override { return NULL; }
};
template <typename T>
class ShaderArg : public std::vector<T>, public RawShaderArg
{
public:
ShaderArg(const T &v, enum ShaderArgDirection dir = SHADER_ARG_INOUT) :
std::vector<T>({ v }), RawShaderArg(dir) { }
ShaderArg(const std::vector<T> &v, enum ShaderArgDirection dir = SHADER_ARG_INOUT) :
std::vector<T>(v), RawShaderArg(dir) { }
ShaderArg(const std::initializer_list<T> v, enum ShaderArgDirection dir = SHADER_ARG_INOUT) :
std::vector<T>(v), RawShaderArg(dir) { }
ShaderArg<T>& operator =(const T &v)
{
this->clear();
this->push_back(v);
return *this;
}
operator T&() { return this->at(0); }
operator const T&() const { return this->at(0); }
ShaderArg<T>& operator =(const std::vector<T> &v)
{
*this = v;
return *this;
}
ShaderArg<T>& operator =(std::initializer_list<T> v)
{
*this = v;
return *this;
}
size_t get_elem_size() const override { return sizeof(T); }
size_t get_num_elems() const override { return this->size(); }
const void *get_data() const override { return this->data(); }
void *get_data() override { return this->data(); }
};
struct CompileArgs
{
unsigned x, y, z;
std::vector<const char *> compiler_command_line;
clc_work_properties_data work_props;
};
private:
void gather_args(std::vector<RawShaderArg *> &args) { }
template <typename T, typename... Rest>
void gather_args(std::vector<RawShaderArg *> &args, T &arg, Rest&... rest)
{
args.push_back(&arg);
gather_args(args, rest...);
}
void run_shader_with_raw_args(Shader shader,
const CompileArgs &compile_args,
const std::vector<RawShaderArg *> &args);
protected:
template <typename... Args>
void run_shader(Shader shader,
const CompileArgs &compile_args,
Args&... args)
{
std::vector<RawShaderArg *> raw_args;
gather_args(raw_args, args...);
run_shader_with_raw_args(shader, compile_args, raw_args);
}
template <typename... Args>
void run_shader(const std::vector<const char *> &sources,
unsigned x, unsigned y, unsigned z,
Args&... args)
{
std::vector<RawShaderArg *> raw_args;
gather_args(raw_args, args...);
CompileArgs compile_args = { x, y, z };
run_shader_with_raw_args(compile(sources), compile_args, raw_args);
}
template <typename... Args>
void run_shader(const std::vector<const char *> &sources,
const CompileArgs &compile_args,
Args&... args)
{
std::vector<RawShaderArg *> raw_args;
gather_args(raw_args, args...);
run_shader_with_raw_args(
compile(sources, compile_args.compiler_command_line),
compile_args, raw_args);
}
template <typename... Args>
void run_shader(const char *source,
unsigned x, unsigned y, unsigned z,
Args&... args)
{
std::vector<RawShaderArg *> raw_args;
gather_args(raw_args, args...);
CompileArgs compile_args = { x, y, z };
run_shader_with_raw_args(compile({ source }), compile_args, raw_args);
}
IDXGIFactory4 *factory;
IDXGIAdapter1 *adapter;
ID3D12Device *dev;
ID3D12Fence *cmdqueue_fence;
ID3D12CommandQueue *cmdqueue;
ID3D12CommandAllocator *cmdalloc;
ID3D12GraphicsCommandList *cmdlist;
ID3D12DescriptorHeap *uav_heap;
struct clc_context *compiler_ctx;
UINT uav_heap_incr;
int fence_value;
HANDLE event;
static PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE D3D12SerializeVersionedRootSignature;
};

View File

@ -0,0 +1,59 @@
# Copyright © Microsoft Corporation
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
clang_resource_dir = join_paths(
dep_clang.get_variable(cmake: 'CLANG_INCLUDE_DIRS'), '..',
'lib', 'clang', dep_clang.version(), 'include'
)
opencl_c_h = custom_target(
'opencl-c.h',
input : [files_xxd, join_paths(clang_resource_dir, 'opencl-c.h')],
output : 'opencl-c.h.h',
command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'opencl_c_source'],
)
opencl_c_base_h = custom_target(
'opencl-c-base.h',
input : [files_xxd, join_paths(clang_resource_dir, 'opencl-c-base.h')],
output : 'opencl-c-base.h.h',
command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'opencl_c_base_source'],
)
libclc_compiler = shared_library(
'clglon12compiler',
'clc_compiler.c',
'clc_nir.c',
'clc_helpers.cpp',
opencl_c_h,
opencl_c_base_h,
vs_module_defs : 'clglon12compiler.def',
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_compiler, inc_gallium, inc_spirv],
dependencies: [idep_nir_headers, dep_clang, dep_llvm, cc.find_library('version'),
dep_llvmspirvlib, idep_mesautil, idep_libdxil_compiler, idep_nir, dep_spirv_tools]
)
clc_compiler_test = executable('clc_compiler_test',
['clc_compiler_test.cpp', 'compute_test.cpp'],
link_with : [libclc_compiler],
dependencies : [idep_gtest, idep_mesautil],
include_directories : [inc_include, inc_src])
test('clc_compiler_test', clc_compiler_test, timeout: 120)

View File

@ -0,0 +1,676 @@
///////////////////////////////////////////////////////////////////////////////
// //
// dxcapi.h //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// Provides declarations for the DirectX Compiler API entry point. //
// //
///////////////////////////////////////////////////////////////////////////////
#ifndef __DXC_API__
#define __DXC_API__
#ifdef _WIN32
#ifndef DXC_API_IMPORT
#define DXC_API_IMPORT __declspec(dllimport)
#endif
#else
#ifndef DXC_API_IMPORT
#define DXC_API_IMPORT __attribute__ ((visibility ("default")))
#endif
#endif
#ifdef _WIN32
#define DECLARE_CROSS_PLATFORM_UUIDOF(T)
#define DEFINE_CROSS_PLATFORM_UUIDOF(T)
#else
#include <dlfcn.h>
#include "dxc/Support/WinAdapter.h"
#endif
struct IMalloc;
struct IDxcIncludeHandler;
typedef HRESULT (__stdcall *DxcCreateInstanceProc)(
_In_ REFCLSID rclsid,
_In_ REFIID riid,
_Out_ LPVOID* ppv
);
typedef HRESULT(__stdcall *DxcCreateInstance2Proc)(
_In_ IMalloc *pMalloc,
_In_ REFCLSID rclsid,
_In_ REFIID riid,
_Out_ LPVOID* ppv
);
/// <summary>
/// Creates a single uninitialized object of the class associated with a specified CLSID.
/// </summary>
/// <param name="rclsid">
/// The CLSID associated with the data and code that will be used to create the object.
/// </param>
/// <param name="riid">
/// A reference to the identifier of the interface to be used to communicate
/// with the object.
/// </param>
/// <param name="ppv">
/// Address of pointer variable that receives the interface pointer requested
/// in riid. Upon successful return, *ppv contains the requested interface
/// pointer. Upon failure, *ppv contains NULL.</param>
/// <remarks>
/// While this function is similar to CoCreateInstance, there is no COM involvement.
/// </remarks>
extern "C"
DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance(
_In_ REFCLSID rclsid,
_In_ REFIID riid,
_Out_ LPVOID* ppv
);
extern "C"
DXC_API_IMPORT HRESULT __stdcall DxcCreateInstance2(
_In_ IMalloc *pMalloc,
_In_ REFCLSID rclsid,
_In_ REFIID riid,
_Out_ LPVOID* ppv
);
// For convenience, equivalent definitions to CP_UTF8 and CP_UTF16.
#define DXC_CP_UTF8 65001
#define DXC_CP_UTF16 1200
// Use DXC_CP_ACP for: Binary; ANSI Text; Autodetect UTF with BOM
#define DXC_CP_ACP 0
// This flag indicates that the shader hash was computed taking into account source information (-Zss)
#define DXC_HASHFLAG_INCLUDES_SOURCE 1
// Hash digest type for ShaderHash
typedef struct DxcShaderHash {
UINT32 Flags; // DXC_HASHFLAG_*
BYTE HashDigest[16];
} DxcShaderHash;
#define DXC_FOURCC(ch0, ch1, ch2, ch3) ( \
(UINT32)(UINT8)(ch0) | (UINT32)(UINT8)(ch1) << 8 | \
(UINT32)(UINT8)(ch2) << 16 | (UINT32)(UINT8)(ch3) << 24 \
)
#define DXC_PART_PDB DXC_FOURCC('I', 'L', 'D', 'B')
#define DXC_PART_PDB_NAME DXC_FOURCC('I', 'L', 'D', 'N')
#define DXC_PART_PRIVATE_DATA DXC_FOURCC('P', 'R', 'I', 'V')
#define DXC_PART_ROOT_SIGNATURE DXC_FOURCC('R', 'T', 'S', '0')
#define DXC_PART_DXIL DXC_FOURCC('D', 'X', 'I', 'L')
#define DXC_PART_REFLECTION_DATA DXC_FOURCC('R', 'D', 'A', 'T')
#define DXC_PART_SHADER_HASH DXC_FOURCC('H', 'A', 'S', 'H')
#define DXC_PART_INPUT_SIGNATURE DXC_FOURCC('I', 'S', 'G', '1')
#define DXC_PART_OUTPUT_SIGNATURE DXC_FOURCC('O', 'S', 'G', '1')
#define DXC_PART_PATCH_CONSTANT_SIGNATURE DXC_FOURCC('P', 'S', 'G', '1')
// Some option arguments are defined here for continuity with D3DCompile interface
#define DXC_ARG_DEBUG L"-Zi"
#define DXC_ARG_SKIP_VALIDATION L"-Vd"
#define DXC_ARG_SKIP_OPTIMIZATIONS L"-Od"
#define DXC_ARG_PACK_MATRIX_ROW_MAJOR L"-Zpr"
#define DXC_ARG_PACK_MATRIX_COLUMN_MAJOR L"-Zpc"
#define DXC_ARG_AVOID_FLOW_CONTROL L"-Gfa"
#define DXC_ARG_PREFER_FLOW_CONTROL L"-Gfp"
#define DXC_ARG_ENABLE_STRICTNESS L"-Ges"
#define DXC_ARG_ENABLE_BACKWARDS_COMPATIBILITY L"-Gec"
#define DXC_ARG_IEEE_STRICTNESS L"-Gis"
#define DXC_ARG_OPTIMIZATION_LEVEL0 L"-O0"
#define DXC_ARG_OPTIMIZATION_LEVEL1 L"-O1"
#define DXC_ARG_OPTIMIZATION_LEVEL2 L"-O2"
#define DXC_ARG_OPTIMIZATION_LEVEL3 L"-O3"
#define DXC_ARG_WARNINGS_ARE_ERRORS L"-WX"
#define DXC_ARG_RESOURCES_MAY_ALIAS L"-res_may_alias"
#define DXC_ARG_ALL_RESOURCES_BOUND L"-all_resources_bound"
#define DXC_ARG_DEBUG_NAME_FOR_SOURCE L"-Zss"
#define DXC_ARG_DEBUG_NAME_FOR_BINARY L"-Zsb"
// IDxcBlob is an alias of ID3D10Blob and ID3DBlob
struct __declspec(uuid("8BA5FB08-5195-40e2-AC58-0D989C3A0102"))
IDxcBlob : public IUnknown {
public:
virtual LPVOID STDMETHODCALLTYPE GetBufferPointer(void) = 0;
virtual SIZE_T STDMETHODCALLTYPE GetBufferSize(void) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlob)
};
struct __declspec(uuid("7241d424-2646-4191-97c0-98e96e42fc68"))
IDxcBlobEncoding : public IDxcBlob {
public:
virtual HRESULT STDMETHODCALLTYPE GetEncoding(_Out_ BOOL *pKnown,
_Out_ UINT32 *pCodePage) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobEncoding)
};
// Notes on IDxcBlobUtf16 and IDxcBlobUtf8
// These guarantee null-terminated text and the stated encoding.
// GetBufferSize() will return the size in bytes, including null-terminator
// GetStringLength() will return the length in characters, excluding the null-terminator
// Name strings will use IDxcBlobUtf16, while other string output blobs,
// such as errors/warnings, preprocessed HLSL, or other text will be based
// on the -encoding option.
// The API will use this interface for output name strings
struct __declspec(uuid("A3F84EAB-0FAA-497E-A39C-EE6ED60B2D84"))
IDxcBlobUtf16 : public IDxcBlobEncoding {
public:
virtual LPCWSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf16)
};
struct __declspec(uuid("3DA636C9-BA71-4024-A301-30CBF125305B"))
IDxcBlobUtf8 : public IDxcBlobEncoding {
public:
virtual LPCSTR STDMETHODCALLTYPE GetStringPointer(void) = 0;
virtual SIZE_T STDMETHODCALLTYPE GetStringLength(void) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcBlobUtf8)
};
struct __declspec(uuid("7f61fc7d-950d-467f-b3e3-3c02fb49187c"))
IDxcIncludeHandler : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE LoadSource(
_In_z_ LPCWSTR pFilename, // Candidate filename.
_COM_Outptr_result_maybenull_ IDxcBlob **ppIncludeSource // Resultant source object for included file, nullptr if not found.
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcIncludeHandler)
};
// Structure for supplying bytes or text input to Dxc APIs.
// Use Encoding = 0 for non-text bytes, ANSI text, or unknown with BOM.
typedef struct DxcBuffer {
LPCVOID Ptr;
SIZE_T Size;
UINT Encoding;
} DxcText;
struct DxcDefine {
LPCWSTR Name;
_Maybenull_ LPCWSTR Value;
};
struct __declspec(uuid("73EFFE2A-70DC-45F8-9690-EFF64C02429D"))
IDxcCompilerArgs : public IUnknown {
// Pass GetArguments() and GetCount() to Compile
virtual LPCWSTR* STDMETHODCALLTYPE GetArguments() = 0;
virtual UINT32 STDMETHODCALLTYPE GetCount() = 0;
// Add additional arguments or defines here, if desired.
virtual HRESULT STDMETHODCALLTYPE AddArguments(
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments to add
_In_ UINT32 argCount // Number of arguments to add
) = 0;
virtual HRESULT STDMETHODCALLTYPE AddArgumentsUTF8(
_In_opt_count_(argCount)LPCSTR *pArguments, // Array of pointers to UTF-8 arguments to add
_In_ UINT32 argCount // Number of arguments to add
) = 0;
virtual HRESULT STDMETHODCALLTYPE AddDefines(
_In_count_(defineCount) const DxcDefine *pDefines, // Array of defines
_In_ UINT32 defineCount // Number of defines
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompilerArgs)
};
//////////////////////////
// Legacy Interfaces
/////////////////////////
// NOTE: IDxcUtils replaces IDxcLibrary
struct __declspec(uuid("e5204dc7-d18c-4c3c-bdfb-851673980fe7"))
IDxcLibrary : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE SetMalloc(_In_opt_ IMalloc *pMalloc) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
_In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromFile(
_In_z_ LPCWSTR pFileName, _In_opt_ UINT32* codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingFromPinned(
_In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnHeapCopy(
_In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateBlobWithEncodingOnMalloc(
_In_bytecount_(size) LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateIncludeHandler(
_COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateStreamFromBlobReadOnly(
_In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLibrary)
};
// NOTE: IDxcResult replaces IDxcOperationResult
struct __declspec(uuid("CEDB484A-D4E9-445A-B991-CA21CA157DC2"))
IDxcOperationResult : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) = 0;
// GetResult returns the main result of the operation.
// This corresponds to:
// DXC_OUT_OBJECT - Compile() with shader or library target
// DXC_OUT_DISASSEMBLY - Disassemble()
// DXC_OUT_HLSL - Compile() with -P
// DXC_OUT_ROOT_SIGNATURE - Compile() with rootsig_* target
virtual HRESULT STDMETHODCALLTYPE GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) = 0;
// GetErrorBuffer Corresponds to DXC_OUT_ERRORS.
virtual HRESULT STDMETHODCALLTYPE GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOperationResult)
};
// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
struct __declspec(uuid("8c210bf3-011f-4422-8d70-6f9acb8db617"))
IDxcCompiler : public IUnknown {
// Compile a single entry point to the target shader model
virtual HRESULT STDMETHODCALLTYPE Compile(
_In_ IDxcBlob *pSource, // Source text to compile
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
_In_opt_z_ LPCWSTR pEntryPoint, // entry point name
_In_z_ LPCWSTR pTargetProfile, // shader profile to compile
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
_In_ UINT32 argCount, // Number of arguments
_In_count_(defineCount)
const DxcDefine *pDefines, // Array of defines
_In_ UINT32 defineCount, // Number of defines
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
_COM_Outptr_ IDxcOperationResult **ppResult // Compiler output status, buffer, and errors
) = 0;
// Preprocess source text
virtual HRESULT STDMETHODCALLTYPE Preprocess(
_In_ IDxcBlob *pSource, // Source text to preprocess
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
_In_ UINT32 argCount, // Number of arguments
_In_count_(defineCount)
const DxcDefine *pDefines, // Array of defines
_In_ UINT32 defineCount, // Number of defines
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
_COM_Outptr_ IDxcOperationResult **ppResult // Preprocessor output status, buffer, and errors
) = 0;
// Disassemble a program.
virtual HRESULT STDMETHODCALLTYPE Disassemble(
_In_ IDxcBlob *pSource, // Program to disassemble.
_COM_Outptr_ IDxcBlobEncoding **ppDisassembly // Disassembly text.
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler)
};
// NOTE: IDxcCompiler3 replaces IDxcCompiler and IDxcCompiler2
struct __declspec(uuid("A005A9D9-B8BB-4594-B5C9-0E633BEC4D37"))
IDxcCompiler2 : public IDxcCompiler {
// Compile a single entry point to the target shader model with debug information.
virtual HRESULT STDMETHODCALLTYPE CompileWithDebug(
_In_ IDxcBlob *pSource, // Source text to compile
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
_In_opt_z_ LPCWSTR pEntryPoint, // Entry point name
_In_z_ LPCWSTR pTargetProfile, // Shader profile to compile
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
_In_ UINT32 argCount, // Number of arguments
_In_count_(defineCount)
const DxcDefine *pDefines, // Array of defines
_In_ UINT32 defineCount, // Number of defines
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
_COM_Outptr_ IDxcOperationResult **ppResult, // Compiler output status, buffer, and errors
_Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. (Must be HeapFree()'d!)
_COM_Outptr_opt_ IDxcBlob **ppDebugBlob // Debug blob
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler2)
};
struct __declspec(uuid("F1B5BE2A-62DD-4327-A1C2-42AC1E1E78E6"))
IDxcLinker : public IUnknown {
public:
// Register a library with name to ref it later.
virtual HRESULT RegisterLibrary(
_In_opt_ LPCWSTR pLibName, // Name of the library.
_In_ IDxcBlob *pLib // Library blob.
) = 0;
// Links the shader and produces a shader blob that the Direct3D runtime can
// use.
virtual HRESULT STDMETHODCALLTYPE Link(
_In_opt_ LPCWSTR pEntryName, // Entry point name
_In_ LPCWSTR pTargetProfile, // shader profile to link
_In_count_(libCount)
const LPCWSTR *pLibNames, // Array of library names to link
_In_ UINT32 libCount, // Number of libraries to link
_In_opt_count_(argCount) const LPCWSTR *pArguments, // Array of pointers to arguments
_In_ UINT32 argCount, // Number of arguments
_COM_Outptr_
IDxcOperationResult **ppResult // Linker output status, buffer, and errors
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcLinker)
};
/////////////////////////
// Latest interfaces. Please use these
////////////////////////
// NOTE: IDxcUtils replaces IDxcLibrary
struct __declspec(uuid("4605C4CB-2019-492A-ADA4-65F20BB7D67F"))
IDxcUtils : public IUnknown {
// Create a sub-blob that holds a reference to the outer blob and points to its memory.
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromBlob(
_In_ IDxcBlob *pBlob, UINT32 offset, UINT32 length, _COM_Outptr_ IDxcBlob **ppResult) = 0;
// For codePage, use 0 (or DXC_CP_ACP) for raw binary or ANSI code page
// Creates a blob referencing existing memory, with no copy.
// User must manage the memory lifetime separately.
// (was: CreateBlobWithEncodingFromPinned)
virtual HRESULT STDMETHODCALLTYPE CreateBlobFromPinned(
_In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
// Create blob, taking ownership of memory allocated with supplied allocator.
// (was: CreateBlobWithEncodingOnMalloc)
virtual HRESULT STDMETHODCALLTYPE MoveToBlob(
_In_bytecount_(size) LPCVOID pData, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
////
// New blobs and copied contents are allocated with the current allocator
// Copy blob contents to memory owned by the new blob.
// (was: CreateBlobWithEncodingOnHeapCopy)
virtual HRESULT STDMETHODCALLTYPE CreateBlob(
_In_bytecount_(size) LPCVOID pData, UINT32 size, UINT32 codePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
// (was: CreateBlobFromFile)
virtual HRESULT STDMETHODCALLTYPE LoadFile(
_In_z_ LPCWSTR pFileName, _In_opt_ UINT32* pCodePage,
_COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE CreateReadOnlyStreamFromBlob(
_In_ IDxcBlob *pBlob, _COM_Outptr_ IStream **ppStream) = 0;
// Create default file-based include handler
virtual HRESULT STDMETHODCALLTYPE CreateDefaultIncludeHandler(
_COM_Outptr_ IDxcIncludeHandler **ppResult) = 0;
// Convert or return matching encoded text blobs
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf8(
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf8 **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE GetBlobAsUtf16(
_In_ IDxcBlob *pBlob, _COM_Outptr_ IDxcBlobUtf16 **pBlobEncoding) = 0;
virtual HRESULT STDMETHODCALLTYPE GetDxilContainerPart(
_In_ const DxcBuffer *pShader,
_In_ UINT32 DxcPart,
_Outptr_result_nullonfailure_ void **ppPartData,
_Out_ UINT32 *pPartSizeInBytes) = 0;
// Create reflection interface from serialized Dxil container, or DXC_PART_REFLECTION_DATA.
// TBD: Require part header for RDAT? (leaning towards yes)
virtual HRESULT STDMETHODCALLTYPE CreateReflection(
_In_ const DxcBuffer *pData, REFIID iid, void **ppvReflection) = 0;
virtual HRESULT STDMETHODCALLTYPE BuildArguments(
_In_opt_z_ LPCWSTR pSourceName, // Optional file name for pSource. Used in errors and include handlers.
_In_opt_z_ LPCWSTR pEntryPoint, // Entry point name. (-E)
_In_z_ LPCWSTR pTargetProfile, // Shader profile to compile. (-T)
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
_In_ UINT32 argCount, // Number of arguments
_In_count_(defineCount)
const DxcDefine *pDefines, // Array of defines
_In_ UINT32 defineCount, // Number of defines
_COM_Outptr_ IDxcCompilerArgs **ppArgs // Arguments you can use with Compile() method
) = 0;
// Takes the shader PDB and returns the hash and the container inside it
virtual HRESULT STDMETHODCALLTYPE GetPDBContents(
_In_ IDxcBlob *pPDBBlob, _COM_Outptr_ IDxcBlob **ppHash, _COM_Outptr_ IDxcBlob **ppContainer) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcUtils)
};
// For use with IDxcResult::[Has|Get]Output dxcOutKind argument
// Note: text outputs returned from version 2 APIs are UTF-8 or UTF-16 based on -encoding option
typedef enum DXC_OUT_KIND {
DXC_OUT_NONE = 0,
DXC_OUT_OBJECT = 1, // IDxcBlob - Shader or library object
DXC_OUT_ERRORS = 2, // IDxcBlobUtf8 or IDxcBlobUtf16
DXC_OUT_PDB = 3, // IDxcBlob
DXC_OUT_SHADER_HASH = 4, // IDxcBlob - DxcShaderHash of shader or shader with source info (-Zsb/-Zss)
DXC_OUT_DISASSEMBLY = 5, // IDxcBlobUtf8 or IDxcBlobUtf16 - from Disassemble
DXC_OUT_HLSL = 6, // IDxcBlobUtf8 or IDxcBlobUtf16 - from Preprocessor or Rewriter
DXC_OUT_TEXT = 7, // IDxcBlobUtf8 or IDxcBlobUtf16 - other text, such as -ast-dump or -Odump
DXC_OUT_REFLECTION = 8, // IDxcBlob - RDAT part with reflection data
DXC_OUT_ROOT_SIGNATURE = 9, // IDxcBlob - Serialized root signature output
DXC_OUT_FORCE_DWORD = 0xFFFFFFFF
} DXC_OUT_KIND;
struct __declspec(uuid("58346CDA-DDE7-4497-9461-6F87AF5E0659"))
IDxcResult : public IDxcOperationResult {
virtual BOOL STDMETHODCALLTYPE HasOutput(_In_ DXC_OUT_KIND dxcOutKind) = 0;
virtual HRESULT STDMETHODCALLTYPE GetOutput(_In_ DXC_OUT_KIND dxcOutKind,
_In_ REFIID iid, _COM_Outptr_opt_result_maybenull_ void **ppvObject,
_COM_Outptr_ IDxcBlobUtf16 **ppOutputName) = 0;
virtual UINT32 GetNumOutputs() = 0;
virtual DXC_OUT_KIND GetOutputByIndex(UINT32 Index) = 0;
virtual DXC_OUT_KIND PrimaryOutput() = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcResult)
};
struct __declspec(uuid("228B4687-5A6A-4730-900C-9702B2203F54"))
IDxcCompiler3 : public IUnknown {
// Compile a single entry point to the target shader model,
// Compile a library to a library target (-T lib_*),
// Compile a root signature (-T rootsig_*), or
// Preprocess HLSL source (-P)
virtual HRESULT STDMETHODCALLTYPE Compile(
_In_ const DxcBuffer *pSource, // Source text to compile
_In_opt_count_(argCount) LPCWSTR *pArguments, // Array of pointers to arguments
_In_ UINT32 argCount, // Number of arguments
_In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
_In_ REFIID riid, _Out_ LPVOID *ppResult // IDxcResult: status, buffer, and errors
) = 0;
// Disassemble a program.
virtual HRESULT STDMETHODCALLTYPE Disassemble(
_In_ const DxcBuffer *pObject, // Program to disassemble: dxil container or bitcode.
_In_ REFIID riid, _Out_ LPVOID *ppResult // IDxcResult: status, disassembly text, and errors
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcCompiler3)
};
static const UINT32 DxcValidatorFlags_Default = 0;
static const UINT32 DxcValidatorFlags_InPlaceEdit = 1; // Validator is allowed to update shader blob in-place.
static const UINT32 DxcValidatorFlags_RootSignatureOnly = 2;
static const UINT32 DxcValidatorFlags_ModuleOnly = 4;
static const UINT32 DxcValidatorFlags_ValidMask = 0x7;
struct __declspec(uuid("A6E82BD2-1FD7-4826-9811-2857E797F49A"))
IDxcValidator : public IUnknown {
// Validate a shader.
virtual HRESULT STDMETHODCALLTYPE Validate(
_In_ IDxcBlob *pShader, // Shader to validate.
_In_ UINT32 Flags, // Validation flags.
_COM_Outptr_ IDxcOperationResult **ppResult // Validation output status, buffer, and errors
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcValidator)
};
struct __declspec(uuid("334b1f50-2292-4b35-99a1-25588d8c17fe"))
IDxcContainerBuilder : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0; // Loads DxilContainer to the builder
virtual HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) = 0; // Part to add to the container
virtual HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC) = 0; // Remove the part with fourCC
virtual HRESULT STDMETHODCALLTYPE SerializeContainer(_Out_ IDxcOperationResult **ppResult) = 0; // Builds a container of the given container builder state
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcContainerBuilder)
};
struct __declspec(uuid("091f7a26-1c1f-4948-904b-e6e3a8a771d5"))
IDxcAssembler : public IUnknown {
// Assemble dxil in ll or llvm bitcode to DXIL container.
virtual HRESULT STDMETHODCALLTYPE AssembleToContainer(
_In_ IDxcBlob *pShader, // Shader to assemble.
_COM_Outptr_ IDxcOperationResult **ppResult // Assembly output status, buffer, and errors
) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcAssembler)
};
struct __declspec(uuid("d2c21b26-8350-4bdc-976a-331ce6f4c54c"))
IDxcContainerReflection : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pContainer) = 0; // Container to load.
virtual HRESULT STDMETHODCALLTYPE GetPartCount(_Out_ UINT32 *pResult) = 0;
virtual HRESULT STDMETHODCALLTYPE GetPartKind(UINT32 idx, _Out_ UINT32 *pResult) = 0;
virtual HRESULT STDMETHODCALLTYPE GetPartContent(UINT32 idx, _COM_Outptr_ IDxcBlob **ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE FindFirstPartKind(UINT32 kind, _Out_ UINT32 *pResult) = 0;
virtual HRESULT STDMETHODCALLTYPE GetPartReflection(UINT32 idx, REFIID iid, void **ppvObject) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcContainerReflection)
};
struct __declspec(uuid("AE2CD79F-CC22-453F-9B6B-B124E7A5204C"))
IDxcOptimizerPass : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE GetOptionName(_COM_Outptr_ LPWSTR *ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE GetDescription(_COM_Outptr_ LPWSTR *ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE GetOptionArgCount(_Out_ UINT32 *pCount) = 0;
virtual HRESULT STDMETHODCALLTYPE GetOptionArgName(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE GetOptionArgDescription(UINT32 argIndex, _COM_Outptr_ LPWSTR *ppResult) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOptimizerPass)
};
struct __declspec(uuid("25740E2E-9CBA-401B-9119-4FB42F39F270"))
IDxcOptimizer : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE GetAvailablePassCount(_Out_ UINT32 *pCount) = 0;
virtual HRESULT STDMETHODCALLTYPE GetAvailablePass(UINT32 index, _COM_Outptr_ IDxcOptimizerPass** ppResult) = 0;
virtual HRESULT STDMETHODCALLTYPE RunOptimizer(IDxcBlob *pBlob,
_In_count_(optionCount) LPCWSTR *ppOptions, UINT32 optionCount,
_COM_Outptr_ IDxcBlob **pOutputModule,
_COM_Outptr_opt_ IDxcBlobEncoding **ppOutputText) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcOptimizer)
};
static const UINT32 DxcVersionInfoFlags_None = 0;
static const UINT32 DxcVersionInfoFlags_Debug = 1; // Matches VS_FF_DEBUG
static const UINT32 DxcVersionInfoFlags_Internal = 2; // Internal Validator (non-signing)
struct __declspec(uuid("b04f5b50-2059-4f12-a8ff-a1e0cde1cc7e"))
IDxcVersionInfo : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE GetVersion(_Out_ UINT32 *pMajor, _Out_ UINT32 *pMinor) = 0;
virtual HRESULT STDMETHODCALLTYPE GetFlags(_Out_ UINT32 *pFlags) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcVersionInfo)
};
struct __declspec(uuid("fb6904c4-42f0-4b62-9c46-983af7da7c83"))
IDxcVersionInfo2 : public IDxcVersionInfo {
virtual HRESULT STDMETHODCALLTYPE GetCommitInfo(_Out_ UINT32 *pCommitCount, _Out_ char **pCommitHash) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(IDxcVersionInfo2)
};
// Note: __declspec(selectany) requires 'extern'
// On Linux __declspec(selectany) is removed and using 'extern' results in link error.
#ifdef _MSC_VER
#define CLSID_SCOPE __declspec(selectany) extern
#else
#define CLSID_SCOPE
#endif
CLSID_SCOPE const CLSID CLSID_DxcCompiler = {
0x73e22d93,
0xe6ce,
0x47f3,
{0xb5, 0xbf, 0xf0, 0x66, 0x4f, 0x39, 0xc1, 0xb0}};
// {EF6A8087-B0EA-4D56-9E45-D07E1A8B7806}
CLSID_SCOPE const GUID CLSID_DxcLinker = {
0xef6a8087,
0xb0ea,
0x4d56,
{0x9e, 0x45, 0xd0, 0x7e, 0x1a, 0x8b, 0x78, 0x6}};
// {CD1F6B73-2AB0-484D-8EDC-EBE7A43CA09F}
CLSID_SCOPE const CLSID CLSID_DxcDiaDataSource = {
0xcd1f6b73,
0x2ab0,
0x484d,
{0x8e, 0xdc, 0xeb, 0xe7, 0xa4, 0x3c, 0xa0, 0x9f}};
// {3E56AE82-224D-470F-A1A1-FE3016EE9F9D}
CLSID_SCOPE const CLSID CLSID_DxcCompilerArgs = {
0x3e56ae82,
0x224d,
0x470f,
{0xa1, 0xa1, 0xfe, 0x30, 0x16, 0xee, 0x9f, 0x9d}};
// {6245D6AF-66E0-48FD-80B4-4D271796748C}
CLSID_SCOPE const GUID CLSID_DxcLibrary = {
0x6245d6af,
0x66e0,
0x48fd,
{0x80, 0xb4, 0x4d, 0x27, 0x17, 0x96, 0x74, 0x8c}};
CLSID_SCOPE const GUID CLSID_DxcUtils = CLSID_DxcLibrary;
// {8CA3E215-F728-4CF3-8CDD-88AF917587A1}
CLSID_SCOPE const GUID CLSID_DxcValidator = {
0x8ca3e215,
0xf728,
0x4cf3,
{0x8c, 0xdd, 0x88, 0xaf, 0x91, 0x75, 0x87, 0xa1}};
// {D728DB68-F903-4F80-94CD-DCCF76EC7151}
CLSID_SCOPE const GUID CLSID_DxcAssembler = {
0xd728db68,
0xf903,
0x4f80,
{0x94, 0xcd, 0xdc, 0xcf, 0x76, 0xec, 0x71, 0x51}};
// {b9f54489-55b8-400c-ba3a-1675e4728b91}
CLSID_SCOPE const GUID CLSID_DxcContainerReflection = {
0xb9f54489,
0x55b8,
0x400c,
{0xba, 0x3a, 0x16, 0x75, 0xe4, 0x72, 0x8b, 0x91}};
// {AE2CD79F-CC22-453F-9B6B-B124E7A5204C}
CLSID_SCOPE const GUID CLSID_DxcOptimizer = {
0xae2cd79f,
0xcc22,
0x453f,
{0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c}};
// {94134294-411f-4574-b4d0-8741e25240d2}
CLSID_SCOPE const GUID CLSID_DxcContainerBuilder = {
0x94134294,
0x411f,
0x4574,
{0xb4, 0xd0, 0x87, 0x41, 0xe2, 0x52, 0x40, 0xd2}};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -32,6 +32,14 @@ bool dxil_nir_lower_8bit_conv(nir_shader *shader);
bool dxil_nir_lower_16bit_conv(nir_shader *shader);
bool dxil_nir_lower_x2b(nir_shader *shader);
bool dxil_nir_lower_inot(nir_shader *shader);
bool dxil_nir_lower_ubo_to_temp(nir_shader *shader);
bool dxil_nir_lower_loads_stores_to_dxil(nir_shader *shader);
bool dxil_nir_lower_atomics_to_dxil(nir_shader *shader);
bool dxil_nir_lower_deref_ssbo(nir_shader *shader);
bool dxil_nir_opt_alu_deref_srcs(nir_shader *shader);
bool dxil_nir_lower_memcpy_deref(nir_shader *shader);
bool dxil_nir_lower_upcast_phis(nir_shader *shader, unsigned min_bit_size);
bool dxil_nir_lower_fp16_casts(nir_shader *shader);
nir_ssa_def *
build_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer,

View File

@ -97,8 +97,10 @@ nir_options = {
.lower_pack_32_2x16_split = true,
.lower_unpack_64_2x32_split = true,
.lower_unpack_32_2x16_split = true,
.use_scoped_barrier = true,
.vertex_id_zero_based = true,
.lower_base_vertex = true,
.has_cs_global_id = true,
};
const nir_shader_compiler_options*
@ -808,6 +810,48 @@ emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned binding, unsigned
return true;
}
static bool
emit_globals(struct ntd_context *ctx, nir_shader *s, unsigned size)
{
nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo)
size++;
if (!size)
return true;
const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, 32);
if (!type)
return false;
const struct dxil_type *struct_type =
dxil_module_get_struct_type(&ctx->mod, NULL, &type, 1);
if (!struct_type)
return false;
const struct dxil_type *array_type =
dxil_module_get_array_type(&ctx->mod, struct_type, size);
if (!array_type)
return false;
resource_array_layout layout = {0, 0, size};
const struct dxil_mdnode *uav_meta =
emit_uav_metadata(&ctx->mod, array_type,
"globals", &layout,
DXIL_COMP_TYPE_INVALID,
DXIL_RESOURCE_KIND_RAW_BUFFER);
if (!uav_meta)
return false;
ctx->uav_metadata_nodes[ctx->num_uav_arrays++] = uav_meta;
if (ctx->num_uav_arrays > 8)
ctx->mod.feats.use_64uavs = 1;
/* Handles to UAVs used for kernel globals are created on-demand */
ctx->num_uavs += size;
add_resource(ctx, DXIL_RES_UAV_RAW, &layout);
ctx->mod.raw_and_structured_buffers = true;
return true;
}
static bool
emit_uav(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
@ -936,6 +980,53 @@ var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
unreachable("unknown GLSL type in var_fill_const_array");
}
static bool
emit_global_consts(struct ntd_context *ctx, nir_shader *s)
{
nir_foreach_variable_with_modes(var, s, nir_var_shader_temp) {
struct dxil_value *ret;
bool err;
assert(var->constant_initializer);
unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
err = var_fill_const_array(ctx, var->constant_initializer, var->type,
const_ints, 0);
if (!err)
return false;
const struct dxil_value **const_vals =
ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
if (!const_vals)
return false;
for (int i = 0; i < num_members; i++)
const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);
const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
if (!elt_type)
return false;
const struct dxil_type *type =
dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
if (!type)
return false;
const struct dxil_value *agg_vals =
dxil_module_get_array_const(&ctx->mod, type, const_vals);
if (!agg_vals)
return false;
const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
DXIL_AS_DEFAULT, 4,
agg_vals);
if (!gvar)
return false;
if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
return false;
}
return true;
}
static bool
emit_cbv(struct ntd_context *ctx, unsigned binding,
unsigned size, char *name)
@ -1882,6 +1973,8 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
case nir_op_fddx:
case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
@ -1966,6 +2059,120 @@ load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}
static bool
emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *opcode, *mode;
const struct dxil_func *func;
uint32_t flags = 0;
if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP)
flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
nir_scope mem_scope = nir_intrinsic_memory_scope(intr);
if (modes & ~(nir_var_mem_ssbo | nir_var_mem_global | nir_var_mem_shared))
return false;
if (mem_scope != NIR_SCOPE_DEVICE && mem_scope != NIR_SCOPE_WORKGROUP)
return false;
if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
if (mem_scope == NIR_SCOPE_DEVICE)
flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
else
flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
}
if (modes & nir_var_mem_shared)
flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
if (!func)
return false;
opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
if (!opcode)
return false;
mode = dxil_module_get_int32_const(&ctx->mod, flags);
if (!mode)
return false;
const struct dxil_value *args[] = { opcode, mode };
return dxil_emit_call_void(&ctx->mod, func,
args, ARRAY_SIZE(args));
}
static bool
emit_load_global_invocation_id(struct ntd_context *ctx,
nir_intrinsic_instr *intr)
{
assert(intr->dest.is_ssa);
nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
if (comps & (1 << i)) {
const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
if (!idx)
return false;
const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
if (!globalid)
return false;
store_dest_value(ctx, &intr->dest, i, globalid);
}
}
return true;
}
static bool
emit_load_local_invocation_id(struct ntd_context *ctx,
nir_intrinsic_instr *intr)
{
assert(intr->dest.is_ssa);
nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
if (comps & (1 << i)) {
const struct dxil_value
*idx = dxil_module_get_int32_const(&ctx->mod, i);
if (!idx)
return false;
const struct dxil_value
*threadidingroup = emit_threadidingroup_call(ctx, idx);
if (!threadidingroup)
return false;
store_dest_value(ctx, &intr->dest, i, threadidingroup);
}
}
return true;
}
static bool
emit_load_local_work_group_id(struct ntd_context *ctx,
nir_intrinsic_instr *intr)
{
assert(intr->dest.is_ssa);
nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
if (comps & (1 << i)) {
const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
if (!idx)
return false;
const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
if (!groupid)
return false;
store_dest_value(ctx, &intr->dest, i, groupid);
}
}
return true;
}
static bool
emit_load_primitiveid(struct ntd_context *ctx,
nir_intrinsic_instr *intr)
@ -2000,6 +2207,249 @@ get_int32_undef(struct dxil_module *m)
return dxil_module_get_undef(m, int32_type);
}
static const struct dxil_value *
offset_to_index(struct dxil_module *m, const struct dxil_value *offset,
unsigned bit_size)
{
unsigned shift_amt = util_logbase2(bit_size / 8);
const struct dxil_value *shift =
dxil_module_get_int32_const(m, shift_amt);
if (!shift)
return NULL;
return dxil_emit_binop(m, DXIL_BINOP_LSHR, offset, shift, 0);
}
static const struct dxil_value *
index_to_offset(struct dxil_module *m, const struct dxil_value *index,
unsigned bit_size)
{
unsigned shift_amt = util_logbase2(bit_size / 8);
const struct dxil_value *shift =
dxil_module_get_int32_const(m, shift_amt);
if (!shift)
return NULL;
return dxil_emit_binop(m, DXIL_BINOP_SHL, index, shift, 0);
}
static const struct dxil_value *
emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
const struct dxil_value *index)
{
assert(var->data.mode == nir_var_shader_temp);
struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
assert(he != NULL);
const struct dxil_value *ptr = he->data;
const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
if (!zero)
return NULL;
const struct dxil_value *ops[] = { ptr, zero, index };
return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
}
static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
const struct dxil_value *buffer =
get_src(ctx, &intr->src[0], 0, nir_type_uint);
const struct dxil_value *offset =
get_src(ctx, &intr->src[1], 0, nir_type_uint);
if (!int32_undef || !buffer || !offset)
return false;
assert(nir_src_bit_size(intr->src[0]) == 32);
assert(nir_intrinsic_dest_components(intr) <= 4);
const struct dxil_value *handle =
emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer,
nir_src_is_const(intr->src[0]));
if (!handle)
return false;
const struct dxil_value *coord[2] = {
offset,
int32_undef
};
const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord);
if (!load)
return false;
for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
const struct dxil_value *val =
dxil_emit_extractval(&ctx->mod, load, i);
if (!val)
return false;
store_dest_value(ctx, &intr->dest, i, val);
}
return true;
}
static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *buffer =
get_src(ctx, &intr->src[1], 0, nir_type_uint);
const struct dxil_value *offset =
get_src(ctx, &intr->src[2], 0, nir_type_uint);
if (!buffer || !offset)
return false;
const struct dxil_value *handle =
emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer,
nir_src_is_const(intr->src[1]));
if (!handle)
return false;
assert(nir_src_bit_size(intr->src[0]) == 32);
unsigned num_components = nir_src_num_components(intr->src[0]);
assert(num_components <= 4);
const struct dxil_value *value[4];
for (unsigned i = 0; i < num_components; ++i) {
value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
if (!value[i])
return false;
}
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
if (!int32_undef)
return false;
const struct dxil_value *coord[2] = {
offset,
int32_undef
};
for (int i = num_components; i < 4; ++i)
value[i] = int32_undef;
const struct dxil_value *write_mask =
dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
if (!write_mask)
return false;
return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
}
static bool
emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *value =
get_src(ctx, &intr->src[0], 0, nir_type_uint);
const struct dxil_value *mask =
get_src(ctx, &intr->src[1], 0, nir_type_uint);
const struct dxil_value *buffer =
get_src(ctx, &intr->src[2], 0, nir_type_uint);
const struct dxil_value *offset =
get_src(ctx, &intr->src[3], 0, nir_type_uint);
if (!value || !mask || !buffer || !offset)
return false;
const struct dxil_value *handle =
emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer,
nir_src_is_const(intr->src[2]));
if (!handle)
return false;
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
if (!int32_undef)
return false;
const struct dxil_value *coord[3] = {
offset, int32_undef, int32_undef
};
return
emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
}
static bool
emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *zero, *index;
unsigned bit_size = nir_src_bit_size(intr->src[0]);
/* All shared mem accesses should have been lowered to scalar 32bit
* accesses.
*/
assert(bit_size == 32);
assert(nir_src_num_components(intr->src[0]) == 1);
zero = dxil_module_get_int32_const(&ctx->mod, 0);
if (!zero)
return false;
if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
else
index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
if (!index)
return false;
const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
const struct dxil_value *ptr, *value;
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
if (!ptr)
return false;
value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
DXIL_ATOMIC_ORDERING_ACQREL,
DXIL_SYNC_SCOPE_CROSSTHREAD))
return false;
if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
DXIL_ATOMIC_ORDERING_ACQREL,
DXIL_SYNC_SCOPE_CROSSTHREAD))
return false;
return true;
}
static bool
emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *zero, *index;
unsigned bit_size = nir_src_bit_size(intr->src[0]);
/* All scratch mem accesses should have been lowered to scalar 32bit
* accesses.
*/
assert(bit_size == 32);
assert(nir_src_num_components(intr->src[0]) == 1);
zero = dxil_module_get_int32_const(&ctx->mod, 0);
if (!zero)
return false;
index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
if (!index)
return false;
const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
const struct dxil_value *ptr, *value;
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
if (!ptr)
return false;
value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
}
static bool
emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
@ -2224,6 +2674,97 @@ emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr,
return emit_load_input_flat(ctx, intr, input);
}
static bool
emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
struct nir_variable *var =
nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
const struct dxil_value *index =
get_src(ctx, &intr->src[1], 0, nir_type_uint);
const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
if (!ptr)
return false;
const struct dxil_value *retval =
dxil_emit_load(&ctx->mod, ptr, 4, false);
store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
return true;
}
static bool
emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *zero, *index;
unsigned bit_size = nir_dest_bit_size(intr->dest);
unsigned align = bit_size / 8;
/* All shared mem accesses should have been lowered to scalar 32bit
* accesses.
*/
assert(bit_size == 32);
assert(nir_dest_num_components(intr->dest) == 1);
zero = dxil_module_get_int32_const(&ctx->mod, 0);
if (!zero)
return false;
index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
if (!index)
return false;
const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
const struct dxil_value *ptr, *retval;
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
if (!ptr)
return false;
retval = dxil_emit_load(&ctx->mod, ptr, align, false);
if (!retval)
return false;
store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
return true;
}
static bool
emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
const struct dxil_value *zero, *one, *index;
unsigned bit_size = nir_dest_bit_size(intr->dest);
unsigned align = bit_size / 8;
/* All scratch mem accesses should have been lowered to scalar 32bit
* accesses.
*/
assert(bit_size == 32);
assert(nir_dest_num_components(intr->dest) == 1);
zero = dxil_module_get_int32_const(&ctx->mod, 0);
if (!zero)
return false;
index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
if (!index)
return false;
const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
const struct dxil_value *ptr, *retval;
ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
if (!ptr)
return false;
retval = dxil_emit_load(&ctx->mod, ptr, align, false);
if (!retval)
return false;
store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
return true;
}
static bool
emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
@ -2573,10 +3114,31 @@ static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
switch (intr->intrinsic) {
case nir_intrinsic_load_global_invocation_id:
case nir_intrinsic_load_global_invocation_id_zero_base:
return emit_load_global_invocation_id(ctx, intr);
case nir_intrinsic_load_local_invocation_id:
return emit_load_local_invocation_id(ctx, intr);
case nir_intrinsic_load_work_group_id:
case nir_intrinsic_load_work_group_id_zero_base:
return emit_load_local_work_group_id(ctx, intr);
case nir_intrinsic_load_ssbo:
return emit_load_ssbo(ctx, intr);
case nir_intrinsic_store_ssbo:
return emit_store_ssbo(ctx, intr);
case nir_intrinsic_store_ssbo_masked_dxil:
return emit_store_ssbo_masked(ctx, intr);
case nir_intrinsic_store_deref:
return emit_store_deref(ctx, intr);
case nir_intrinsic_store_shared_dxil:
case nir_intrinsic_store_shared_masked_dxil:
return emit_store_shared(ctx, intr);
case nir_intrinsic_store_scratch_dxil:
return emit_store_scratch(ctx, intr);
case nir_intrinsic_load_deref:
return emit_load_deref(ctx, intr);
case nir_intrinsic_load_ptr_dxil:
return emit_load_ptr(ctx, intr);
case nir_intrinsic_load_ubo:
return emit_load_ubo(ctx, intr);
case nir_intrinsic_load_ubo_dxil:
@ -2592,6 +3154,10 @@ emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]);
case nir_intrinsic_load_primitive_id:
return emit_load_primitiveid(ctx, intr);
case nir_intrinsic_load_shared_dxil:
return emit_load_shared(ctx, intr);
case nir_intrinsic_load_scratch_dxil:
return emit_load_scratch(ctx, intr);
case nir_intrinsic_discard_if:
return emit_discard_if(ctx, intr);
case nir_intrinsic_discard:
@ -2600,7 +3166,55 @@ emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
return emit_emit_vertex(ctx, intr);
case nir_intrinsic_end_primitive:
return emit_end_primitive(ctx, intr);
case nir_intrinsic_scoped_barrier:
return emit_barrier(ctx, intr);
case nir_intrinsic_ssbo_atomic_add:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
case nir_intrinsic_ssbo_atomic_imin:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
case nir_intrinsic_ssbo_atomic_umin:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
case nir_intrinsic_ssbo_atomic_imax:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
case nir_intrinsic_ssbo_atomic_umax:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
case nir_intrinsic_ssbo_atomic_and:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
case nir_intrinsic_ssbo_atomic_or:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
case nir_intrinsic_ssbo_atomic_xor:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
case nir_intrinsic_ssbo_atomic_exchange:
return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
case nir_intrinsic_ssbo_atomic_comp_swap:
return emit_ssbo_atomic_comp_swap(ctx, intr);
case nir_intrinsic_shared_atomic_add_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
case nir_intrinsic_shared_atomic_imin_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
case nir_intrinsic_shared_atomic_umin_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
case nir_intrinsic_shared_atomic_imax_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
case nir_intrinsic_shared_atomic_umax_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
case nir_intrinsic_shared_atomic_and_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
case nir_intrinsic_shared_atomic_or_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
case nir_intrinsic_shared_atomic_xor_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
case nir_intrinsic_shared_atomic_exchange_dxil:
return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
case nir_intrinsic_shared_atomic_comp_swap_dxil:
return emit_shared_atomic_comp_swap(ctx, intr);
case nir_intrinsic_image_store:
return emit_image_store(ctx, intr);
case nir_intrinsic_image_size:
return emit_image_size(ctx, intr);
case nir_intrinsic_load_num_work_groups:
case nir_intrinsic_load_local_group_size:
default:
NIR_INSTR_UNSUPPORTED(&intr->instr);
assert("Unimplemented intrinsic instruction");
@ -3266,18 +3880,88 @@ prepare_phi_values(struct ntd_context *ctx, nir_shader *shader)
static bool
emit_cbvs(struct ntd_context *ctx, nir_shader *s)
{
for (int i = ctx->opts->ubo_binding_offset; i < s->info.num_ubos; ++i) {
char name[64];
snprintf(name, sizeof(name), "__ubo%d", i);
if (!emit_cbv(ctx, i, 16384 /*4096 vec4's*/, name))
return false;
if (s->info.stage == MESA_SHADER_KERNEL) {
nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) {
if (!emit_ubo_var(ctx, var))
return false;
}
} else {
for (int i = ctx->opts->ubo_binding_offset; i < s->info.num_ubos; ++i) {
char name[64];
snprintf(name, sizeof(name), "__ubo%d", i);
if (!emit_cbv(ctx, i, 16384 /*4096 vec4's*/, name))
return false;
}
}
return true;
}
static bool
emit_module(struct ntd_context *ctx, nir_shader *s)
emit_scratch(struct ntd_context *ctx, nir_shader *s)
{
if (s->scratch_size) {
/*
* We always allocate an u32 array, no matter the actual variable types.
* According to the DXIL spec, the minimum load/store granularity is
* 32-bit, anything smaller requires using a read-extract/read-write-modify
* approach.
*/
unsigned size = ALIGN_POT(s->scratch_size, sizeof(uint32_t));
const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
if (!int32 || !array_length)
return false;
const struct dxil_type *type = dxil_module_get_array_type(
&ctx->mod, int32, size / sizeof(uint32_t));
if (!type)
return false;
ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
if (!ctx->scratchvars)
return false;
}
return true;
}
/* The validator complains if we don't have ops that reference a global variable. */
static bool
shader_has_shared_ops(struct nir_shader *s)
{
nir_foreach_function(func, s) {
if (!func->impl)
continue;
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_shared_dxil:
case nir_intrinsic_store_shared_dxil:
case nir_intrinsic_shared_atomic_add_dxil:
case nir_intrinsic_shared_atomic_and_dxil:
case nir_intrinsic_shared_atomic_comp_swap_dxil:
case nir_intrinsic_shared_atomic_exchange_dxil:
case nir_intrinsic_shared_atomic_imax_dxil:
case nir_intrinsic_shared_atomic_imin_dxil:
case nir_intrinsic_shared_atomic_or_dxil:
case nir_intrinsic_shared_atomic_umax_dxil:
case nir_intrinsic_shared_atomic_umin_dxil:
case nir_intrinsic_shared_atomic_xor_dxil:
return true;
default: break;
}
}
}
}
return false;
}
static bool
emit_module(struct ntd_context *ctx, nir_shader *s, const struct nir_to_dxil_options *opts)
{
unsigned binding;
@ -3314,6 +3998,45 @@ emit_module(struct ntd_context *ctx, nir_shader *s)
}
}
if (s->info.cs.shared_size && shader_has_shared_ops(s)) {
const struct dxil_type *type;
unsigned size;
/*
* We always allocate an u32 array, no matter the actual variable types.
* According to the DXIL spec, the minimum load/store granularity is
* 32-bit, anything smaller requires using a read-extract/read-write-modify
* approach. Non-atomic 64-bit accesses are allowed, but the
* GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *))
* sequences don't seem to be accepted by the DXIL validator when the
* pointer is in the groupshared address space, making the 32-bit -> 64-bit
* pointer cast impossible.
*/
size = ALIGN_POT(s->info.cs.shared_size, sizeof(uint32_t));
type = dxil_module_get_array_type(&ctx->mod,
dxil_module_get_int_type(&ctx->mod, 32),
size / sizeof(uint32_t));
ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
DXIL_AS_GROUPSHARED,
ffs(sizeof(uint64_t)),
NULL);
}
if (!emit_scratch(ctx, s))
return false;
/* UAVs */
if (s->info.stage == MESA_SHADER_KERNEL) {
if (!emit_globals(ctx, s, opts->num_kernel_globals))
return false;
ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
if (!ctx->consts)
return false;
if (!emit_global_consts(ctx, s))
return false;
}
nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
unsigned count = glsl_type_get_image_count(var->type);
if (var->data.mode == nir_var_uniform && count) {
@ -3383,6 +4106,7 @@ get_dxil_shader_kind(struct nir_shader *s)
return DXIL_GEOMETRY_SHADER;
case MESA_SHADER_FRAGMENT:
return DXIL_PIXEL_SHADER;
case MESA_SHADER_KERNEL:
case MESA_SHADER_COMPUTE:
return DXIL_COMPUTE_SHADER;
default:
@ -3437,11 +4161,16 @@ optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, dxil_nir_lower_x2b);
if (s->options->lower_int64_options)
NIR_PASS(progress, s, nir_lower_int64);
NIR_PASS(progress, s, nir_lower_alu);
NIR_PASS(progress, s, dxil_nir_lower_inot);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, nir_lower_undef_to_zero);
NIR_PASS(progress, s, nir_opt_deref);
NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
NIR_PASS(progress, s, nir_lower_64bit_phis);
NIR_PASS_V(s, nir_lower_system_values);
} while (progress);
@ -3602,7 +4331,7 @@ nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
if (debug_dxil & DXIL_DEBUG_VERBOSE)
nir_print_shader(s, stderr);
if (!emit_module(ctx, s)) {
if (!emit_module(ctx, s, opts)) {
debug_printf("D3D12: dxil_container_add_module failed\n");
retval = false;
goto out;

View File

@ -52,6 +52,7 @@ struct nir_to_dxil_options {
bool disable_math_refactoring;
unsigned ubo_binding_offset;
unsigned provoking_vertex;
unsigned num_kernel_globals;
};
bool

View File

@ -20,4 +20,7 @@
# IN THE SOFTWARE.
subdir('compiler')
if with_microsoft_clc
subdir('clc')
endif
subdir('resource_state_manager')