[dxbc] Implemented thread group shared memory and barriers

This commit is contained in:
Philip Rebohle 2017-12-29 00:51:31 +01:00
parent 9848f9bdaa
commit a51439fb29
7 changed files with 212 additions and 59 deletions

View File

@ -69,6 +69,9 @@ namespace dxvk {
case DxbcInstClass::Atomic:
return this->emitAtomic(ins);
case DxbcInstClass::Barrier:
return this->emitBarrier(ins);
case DxbcInstClass::BufferLoad:
return this->emitBufferLoad(ins);
@ -354,6 +357,39 @@ namespace dxvk {
ins.op));
}
} break;
case DxbcOperandType::InputThreadId: {
m_cs.builtinGlobalInvocationId = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInGlobalInvocationId,
"vThreadId");
} break;
case DxbcOperandType::InputThreadGroupId: {
m_cs.builtinWorkgroupId = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInWorkgroupId,
"vThreadGroupId");
} break;
case DxbcOperandType::InputThreadIdInGroup: {
m_cs.builtinLocalInvocationId = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInLocalInvocationId,
"vThreadIdInGroup");
} break;
case DxbcOperandType::InputThreadIndexInGroup: {
// FIXME this might not be supported by Vulkan?
m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 1, 0 },
spv::StorageClassInput },
spv::BuiltInLocalInvocationIndex,
"vThreadIndexInGroup");
} break;
default:
Logger::err(str::format(
@ -734,8 +770,34 @@ namespace dxvk {
// dcl_tgsm_structured takes three arguments:
// (dst0) The resource register ID
// (imm0) Structure stride, in bytes
// (imm0) Structure count
Logger::err("DxbcCompiler: emitDclThreadGroupSharedMemory not implemented");
// (imm1) Structure count
const bool isStructured = ins.op == DxbcOpcode::DclThreadGroupSharedMemoryStructured;
const uint32_t regId = ins.dst[0].idx[0].offset;
if (regId >= m_gRegs.size())
m_gRegs.resize(regId + 1);
const uint32_t elementStride = isStructured ? ins.imm[0].u32 : 0;
const uint32_t elementCount = isStructured ? ins.imm[1].u32 : ins.imm[0].u32;
DxbcRegisterInfo varInfo;
varInfo.type.ctype = DxbcScalarType::Uint32;
varInfo.type.ccount = 1;
varInfo.type.alength = isStructured
? elementCount * elementStride / 4
: elementCount;
varInfo.sclass = spv::StorageClassWorkgroup;
m_gRegs[regId].type = isStructured
? DxbcResourceType::Structured
: DxbcResourceType::Raw;
m_gRegs[regId].elementStride = elementStride;
m_gRegs[regId].elementCount = elementCount;
m_gRegs[regId].varId = emitNewVariable(varInfo);
m_module.setDebugName(m_gRegs[regId].varId,
str::format("g", regId).c_str());
}
@ -1459,6 +1521,48 @@ namespace dxvk {
}
void DxbcCompiler::emitBarrier(const DxbcShaderInstruction& ins) {
// sync takes no operands. Instead, the synchronization
// scope is defined by the operand control bits.
const DxbcSyncFlags flags = ins.controls.syncFlags;
uint32_t executionScope = 0;
uint32_t memoryScope = 0;
uint32_t memorySemantics = 0;
if (flags.test(DxbcSyncFlag::ThreadsInGroup))
executionScope = spv::ScopeWorkgroup;
if (flags.test(DxbcSyncFlag::ThreadGroupSharedMemory)) {
memoryScope = spv::ScopeWorkgroup;
memorySemantics |= spv::MemorySemanticsWorkgroupMemoryMask;
}
if (flags.test(DxbcSyncFlag::UavMemoryGroup)) {
memoryScope = spv::ScopeWorkgroup;
memorySemantics |= spv::MemorySemanticsUniformMemoryMask;
}
if (flags.test(DxbcSyncFlag::UavMemoryGlobal)) {
memoryScope = spv::ScopeDevice;
memorySemantics |= spv::MemorySemanticsUniformMemoryMask;
}
if (executionScope != 0) {
m_module.opControlBarrier(
m_module.constu32(executionScope),
m_module.constu32(memoryScope),
m_module.constu32(memorySemantics));
} else if (memorySemantics != spv::MemorySemanticsMaskNone) {
m_module.opMemoryBarrier(
m_module.constu32(memoryScope),
m_module.constu32(memorySemantics));
} else {
Logger::warn("DxbcCompiler: sync instruction has no effect");
}
}
void DxbcCompiler::emitBufferLoad(const DxbcShaderInstruction& ins) {
// ld_raw takes three arguments:
// (dst0) Destination register
@ -2142,7 +2246,7 @@ namespace dxvk {
uint32_t dstIndex = 0;
for (uint32_t i = 0; i < value.type.ccount; i++) {
for (uint32_t i = 0; i < 4; i++) {
if (writeMask[i])
indices[dstIndex++] = swizzle[i];
}
@ -2500,6 +2604,26 @@ namespace dxvk {
case DxbcOperandType::ImmediateConstantBuffer:
return emitGetImmConstBufPtr(operand);
case DxbcOperandType::InputThreadId:
return DxbcRegisterPointer {
{ DxbcScalarType::Uint32, 3 },
m_cs.builtinGlobalInvocationId };
case DxbcOperandType::InputThreadGroupId:
return DxbcRegisterPointer {
{ DxbcScalarType::Uint32, 3 },
m_cs.builtinWorkgroupId };
case DxbcOperandType::InputThreadIdInGroup:
return DxbcRegisterPointer {
{ DxbcScalarType::Uint32, 3 },
m_cs.builtinLocalInvocationId };
case DxbcOperandType::InputThreadIndexInGroup:
return DxbcRegisterPointer {
{ DxbcScalarType::Uint32, 1 },
m_cs.builtinLocalInvocationIndex };
default:
throw DxvkError(str::format(
"DxbcCompiler: Unhandled operand type: ",
@ -2561,6 +2685,11 @@ namespace dxvk {
scalarTypeId, bufferId, elementIndexAdjusted,
SpirvImageOperands());
case DxbcOperandType::ThreadGroupSharedMemory:
return m_module.opLoad(scalarTypeId,
m_module.opAccessChain(bufferInfo.typeId,
bufferInfo.varId, 1, &elementIndexAdjusted));
default:
throw DxvkError("DxbcCompiler: Invalid operand type for strucured/raw load");
}
@ -2626,6 +2755,13 @@ namespace dxvk {
SpirvImageOperands());
break;
case DxbcOperandType::ThreadGroupSharedMemory:
m_module.opStore(
m_module.opAccessChain(bufferInfo.typeId,
bufferInfo.varId, 1, &elementIndexAdjusted),
srcComponentId);
break;
default:
throw DxvkError("DxbcCompiler: Invalid operand type for strucured/raw store");
}
@ -3090,46 +3226,6 @@ namespace dxvk {
}
void DxbcCompiler::emitCsInitBuiltins() {
m_cs.builtinGlobalInvocationId = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInGlobalInvocationId,
"cs_global_invocation_id");
m_cs.builtinLocalInvocationId = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInLocalInvocationId,
"cs_local_invocation_id");
// FIXME Vulkan might not support this? not documented
m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 1, 0 },
spv::StorageClassInput },
spv::BuiltInLocalInvocationIndex,
"cs_local_invocation_index");
m_cs.builtinWorkgroupId = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInWorkgroupId,
"cs_workgroup_id");
m_cs.builtinWorkgroupSize = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInWorkgroupSize,
"cs_workgroup_size");
m_cs.builtinWorkgroupCount = emitNewBuiltinVariable({
{ DxbcScalarType::Uint32, 3, 0 },
spv::StorageClassInput },
spv::BuiltInNumWorkgroups,
"cs_workgroup_count");
}
void DxbcCompiler::emitVsInit() {
m_module.enableCapability(spv::CapabilityClipDistance);
m_module.enableCapability(spv::CapabilityCullDistance);
@ -3247,10 +3343,6 @@ namespace dxvk {
void DxbcCompiler::emitCsInit() {
// There are no input or output
// variables for compute shaders
emitCsInitBuiltins();
// Main function of the compute shader
m_cs.functionId = m_module.allocateId();
m_module.setDebugName(m_cs.functionId, "cs_main");
@ -3403,9 +3495,16 @@ namespace dxvk {
return result;
} break;
// TODO implement
// case DxbcOperandType::ThreadGroupSharedMemory: {
// } break;
case DxbcOperandType::ThreadGroupSharedMemory: {
DxbcBufferInfo result;
result.type = m_gRegs.at(registerId).type;
result.typeId = m_module.defPointerType(
getScalarTypeId(DxbcScalarType::Uint32),
spv::StorageClassWorkgroup);
result.varId = m_gRegs.at(registerId).varId;
result.stride = m_gRegs.at(registerId).elementStride;
return result;
} break;
default:
throw DxvkError(str::format("DxbcCompiler: Invalid operand type for buffer: ", reg.type));

View File

@ -86,6 +86,14 @@ namespace dxvk {
};
struct DxbcGreg {
DxbcResourceType type = DxbcResourceType::Raw;
uint32_t elementStride = 0;
uint32_t elementCount = 0;
uint32_t varId = 0;
};
/**
* \brief Vertex shader-specific structure
*/
@ -133,8 +141,6 @@ namespace dxvk {
uint32_t builtinLocalInvocationId = 0;
uint32_t builtinLocalInvocationIndex = 0;
uint32_t builtinWorkgroupId = 0;
uint32_t builtinWorkgroupSize = 0;
uint32_t builtinWorkgroupCount = 0;
};
@ -226,6 +232,10 @@ namespace dxvk {
std::vector<uint32_t> m_rRegs;
std::vector<DxbcXreg> m_xRegs;
/////////////////////////////////////////////
// Thread group shared memory (g#) registers
std::vector<DxbcGreg> m_gRegs;
///////////////////////////////////////////////////////////
// v# registers as defined by the shader. The type of each
// of these inputs is either float4 or an array of float4.
@ -382,6 +392,9 @@ namespace dxvk {
void emitAtomic(
const DxbcShaderInstruction& ins);
void emitBarrier(
const DxbcShaderInstruction& ins);
void emitBufferLoad(
const DxbcShaderInstruction& ins);
@ -577,7 +590,6 @@ namespace dxvk {
void emitVsInitBuiltins();
void emitGsInitBuiltins(uint32_t vertexCount);
void emitPsInitBuiltins();
void emitCsInitBuiltins();
/////////////////////////////////
// Shader initialization methods

View File

@ -811,7 +811,7 @@ namespace dxvk {
{ DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
} },
/* Sync */
{ },
{ 0, DxbcInstClass::Barrier },
/* DAdd */
{ },
/* DMax */

View File

@ -33,6 +33,7 @@ namespace dxvk {
ControlFlow, ///< Control flow instructions
GeometryEmit, ///< Special geometry shader instructions
Atomic, ///< Atomic operations
Barrier, ///< Execution or memory barrier
BufferLoad, ///< Structured or raw buffer load
BufferStore, ///< Structured or raw buffer store
TextureQuery, ///< Texture query instruction

View File

@ -1869,6 +1869,26 @@ namespace dxvk {
}
void SpirvModule::opControlBarrier(
uint32_t execution,
uint32_t memory,
uint32_t semantics) {
m_code.putIns (spv::OpControlBarrier, 4);
m_code.putWord(execution);
m_code.putWord(memory);
m_code.putWord(semantics);
}
void SpirvModule::opMemoryBarrier(
uint32_t memory,
uint32_t semantics) {
m_code.putIns (spv::OpMemoryBarrier, 3);
m_code.putWord(memory);
m_code.putWord(semantics);
}
void SpirvModule::opLoopMerge(
uint32_t mergeBlock,
uint32_t continueTarget,

View File

@ -643,6 +643,15 @@ namespace dxvk {
uint32_t reference,
const SpirvImageOperands& operands);
void opControlBarrier(
uint32_t execution,
uint32_t memory,
uint32_t semantics);
void opMemoryBarrier(
uint32_t memory,
uint32_t semantics);
void opLoopMerge(
uint32_t mergeBlock,
uint32_t continueTarget,

View File

@ -13,9 +13,21 @@ using namespace dxvk;
const std::string g_computeShaderCode =
"StructuredBuffer<uint> buf_in : register(t0);\n"
"RWStructuredBuffer<uint> buf_out : register(u0);\n"
"[numthreads(1,1,1)]\n"
"void main() {\n"
" buf_out[0] = buf_in[0] * buf_in[1];\n"
"groupshared uint tmp[64];\n"
"[numthreads(64,1,1)]\n"
"void main(uint localId : SV_GroupIndex, uint3 globalId : SV_DispatchThreadID) {\n"
" tmp[localId] = buf_in[2 * globalId.x + 0]\n"
" + buf_in[2 * globalId.x + 1];\n"
" GroupMemoryBarrierWithGroupSync();\n"
" uint activeGroups = 32;\n"
" while (activeGroups != 0) {\n"
" if (localId < activeGroups)\n"
" tmp[localId] += tmp[localId + activeGroups];\n"
" GroupMemoryBarrierWithGroupSync();\n"
" activeGroups >>= 1;\n"
" }\n"
" if (localId == 0)\n"
" buf_out[0] = tmp[0];\n"
"}\n";
int WINAPI WinMain(HINSTANCE hInstance,
@ -63,7 +75,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
return 1;
}
std::array<uint32_t, 64> srcData;
std::array<uint32_t, 128> srcData;
for (uint32_t i = 0; i < srcData.size(); i++)
srcData[i] = i + 1;
@ -93,7 +105,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
dstBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
dstBufferDesc.StructureByteStride = sizeof(uint32_t);
if (FAILED(device->CreateBuffer(&dstBufferDesc, nullptr, &dstBuffer))) {
if (FAILED(device->CreateBuffer(&dstBufferDesc, &srcDataInfo, &dstBuffer))) {
std::cerr << "Failed to create destination buffer" << std::endl;
return 1;
}