swr/rast: SIMD16 shaders - widen fetch and vertex shaders
Work in progress, disabled by default. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
39ed8e297c
commit
a3f97ff28b
|
@ -1478,13 +1478,22 @@ void ProcessDraw(
|
|||
PA_STATE& pa = paFactory.GetPA();
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
#if USE_SIMD16_SHADERS
|
||||
simd16vertex vin;
|
||||
#else
|
||||
simdvertex vin_lo;
|
||||
simdvertex vin_hi;
|
||||
#endif
|
||||
SWR_VS_CONTEXT vsContext_lo;
|
||||
SWR_VS_CONTEXT vsContext_hi;
|
||||
|
||||
#if USE_SIMD16_SHADERS
|
||||
vsContext_lo.pVin = reinterpret_cast<simdvertex *>(&vin);
|
||||
vsContext_hi.pVin = reinterpret_cast<simdvertex *>(&vin);
|
||||
#else
|
||||
vsContext_lo.pVin = &vin_lo;
|
||||
vsContext_hi.pVin = &vin_hi;
|
||||
#endif
|
||||
vsContext_lo.AlternateOffset = 0;
|
||||
vsContext_hi.AlternateOffset = 1;
|
||||
|
||||
|
@ -1565,17 +1574,31 @@ void ProcessDraw(
|
|||
{
|
||||
// 1. Execute FS/VS for a single SIMD.
|
||||
AR_BEGIN(FEFetchShader, pDC->drawId);
|
||||
#if USE_SIMD16_SHADERS
|
||||
state.pfnFetchFunc(fetchInfo_lo, vin);
|
||||
#else
|
||||
state.pfnFetchFunc(fetchInfo_lo, vin_lo);
|
||||
|
||||
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
|
||||
{
|
||||
state.pfnFetchFunc(fetchInfo_hi, vin_hi);
|
||||
}
|
||||
#endif
|
||||
AR_END(FEFetchShader, 0);
|
||||
|
||||
// forward fetch generated vertex IDs to the vertex shader
|
||||
#if USE_SIMD16_SHADERS
|
||||
#if 0
|
||||
vsContext_lo.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 0);
|
||||
vsContext_hi.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 1);
|
||||
#else
|
||||
vsContext_lo.VertexID = fetchInfo_lo.VertexID;
|
||||
vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
|
||||
#endif
|
||||
#else
|
||||
vsContext_lo.VertexID = fetchInfo_lo.VertexID;
|
||||
vsContext_hi.VertexID = fetchInfo_hi.VertexID;
|
||||
#endif
|
||||
|
||||
// Setup active mask for vertex shader.
|
||||
vsContext_lo.mask = GenerateMask(endVertex - i);
|
||||
|
@ -1584,8 +1607,18 @@ void ProcessDraw(
|
|||
// forward cut mask to the PA
|
||||
if (IsIndexedT::value)
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
#if 0
|
||||
*pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 0)));
|
||||
*pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 1)));
|
||||
#else
|
||||
*pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
|
||||
*pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask2));
|
||||
#endif
|
||||
#else
|
||||
*pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
|
||||
*pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_hi.CutMask));
|
||||
#endif
|
||||
}
|
||||
|
||||
UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#define ENABLE_AVX512_SIMD16 1
|
||||
#define USE_8x2_TILE_BACKEND 1
|
||||
#define USE_SIMD16_FRONTEND 1
|
||||
// Work-in-progress switch, disabled (0) by default. When set to 1 the fetch
// shader entry point (PFN_FETCH_FUNC) takes a simd16vertex output and
// SWR_FETCH_CONTEXT gains the VertexID2 / CutMask2 outputs.
#define USE_SIMD16_SHADERS 0 // requires USE_SIMD16_FRONTEND
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Architecture validation
|
||||
|
|
|
@ -577,6 +577,12 @@ struct SWR_FETCH_CONTEXT
|
|||
uint32_t StartInstance; // IN: start instance
|
||||
simdscalari VertexID; // OUT: vector of vertex IDs
|
||||
simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
|
||||
#if USE_SIMD16_SHADERS
|
||||
// simd16scalari VertexID; // OUT: vector of vertex IDs
|
||||
// simd16scalari CutMask; // OUT: vector mask of indices which have the cut index value
|
||||
simdscalari VertexID2; // OUT: vector of vertex IDs
|
||||
simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
|
||||
#endif
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -830,7 +836,11 @@ static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
/// FUNCTION POINTERS FOR SHADERS
|
||||
|
||||
// Fetch shader entry point. The type of the output vertex depends on
// USE_SIMD16_SHADERS; JitManager builds the matching LLVM function type
// (mFetchShaderTy) from the same switch.
#if USE_SIMD16_SHADERS
|
||||
// SIMD16 build: the fetch shader writes into a simd16vertex.
typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
|
||||
#else
|
||||
// Default build: the fetch shader writes into a simdvertex.
typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
||||
#endif
|
||||
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
|
||||
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
|
||||
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
|
||||
|
|
|
@ -152,10 +152,18 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
|
|||
mInt64Ty = Type::getInt64Ty(mContext); // int type
|
||||
|
||||
// fetch function signature
|
||||
#if USE_SIMD16_SHADERS
|
||||
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
|
||||
#else
|
||||
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
||||
#endif
|
||||
std::vector<Type*> fsArgs;
|
||||
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
|
||||
#if USE_SIMD16_SHADERS
|
||||
fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
|
||||
#else
|
||||
fsArgs.push_back(PointerType::get(Gen_simdvertex(this), 0));
|
||||
#endif
|
||||
|
||||
mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, false);
|
||||
|
||||
|
@ -165,6 +173,14 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
|
|||
mSimdVectorTy = ArrayType::get(mSimtFP32Ty, 4);
|
||||
mSimdVectorInt32Ty = ArrayType::get(mSimtInt32Ty, 4);
|
||||
|
||||
#if USE_SIMD16_SHADERS
|
||||
mSimd16FP32Ty = ArrayType::get(mSimtFP32Ty, 2);
|
||||
mSimd16Int32Ty = ArrayType::get(mSimtInt32Ty, 2);
|
||||
|
||||
mSimd16VectorFP32Ty = ArrayType::get(mSimd16FP32Ty, 4);
|
||||
mSimd16VectorInt32Ty = ArrayType::get(mSimd16Int32Ty, 4);
|
||||
|
||||
#endif
|
||||
#if defined(_WIN32)
|
||||
// explicitly instantiate used symbols from potentially statically linked libs
|
||||
sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);
|
||||
|
|
|
@ -194,6 +194,14 @@ struct JitManager
|
|||
llvm::Type* mSimdVectorInt32Ty;
|
||||
llvm::Type* mSimdVectorTy;
|
||||
|
||||
#if USE_SIMD16_SHADERS
|
||||
llvm::Type* mSimd16FP32Ty;
|
||||
llvm::Type* mSimd16Int32Ty;
|
||||
|
||||
llvm::Type* mSimd16VectorFP32Ty;
|
||||
llvm::Type* mSimd16VectorInt32Ty;
|
||||
|
||||
#endif
|
||||
// fetch shader types
|
||||
llvm::FunctionType* mFetchShaderTy;
|
||||
|
||||
|
|
|
@ -65,18 +65,34 @@ struct FetchJit : public Builder
|
|||
typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
|
||||
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
|
||||
const uint32_t(&)[4]> Shuffle8bpcArgs;
|
||||
// Shuffles gathered components into place; results are written through the
// vVertexElements reference carried in args.
// In the SIMD16 build, useVertexID2 is forwarded to GenerateCompCtrlVector
// for components that are generated rather than gathered.
#if USE_SIMD16_SHADERS
|
||||
void Shuffle8bpcGatherd(Shuffle8bpcArgs &args, bool useVertexID2);
|
||||
#else
|
||||
void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
|
||||
#endif
|
||||
|
||||
typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType,
|
||||
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
|
||||
// Shuffles gathered components into place; results are written through the
// vVertexElements reference carried in args.
// In the SIMD16 build, useVertexID2 is forwarded to GenerateCompCtrlVector
// for components that are generated rather than gathered.
#if USE_SIMD16_SHADERS
|
||||
void Shuffle16bpcGather(Shuffle16bpcArgs &args, bool useVertexID2);
|
||||
#else
|
||||
void Shuffle16bpcGather(Shuffle16bpcArgs &args);
|
||||
#endif
|
||||
|
||||
void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
|
||||
|
||||
// Generates a constant vector of values based on the ComponentControl value.
// In the SIMD16 build, useVertexID2 selects which fetch-context field the
// StoreVertexId case loads: SWR_FETCH_CONTEXT::VertexID2 when true,
// SWR_FETCH_CONTEXT::VertexID when false.
#if USE_SIMD16_SHADERS
|
||||
Value* GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2);
|
||||
#else
|
||||
Value* GenerateCompCtrlVector(const ComponentControl ctrl);
|
||||
#endif
|
||||
|
||||
void JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
|
||||
// Gather-based vertex fetch.
//   streams  - value pointer to the current vertex stream
//   vIndices - vector value of indices to gather
//   pVtxOut  - value pointer to the output vertex struct
// In the SIMD16 build, FetchJit::Create calls this twice: once with vIndices
// and useVertexID2 = false, then with vIndices2, GEP(pVtxOut, C(1)) and
// useVertexID2 = true. The flag is passed on to the shuffle helpers and to
// GenerateCompCtrlVector.
#if USE_SIMD16_SHADERS
|
||||
void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut, bool useVertexID2);
|
||||
#else
|
||||
void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
|
||||
#endif
|
||||
|
||||
bool IsOddFormat(SWR_FORMAT format);
|
||||
bool IsUniformFormat(SWR_FORMAT format);
|
||||
|
@ -114,7 +130,15 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
std::vector<Value*> vtxInputIndices(2, C(0));
|
||||
// GEP
|
||||
pVtxOut = GEP(pVtxOut, C(0));
|
||||
#if USE_SIMD16_SHADERS
|
||||
#if 0
|
||||
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth * 2), 0));
|
||||
#else
|
||||
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
|
||||
#endif
|
||||
#else
|
||||
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
|
||||
#endif
|
||||
|
||||
// SWR_FETCH_CONTEXT::pStreams
|
||||
Value* streams = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
|
||||
|
@ -130,38 +154,78 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
|
||||
|
||||
Value* vIndices;
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* indices2;
|
||||
Value* vIndices2;
|
||||
#endif
|
||||
switch(fetchState.indexType)
|
||||
{
|
||||
case R8_UINT:
|
||||
indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
|
||||
if(fetchState.bDisableIndexOOBCheck){
|
||||
#if USE_SIMD16_SHADERS
|
||||
indices2 = GEP(indices, C(8));
|
||||
#endif
|
||||
if(fetchState.bDisableIndexOOBCheck)
|
||||
{
|
||||
vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
|
||||
vIndices = Z_EXT(vIndices, mSimdInt32Ty);
|
||||
#if USE_SIMD16_SHADERS
|
||||
vIndices2 = LOAD(BITCAST(indices2, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), { (uint32_t)0 });
|
||||
vIndices2 = Z_EXT(vIndices2, mSimdInt32Ty);
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
else
|
||||
{
|
||||
pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
|
||||
vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
|
||||
#if USE_SIMD16_SHADERS
|
||||
pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
|
||||
vIndices2 = GetSimdValid8bitIndices(indices2, pLastIndex);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case R16_UINT:
|
||||
indices = BITCAST(indices, Type::getInt16PtrTy(JM()->mContext, 0));
|
||||
if(fetchState.bDisableIndexOOBCheck){
|
||||
#if USE_SIMD16_SHADERS
|
||||
indices2 = GEP(indices, C(8));
|
||||
#endif
|
||||
if(fetchState.bDisableIndexOOBCheck)
|
||||
{
|
||||
vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
|
||||
vIndices = Z_EXT(vIndices, mSimdInt32Ty);
|
||||
#if USE_SIMD16_SHADERS
|
||||
vIndices2 = LOAD(BITCAST(indices2, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), { (uint32_t)0 });
|
||||
vIndices2 = Z_EXT(vIndices2, mSimdInt32Ty);
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
else
|
||||
{
|
||||
pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
|
||||
vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
|
||||
#if USE_SIMD16_SHADERS
|
||||
pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
|
||||
vIndices2 = GetSimdValid16bitIndices(indices2, pLastIndex);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case R32_UINT:
|
||||
#if USE_SIMD16_SHADERS
|
||||
indices2 = GEP(indices, C(8));
|
||||
#endif
|
||||
(fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(BITCAST(indices, PointerType::get(mSimdInt32Ty,0)),{(uint32_t)0})
|
||||
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
|
||||
#if USE_SIMD16_SHADERS
|
||||
(fetchState.bDisableIndexOOBCheck) ? vIndices2 = LOAD(BITCAST(indices2, PointerType::get(mSimdInt32Ty, 0)), { (uint32_t)0 })
|
||||
: vIndices2 = GetSimdValid32bitIndices(indices2, pLastIndex);
|
||||
#endif
|
||||
break; // incoming type is already 32bit int
|
||||
default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; break;
|
||||
}
|
||||
|
||||
Value* vVertexId = vIndices;
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* vVertexId2 = vIndices2;
|
||||
#endif
|
||||
if (fetchState.bVertexIDOffsetEnable)
|
||||
{
|
||||
// Assuming one of baseVertex or startVertex is 0, so adding both should be functionally correct
|
||||
|
@ -169,10 +233,17 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
|
||||
vVertexId = ADD(vIndices, vBaseVertex);
|
||||
vVertexId = ADD(vVertexId, vStartVertex);
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexId2 = ADD(vIndices2, vBaseVertex);
|
||||
vVertexId2 = ADD(vVertexId2, vStartVertex);
|
||||
#endif
|
||||
}
|
||||
|
||||
// store out vertex IDs
|
||||
STORE(vVertexId, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
|
||||
#if USE_SIMD16_SHADERS
|
||||
STORE(vVertexId2, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 }));
|
||||
#endif
|
||||
|
||||
// store out cut mask if enabled
|
||||
if (fetchState.bEnableCutIndex)
|
||||
|
@ -180,12 +251,29 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
|||
Value* vCutIndex = VIMMED1(fetchState.cutIndex);
|
||||
Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
|
||||
STORE(cutMask, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* cutMask2 = VMASK(ICMP_EQ(vIndices2, vCutIndex));
|
||||
STORE(cutMask2, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask2 }));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Fetch attributes from memory and output to a simdvertex struct
|
||||
// since VGATHER has a perf penalty on HSW vs BDW, allow client to choose which fetch method to use
|
||||
#if USE_SIMD16_SHADERS
|
||||
if (fetchState.bDisableVGATHER)
|
||||
{
|
||||
JitLoadVertices(fetchState, streams, vIndices, pVtxOut);
|
||||
JitLoadVertices(fetchState, streams, vIndices2, GEP(pVtxOut, C(1)));
|
||||
}
|
||||
else
|
||||
{
|
||||
JitGatherVertices(fetchState, streams, vIndices, pVtxOut, false);
|
||||
JitGatherVertices(fetchState, streams, vIndices2, GEP(pVtxOut, C(1)), true);
|
||||
}
|
||||
#else
|
||||
(fetchState.bDisableVGATHER) ? JitLoadVertices(fetchState, streams, vIndices, pVtxOut)
|
||||
: JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
|
||||
#endif
|
||||
|
||||
RET_VOID();
|
||||
|
||||
|
@ -531,7 +619,11 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
|
|||
|
||||
for(uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* dest = GEP(pVtxOut, C(nelt * 8 + c * 2), "destGEP");
|
||||
#else
|
||||
Value* dest = GEP(pVtxOut, C(nelt * 4 + c), "destGEP");
|
||||
#endif
|
||||
STORE(elements[c], dest);
|
||||
}
|
||||
}
|
||||
|
@ -678,8 +770,13 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
|
|||
/// @param streams - value pointer to the current vertex stream
|
||||
/// @param vIndices - vector value of indices to gather
|
||||
/// @param pVtxOut - value pointer to output simdvertex struct
|
||||
#if USE_SIMD16_SHADERS
|
||||
void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
||||
Value* streams, Value* vIndices, Value* pVtxOut)
|
||||
Value* streams, Value* vIndices, Value* pVtxOut, bool useVertexID2)
|
||||
#else
|
||||
void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
||||
Value* streams, Value* vIndices, Value* pVtxOut)
|
||||
#endif
|
||||
{
|
||||
uint32_t currentVertexElement = 0;
|
||||
uint32_t outputElt = 0;
|
||||
|
@ -887,7 +984,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
|
||||
|
||||
// Shuffle gathered components into place in simdvertex struct
|
||||
#if USE_SIMD16_SHADERS
|
||||
Shuffle16bpcGather(args, useVertexID2); // outputs to vVertexElements ref
|
||||
#else
|
||||
Shuffle16bpcGather(args); // outputs to vVertexElements ref
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -908,7 +1009,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -960,7 +1065,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -1038,7 +1147,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle);
|
||||
|
||||
// Shuffle gathered components into place in simdvertex struct
|
||||
#if USE_SIMD16_SHADERS
|
||||
Shuffle8bpcGatherd(args, useVertexID2); // outputs to vVertexElements ref
|
||||
#else
|
||||
Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1078,7 +1191,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
|
||||
|
||||
// Shuffle gathered components into place in simdvertex struct
|
||||
#if USE_SIMD16_SHADERS
|
||||
Shuffle16bpcGather(args, useVertexID2); // outputs to vVertexElements ref
|
||||
#else
|
||||
Shuffle16bpcGather(args); // outputs to vVertexElements ref
|
||||
#endif
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1117,7 +1234,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -1265,7 +1386,11 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
|
|||
/// @param compCtrl - component control val
|
||||
/// @param vVertexElements[4] - vertex components to output
|
||||
/// @param swizzle[4] - component swizzle location
|
||||
#if USE_SIMD16_SHADERS
|
||||
void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args, bool useVertexID2)
|
||||
#else
|
||||
void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
||||
#endif
|
||||
{
|
||||
// Unpack tuple args
|
||||
Value*& vGatherResult = std::get<0>(args);
|
||||
|
@ -1367,7 +1492,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -1456,7 +1585,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -1488,7 +1621,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
|
|||
/// @param compMask - component packing mask
|
||||
/// @param compCtrl - component control val
|
||||
/// @param vVertexElements[4] - vertex components to output
|
||||
#if USE_SIMD16_SHADERS
|
||||
void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args, bool useVertexID2)
|
||||
#else
|
||||
void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
||||
#endif
|
||||
{
|
||||
// Unpack tuple args
|
||||
Value* (&vGatherResult)[2] = std::get<0>(args);
|
||||
|
@ -1591,7 +1728,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -1670,7 +1811,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
|
|||
}
|
||||
else
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
|
||||
#else
|
||||
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (currentVertexElement > 3)
|
||||
|
@ -1715,7 +1860,11 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
|
|||
#endif
|
||||
// outputElt * 4 = offsetting by the size of a simdvertex (outputElt * 8 when USE_SIMD16_SHADERS)
|
||||
// + c offsets to a 32bit x vWidth row within the current vertex (c * 2 when USE_SIMD16_SHADERS)
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* dest = GEP(pVtxOut, C(outputElt * 8 + c * 2), "destGEP");
|
||||
#else
|
||||
Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), "destGEP");
|
||||
#endif
|
||||
STORE(vVertexElements[c], dest);
|
||||
}
|
||||
}
|
||||
|
@ -1724,7 +1873,11 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
|
|||
/// @brief Generates a constant vector of values based on the
|
||||
/// ComponentControl value
|
||||
/// @param ctrl - ComponentControl value
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2)
|
||||
#else
|
||||
Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
|
||||
#endif
|
||||
{
|
||||
switch(ctrl)
|
||||
{
|
||||
|
@ -1734,7 +1887,19 @@ Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
|
|||
case Store1Int: return VIMMED1(1);
|
||||
case StoreVertexId:
|
||||
{
|
||||
#if USE_SIMD16_SHADERS
|
||||
Value* pId;
|
||||
if (useVertexID2)
|
||||
{
|
||||
pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 })), mSimdFP32Ty);
|
||||
}
|
||||
else
|
||||
{
|
||||
pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
|
||||
}
|
||||
#else
|
||||
Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
|
||||
#endif
|
||||
return VBROADCAST(pId);
|
||||
}
|
||||
case StoreInstanceId:
|
||||
|
|
Loading…
Reference in New Issue