swr/rast: SIMD16 shaders - widen fetch and vertex shaders

Work in progress, disabled by default.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley 2017-07-20 18:27:51 -05:00
parent 39ed8e297c
commit a3f97ff28b
6 changed files with 238 additions and 5 deletions

View File

@ -1478,13 +1478,22 @@ void ProcessDraw(
PA_STATE& pa = paFactory.GetPA();
#if USE_SIMD16_FRONTEND
#if USE_SIMD16_SHADERS
simd16vertex vin;
#else
simdvertex vin_lo;
simdvertex vin_hi;
#endif
SWR_VS_CONTEXT vsContext_lo;
SWR_VS_CONTEXT vsContext_hi;
#if USE_SIMD16_SHADERS
vsContext_lo.pVin = reinterpret_cast<simdvertex *>(&vin);
vsContext_hi.pVin = reinterpret_cast<simdvertex *>(&vin);
#else
vsContext_lo.pVin = &vin_lo;
vsContext_hi.pVin = &vin_hi;
#endif
vsContext_lo.AlternateOffset = 0;
vsContext_hi.AlternateOffset = 1;
@ -1565,17 +1574,31 @@ void ProcessDraw(
{
// 1. Execute FS/VS for a single SIMD.
AR_BEGIN(FEFetchShader, pDC->drawId);
#if USE_SIMD16_SHADERS
state.pfnFetchFunc(fetchInfo_lo, vin);
#else
state.pfnFetchFunc(fetchInfo_lo, vin_lo);
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
state.pfnFetchFunc(fetchInfo_hi, vin_hi);
}
#endif
AR_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
#if USE_SIMD16_SHADERS
#if 0
vsContext_lo.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 0);
vsContext_hi.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 1);
#else
vsContext_lo.VertexID = fetchInfo_lo.VertexID;
vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
#endif
#else
vsContext_lo.VertexID = fetchInfo_lo.VertexID;
vsContext_hi.VertexID = fetchInfo_hi.VertexID;
#endif
// Setup active mask for vertex shader.
vsContext_lo.mask = GenerateMask(endVertex - i);
@ -1584,8 +1607,18 @@ void ProcessDraw(
// forward cut mask to the PA
if (IsIndexedT::value)
{
#if USE_SIMD16_SHADERS
#if 0
*pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 0)));
*pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 1)));
#else
*pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
*pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask2));
#endif
#else
*pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
*pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_hi.CutMask));
#endif
}
UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));

View File

@ -41,6 +41,7 @@
// Compile-time feature switches, tested with #if (non-zero enables the path).
#define ENABLE_AVX512_SIMD16 1
#define USE_8x2_TILE_BACKEND 1
#define USE_SIMD16_FRONTEND 1
// Selects the widened (simd16vertex) fetch/vertex shader path; left at 0 while that path is work in progress.
#define USE_SIMD16_SHADERS 0 // requires USE_SIMD16_FRONTEND
///////////////////////////////////////////////////////////////////////////////
// Architecture validation

View File

@ -577,6 +577,12 @@ struct SWR_FETCH_CONTEXT
uint32_t StartInstance; // IN: start instance
simdscalari VertexID; // OUT: vector of vertex IDs
simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
#if USE_SIMD16_SHADERS
// Commented-out single-register SIMD16 variants (presumably the intended final form — TODO confirm):
// simd16scalari VertexID; // OUT: vector of vertex IDs
// simd16scalari CutMask; // OUT: vector mask of indices which have the cut index value
// Active form: a second simd-width pair holding results for the second half of the SIMD16 batch.
simdscalari VertexID2; // OUT: vector of vertex IDs for the second half of the batch
simdscalari CutMask2; // OUT: vector mask of second-half indices which have the cut index value
#endif
};
//////////////////////////////////////////////////////////////////////////
@ -830,7 +836,11 @@ static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
//////////////////////////////////////////////////////////////////////////
/// FUNCTION POINTERS FOR SHADERS
// Fetch shader entry point. The output vertex parameter is a simd16vertex when
// USE_SIMD16_SHADERS is enabled and a simdvertex otherwise; the fetch context
// parameter is the same in both builds.
#if USE_SIMD16_SHADERS
typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
#else
typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
// Vertex, hull and domain shader entry points: each takes a private-data handle
// and a pointer to its stage-specific context.
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);

View File

@ -152,10 +152,18 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
mInt64Ty = Type::getInt64Ty(mContext); // int type
// fetch function signature
#if USE_SIMD16_SHADERS
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
#else
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
std::vector<Type*> fsArgs;
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
#if USE_SIMD16_SHADERS
fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
#else
fsArgs.push_back(PointerType::get(Gen_simdvertex(this), 0));
#endif
mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, false);
@ -165,6 +173,14 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
mSimdVectorTy = ArrayType::get(mSimtFP32Ty, 4);
mSimdVectorInt32Ty = ArrayType::get(mSimtInt32Ty, 4);
#if USE_SIMD16_SHADERS
mSimd16FP32Ty = ArrayType::get(mSimtFP32Ty, 2);
mSimd16Int32Ty = ArrayType::get(mSimtInt32Ty, 2);
mSimd16VectorFP32Ty = ArrayType::get(mSimd16FP32Ty, 4);
mSimd16VectorInt32Ty = ArrayType::get(mSimd16Int32Ty, 4);
#endif
#if defined(_WIN32)
// explicitly instantiate used symbols from potentially statically linked libs
sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);

View File

@ -194,6 +194,14 @@ struct JitManager
llvm::Type* mSimdVectorInt32Ty;
llvm::Type* mSimdVectorTy;
#if USE_SIMD16_SHADERS
// SIMD16 types; the JitManager constructor builds them as LLVM array types
// layered on the simd-width types.
llvm::Type* mSimd16FP32Ty; // array of 2 x mSimtFP32Ty
llvm::Type* mSimd16Int32Ty; // array of 2 x mSimtInt32Ty
llvm::Type* mSimd16VectorFP32Ty; // array of 4 x mSimd16FP32Ty
llvm::Type* mSimd16VectorInt32Ty; // array of 4 x mSimd16Int32Ty
#endif
// fetch shader types
llvm::FunctionType* mFetchShaderTy;

View File

@ -65,18 +65,34 @@ struct FetchJit : public Builder
typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
const uint32_t(&)[4]> Shuffle8bpcArgs;
// Shuffles gathered 8bpc components into vVertexElements (carried in args).
// In USE_SIMD16_SHADERS builds, useVertexID2 is passed on to GenerateCompCtrlVector
// to choose between SWR_FETCH_CONTEXT::VertexID and ::VertexID2.
#if USE_SIMD16_SHADERS
void Shuffle8bpcGatherd(Shuffle8bpcArgs &args, bool useVertexID2);
#else
void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
#endif
typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType,
uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
// Shuffles gathered 16bpc components into vVertexElements (carried in args).
// In USE_SIMD16_SHADERS builds, useVertexID2 is passed on to GenerateCompCtrlVector
// to choose between SWR_FETCH_CONTEXT::VertexID and ::VertexID2.
#if USE_SIMD16_SHADERS
void Shuffle16bpcGather(Shuffle16bpcArgs &args, bool useVertexID2);
#else
void Shuffle16bpcGather(Shuffle16bpcArgs &args);
#endif
void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
// Builds the vector value for a ComponentControl setting.
// In USE_SIMD16_SHADERS builds, useVertexID2 selects the source for StoreVertexId:
// true loads SWR_FETCH_CONTEXT::VertexID2, false loads SWR_FETCH_CONTEXT::VertexID.
#if USE_SIMD16_SHADERS
Value* GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2);
#else
Value* GenerateCompCtrlVector(const ComponentControl ctrl);
#endif
void JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
// Gather-based vertex fetch into pVtxOut for the given index vector.
// In USE_SIMD16_SHADERS builds it is emitted twice per fetch shader: once with
// useVertexID2 == false for the first index vector and once with
// useVertexID2 == true for the second; the flag is forwarded to the
// shuffle and component-control helpers.
#if USE_SIMD16_SHADERS
void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut, bool useVertexID2);
#else
void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
#endif
bool IsOddFormat(SWR_FORMAT format);
bool IsUniformFormat(SWR_FORMAT format);
@ -114,7 +130,15 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
std::vector<Value*> vtxInputIndices(2, C(0));
// GEP
pVtxOut = GEP(pVtxOut, C(0));
#if USE_SIMD16_SHADERS
#if 0
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth * 2), 0));
#else
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
#endif
#else
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
#endif
// SWR_FETCH_CONTEXT::pStreams
Value* streams = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
@ -130,38 +154,78 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
Value* vIndices;
#if USE_SIMD16_SHADERS
Value* indices2;
Value* vIndices2;
#endif
switch(fetchState.indexType)
{
case R8_UINT:
indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
if(fetchState.bDisableIndexOOBCheck){
#if USE_SIMD16_SHADERS
indices2 = GEP(indices, C(8));
#endif
if(fetchState.bDisableIndexOOBCheck)
{
vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
vIndices = Z_EXT(vIndices, mSimdInt32Ty);
#if USE_SIMD16_SHADERS
vIndices2 = LOAD(BITCAST(indices2, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), { (uint32_t)0 });
vIndices2 = Z_EXT(vIndices2, mSimdInt32Ty);
#endif
}
else{
else
{
pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
#if USE_SIMD16_SHADERS
pLastIndex = BITCAST(pLastIndex, Type::getInt8PtrTy(JM()->mContext, 0));
vIndices2 = GetSimdValid8bitIndices(indices2, pLastIndex);
#endif
}
break;
case R16_UINT:
indices = BITCAST(indices, Type::getInt16PtrTy(JM()->mContext, 0));
if(fetchState.bDisableIndexOOBCheck){
#if USE_SIMD16_SHADERS
indices2 = GEP(indices, C(8));
#endif
if(fetchState.bDisableIndexOOBCheck)
{
vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
vIndices = Z_EXT(vIndices, mSimdInt32Ty);
#if USE_SIMD16_SHADERS
vIndices2 = LOAD(BITCAST(indices2, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), { (uint32_t)0 });
vIndices2 = Z_EXT(vIndices2, mSimdInt32Ty);
#endif
}
else{
else
{
pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
#if USE_SIMD16_SHADERS
pLastIndex = BITCAST(pLastIndex, Type::getInt16PtrTy(JM()->mContext, 0));
vIndices2 = GetSimdValid16bitIndices(indices2, pLastIndex);
#endif
}
break;
case R32_UINT:
#if USE_SIMD16_SHADERS
indices2 = GEP(indices, C(8));
#endif
(fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(BITCAST(indices, PointerType::get(mSimdInt32Ty,0)),{(uint32_t)0})
: vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
#if USE_SIMD16_SHADERS
(fetchState.bDisableIndexOOBCheck) ? vIndices2 = LOAD(BITCAST(indices2, PointerType::get(mSimdInt32Ty, 0)), { (uint32_t)0 })
: vIndices2 = GetSimdValid32bitIndices(indices2, pLastIndex);
#endif
break; // incoming type is already 32bit int
default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; break;
}
Value* vVertexId = vIndices;
#if USE_SIMD16_SHADERS
Value* vVertexId2 = vIndices2;
#endif
if (fetchState.bVertexIDOffsetEnable)
{
// Assuming one of baseVertex or startVertex is 0, so adding both should be functionally correct
@ -169,10 +233,17 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
vVertexId = ADD(vIndices, vBaseVertex);
vVertexId = ADD(vVertexId, vStartVertex);
#if USE_SIMD16_SHADERS
vVertexId2 = ADD(vIndices2, vBaseVertex);
vVertexId2 = ADD(vVertexId2, vStartVertex);
#endif
}
// store out vertex IDs
STORE(vVertexId, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
#if USE_SIMD16_SHADERS
STORE(vVertexId2, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 }));
#endif
// store out cut mask if enabled
if (fetchState.bEnableCutIndex)
@ -180,12 +251,29 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
Value* vCutIndex = VIMMED1(fetchState.cutIndex);
Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
STORE(cutMask, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
#if USE_SIMD16_SHADERS
Value* cutMask2 = VMASK(ICMP_EQ(vIndices2, vCutIndex));
STORE(cutMask2, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask2 }));
#endif
}
// Fetch attributes from memory and output to a simdvertex struct
// since VGATHER has a perf penalty on HSW vs BDW, allow client to choose which fetch method to use
#if USE_SIMD16_SHADERS
if (fetchState.bDisableVGATHER)
{
JitLoadVertices(fetchState, streams, vIndices, pVtxOut);
JitLoadVertices(fetchState, streams, vIndices2, GEP(pVtxOut, C(1)));
}
else
{
JitGatherVertices(fetchState, streams, vIndices, pVtxOut, false);
JitGatherVertices(fetchState, streams, vIndices2, GEP(pVtxOut, C(1)), true);
}
#else
(fetchState.bDisableVGATHER) ? JitLoadVertices(fetchState, streams, vIndices, pVtxOut)
: JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
#endif
RET_VOID();
@ -531,7 +619,11 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* str
for(uint32_t c = 0; c < 4; ++c)
{
#if USE_SIMD16_SHADERS
Value* dest = GEP(pVtxOut, C(nelt * 8 + c * 2), "destGEP");
#else
Value* dest = GEP(pVtxOut, C(nelt * 4 + c), "destGEP");
#endif
STORE(elements[c], dest);
}
}
@ -678,8 +770,13 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
/// @param streams - value pointer to the current vertex stream
/// @param vIndices - vector value of indices to gather
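/// @param pVtxOut - value pointer to output simdvertex struct
/// @param useVertexID2 - (USE_SIMD16_SHADERS only) forwarded to the shuffle and component-control helpers; true selects SWR_FETCH_CONTEXT::VertexID2 instead of VertexID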
#if USE_SIMD16_SHADERS
void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
Value* streams, Value* vIndices, Value* pVtxOut)
Value* streams, Value* vIndices, Value* pVtxOut, bool useVertexID2)
#else
void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
Value* streams, Value* vIndices, Value* pVtxOut)
#endif
{
uint32_t currentVertexElement = 0;
uint32_t outputElt = 0;
@ -887,7 +984,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
// Shuffle gathered components into place in simdvertex struct
#if USE_SIMD16_SHADERS
Shuffle16bpcGather(args, useVertexID2); // outputs to vVertexElements ref
#else
Shuffle16bpcGather(args); // outputs to vVertexElements ref
#endif
}
}
break;
@ -908,7 +1009,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -960,7 +1065,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -1038,7 +1147,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle);
// Shuffle gathered components into place in simdvertex struct
#if USE_SIMD16_SHADERS
Shuffle8bpcGatherd(args, useVertexID2); // outputs to vVertexElements ref
#else
Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
#endif
}
}
break;
@ -1078,7 +1191,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
// Shuffle gathered components into place in simdvertex struct
#if USE_SIMD16_SHADERS
Shuffle16bpcGather(args, useVertexID2); // outputs to vVertexElements ref
#else
Shuffle16bpcGather(args); // outputs to vVertexElements ref
#endif
}
}
break;
@ -1117,7 +1234,11 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -1265,7 +1386,11 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
/// @param compCtrl - component control val
/// @param vVertexElements[4] - vertex components to output
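/// @param swizzle[4] - component swizzle location
/// @param useVertexID2 - (USE_SIMD16_SHADERS only) forwarded to GenerateCompCtrlVector; true selects SWR_FETCH_CONTEXT::VertexID2 instead of VertexID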
#if USE_SIMD16_SHADERS
void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args, bool useVertexID2)
#else
void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
#endif
{
// Unpack tuple args
Value*& vGatherResult = std::get<0>(args);
@ -1367,7 +1492,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -1456,7 +1585,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -1488,7 +1621,11 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
/// @param compMask - component packing mask
/// @param compCtrl - component control val
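/// @param vVertexElements[4] - vertex components to output
/// @param useVertexID2 - (USE_SIMD16_SHADERS only) forwarded to GenerateCompCtrlVector; true selects SWR_FETCH_CONTEXT::VertexID2 instead of VertexID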
#if USE_SIMD16_SHADERS
void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args, bool useVertexID2)
#else
void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
#endif
{
// Unpack tuple args
Value* (&vGatherResult)[2] = std::get<0>(args);
@ -1591,7 +1728,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -1670,7 +1811,11 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
}
else
{
#if USE_SIMD16_SHADERS
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
}
if (currentVertexElement > 3)
@ -1715,7 +1860,11 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
#endif
// outputElt * 4 = offsetting by the size of a simdvertex
// + c offsets to a 32bit x vWidth row within the current vertex
// (with USE_SIMD16_SHADERS both strides double: outputElt * 8 and c * 2)
#if USE_SIMD16_SHADERS
Value* dest = GEP(pVtxOut, C(outputElt * 8 + c * 2), "destGEP");
#else
Value* dest = GEP(pVtxOut, C(outputElt * 4 + c), "destGEP");
#endif
STORE(vVertexElements[c], dest);
}
}
@ -1724,7 +1873,11 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
/// @brief Generates a constant vector of values based on the
/// ComponentControl value
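/// @param ctrl - ComponentControl value
/// @param useVertexID2 - (USE_SIMD16_SHADERS only) for StoreVertexId, true loads SWR_FETCH_CONTEXT::VertexID2 and false loads SWR_FETCH_CONTEXT::VertexID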
#if USE_SIMD16_SHADERS
Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2)
#else
Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
#endif
{
switch(ctrl)
{
@ -1734,7 +1887,19 @@ Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
case Store1Int: return VIMMED1(1);
case StoreVertexId:
{
#if USE_SIMD16_SHADERS
Value* pId;
if (useVertexID2)
{
pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 })), mSimdFP32Ty);
}
else
{
pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
}
#else
Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
#endif
return VBROADCAST(pId);
}
case StoreInstanceId: