swr/rast: Convert gather masks to Nx1bit
Simplifies the calling code and brings the gather function interface closer to LLVM's masked_gather.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
36e276b6b0
commit
3ec98ab5d4
|
@ -602,7 +602,7 @@ namespace SwrJit
|
|||
if(JM()->mArch.AVX2())
|
||||
{
|
||||
// force mask to <N x float>, required by vgather
|
||||
Value *mask = BITCAST(vMask, mSimdFP32Ty);
|
||||
Value *mask = BITCAST(VMASK(vMask), mSimdFP32Ty);
|
||||
|
||||
vGather = VGATHERPS(vSrc, pBase, vIndices, mask, C(scale));
|
||||
}
|
||||
|
@ -617,7 +617,6 @@ namespace SwrJit
|
|||
vGather = VUNDEF_F();
|
||||
Value *vScaleVec = VIMMED1((uint32_t)scale);
|
||||
Value *vOffsets = MUL(vIndices,vScaleVec);
|
||||
Value *mask = MASK(vMask);
|
||||
for(uint32_t i = 0; i < mVWidth; ++i)
|
||||
{
|
||||
// single component byte index
|
||||
|
@ -627,7 +626,7 @@ namespace SwrJit
|
|||
loadAddress = BITCAST(loadAddress,PointerType::get(mFP32Ty,0));
|
||||
// pointer to the value to load if we're masking off a component
|
||||
Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)});
|
||||
Value *selMask = VEXTRACT(mask,C(i));
|
||||
Value *selMask = VEXTRACT(vMask,C(i));
|
||||
// switch in a safe address to load if we're trying to access a vertex
|
||||
Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
|
||||
Value *val = LOAD(validAddress);
|
||||
|
@ -648,7 +647,7 @@ namespace SwrJit
|
|||
if (JM()->mArch.AVX512F())
|
||||
{
|
||||
// force mask to <N-bit Integer>, required by vgather2
|
||||
Value *mask = BITCAST(MASK2(vMask), mInt16Ty);
|
||||
Value *mask = BITCAST(vMask, mInt16Ty);
|
||||
|
||||
vGather = VGATHERPS2(vSrc, pBase, vIndices, mask, C((uint32_t)scale));
|
||||
}
|
||||
|
@ -689,7 +688,7 @@ namespace SwrJit
|
|||
// use avx2 gather instruction if available
|
||||
if(JM()->mArch.AVX2())
|
||||
{
|
||||
vGather = VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale));
|
||||
vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -702,7 +701,6 @@ namespace SwrJit
|
|||
vGather = VUNDEF_I();
|
||||
Value *vScaleVec = VIMMED1((uint32_t)scale);
|
||||
Value *vOffsets = MUL(vIndices, vScaleVec);
|
||||
Value *mask = MASK(vMask);
|
||||
for(uint32_t i = 0; i < mVWidth; ++i)
|
||||
{
|
||||
// single component byte index
|
||||
|
@ -712,7 +710,7 @@ namespace SwrJit
|
|||
loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 0));
|
||||
// pointer to the value to load if we're masking off a component
|
||||
Value *maskLoadAddress = GEP(vSrcPtr, {C(0), C(i)});
|
||||
Value *selMask = VEXTRACT(mask, C(i));
|
||||
Value *selMask = VEXTRACT(vMask, C(i));
|
||||
// switch in a safe address to load if we're trying to access a vertex
|
||||
Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
|
||||
Value *val = LOAD(validAddress, C(0));
|
||||
|
@ -739,6 +737,7 @@ namespace SwrJit
|
|||
// use avx2 gather instruction if available
|
||||
if(JM()->mArch.AVX2())
|
||||
{
|
||||
vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth/2)), VectorType::get(mDoubleTy, mVWidth/2));
|
||||
vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
|
||||
}
|
||||
else
|
||||
|
@ -752,7 +751,6 @@ namespace SwrJit
|
|||
vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
|
||||
Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale));
|
||||
Value *vOffsets = MUL(vIndices,vScaleVec);
|
||||
Value *mask = MASK(vMask);
|
||||
for(uint32_t i = 0; i < mVWidth/2; ++i)
|
||||
{
|
||||
// single component byte index
|
||||
|
@ -762,7 +760,7 @@ namespace SwrJit
|
|||
loadAddress = BITCAST(loadAddress,PointerType::get(mDoubleTy,0));
|
||||
// pointer to the value to load if we're masking off a component
|
||||
Value *maskLoadAddress = GEP(vSrcPtr,{C(0), C(i)});
|
||||
Value *selMask = VEXTRACT(mask,C(i));
|
||||
Value *selMask = VEXTRACT(vMask,C(i));
|
||||
// switch in a safe address to load if we're trying to access a vertex
|
||||
Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
|
||||
Value *val = LOAD(validAddress);
|
||||
|
@ -1094,14 +1092,10 @@ namespace SwrJit
|
|||
const SWR_FORMAT_INFO &info = GetFormatInfo(format);
|
||||
if(info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
|
||||
{
|
||||
// ensure our mask is the correct type
|
||||
mask = BITCAST(mask, mSimdFP32Ty);
|
||||
GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
|
||||
}
|
||||
else
|
||||
{
|
||||
// ensure our mask is the correct type
|
||||
mask = BITCAST(mask, mSimdInt32Ty);
|
||||
GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1004,10 +1004,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
// blend in any partially OOB indices that have valid elements
|
||||
vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
|
||||
vGatherMask2 = SELECT(vPartialOOBMask2, vElementInBoundsMask, vGatherMask2);
|
||||
Value *pMask = vGatherMask;
|
||||
Value *pMask2 = vGatherMask2;
|
||||
vGatherMask = VMASK(vGatherMask);
|
||||
vGatherMask2 = VMASK(vGatherMask2);
|
||||
|
||||
// calculate the actual offsets into the VB
|
||||
Value* vOffsets = MUL(vCurIndices, vStride);
|
||||
|
@ -1051,8 +1047,6 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
|
||||
// blend in any partially OOB indices that have valid elements
|
||||
vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
|
||||
Value* pMask = vGatherMask;
|
||||
vGatherMask = VMASK(vGatherMask);
|
||||
|
||||
// calculate the actual offsets into the VB
|
||||
Value* vOffsets = MUL(vCurIndices, vStride);
|
||||
|
@ -1289,9 +1283,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
indices = INSERT2_I(indices, vShiftedOffsets, 0);
|
||||
indices = INSERT2_I(indices, vShiftedOffsets2, 1);
|
||||
|
||||
Value *mask = VUNDEF2_I();
|
||||
mask = INSERT2_I(mask, vGatherMask, 0);
|
||||
mask = INSERT2_I(mask, vGatherMask2, 1);
|
||||
Value *mask = VSHUFFLE(vGatherMask, vGatherMask2, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
|
||||
|
||||
pVtxSrc2[currentVertexElement] = GATHERPS2(gatherSrc16, pStreamBase, indices, mask, 2);
|
||||
#else
|
||||
|
@ -1396,18 +1388,10 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
// if we need to gather the component
|
||||
if (compCtrl[i] == StoreSrc)
|
||||
{
|
||||
Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 }));
|
||||
Value *vMaskLo2 = VSHUFFLE(pMask2, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 }));
|
||||
Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 }));
|
||||
Value *vMaskHi2 = VSHUFFLE(pMask2, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 }));
|
||||
vMaskLo = S_EXT(vMaskLo, VectorType::get(mInt64Ty, 4));
|
||||
vMaskLo2 = S_EXT(vMaskLo2, VectorType::get(mInt64Ty, 4));
|
||||
vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4));
|
||||
vMaskHi2 = S_EXT(vMaskHi2, VectorType::get(mInt64Ty, 4));
|
||||
vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4));
|
||||
vMaskLo2 = BITCAST(vMaskLo2, VectorType::get(mDoubleTy, 4));
|
||||
vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4));
|
||||
vMaskHi2 = BITCAST(vMaskHi2, VectorType::get(mDoubleTy, 4));
|
||||
Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 }));
|
||||
Value *vMaskLo2 = VSHUFFLE(vGatherMask2, VUNDEF(mInt1Ty, 8), C({ 0, 1, 2, 3 }));
|
||||
Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 }));
|
||||
Value *vMaskHi2 = VSHUFFLE(vGatherMask2, VUNDEF(mInt1Ty, 8), C({ 4, 5, 6, 7 }));
|
||||
|
||||
Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0));
|
||||
Value *vOffsetsLo2 = VEXTRACTI128(vOffsets2, C(0));
|
||||
|
@ -1483,12 +1467,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
|
|||
// if we need to gather the component
|
||||
if (compCtrl[i] == StoreSrc)
|
||||
{
|
||||
Value *vMaskLo = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3}));
|
||||
Value *vMaskHi = VSHUFFLE(pMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7}));
|
||||
vMaskLo = S_EXT(vMaskLo, VectorType::get(mInt64Ty, 4));
|
||||
vMaskHi = S_EXT(vMaskHi, VectorType::get(mInt64Ty, 4));
|
||||
vMaskLo = BITCAST(vMaskLo, VectorType::get(mDoubleTy, 4));
|
||||
vMaskHi = BITCAST(vMaskHi, VectorType::get(mDoubleTy, 4));
|
||||
Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3}));
|
||||
Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7}));
|
||||
|
||||
Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0));
|
||||
Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1));
|
||||
|
|
Loading…
Reference in New Issue