swr/rast: Implement Early Rasterization optimization

Early Rasterization is an optimization for small triangles.

Scientific workloads often contain very small triangles that has non-zero
area and cannot be trivially rejected as falling between pixel centers,
but does not cover any pixel center. Those triangles can be initially
rasterized as early as in binner and rejected if they cover no pixels The
optimization can be disabled in compilation using KNOB_ENABLE_EARLY_RAST
option in knobs.h

The Early Rast is disabled by default.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
George Kyriazis 2018-01-19 15:47:14 -06:00
parent be3cd7add1
commit bacfbe5a32
4 changed files with 388 additions and 0 deletions

View File

@ -292,6 +292,341 @@ void TransposeVertices(simd4scalar(&dst)[16], const simd16scalar &src0, const si
vTranspose4x16(reinterpret_cast<simd16scalar(&)[4]>(dst), src0, src1, src2, _simd16_setzero_ps());
}
#if KNOB_ENABLE_EARLY_RAST
#define ER_SIMD_TILE_X_DIM (1 << ER_SIMD_TILE_X_SHIFT)
#define ER_SIMD_TILE_Y_DIM (1 << ER_SIMD_TILE_Y_SHIFT)
template<typename SIMD_T>
struct EarlyRastHelper
{
};
template<>
struct EarlyRastHelper<SIMD256>
{
static SIMD256::Integer InitShiftCntrl()
{
return SIMD256::set_epi32(24, 25, 26, 27, 28, 29, 30, 31);
}
};
#if USE_SIMD16_FRONTEND
template<>
struct EarlyRastHelper<SIMD512>
{
static SIMD512::Integer InitShiftCntrl()
{
return SIMD512::set_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
}
};
#endif
//////////////////////////////////////////////////////////////////////////
/// @brief Early Rasterizer (ER); triangles that fit small (e.g. 4x4) tile
/// (ER tile) can be rasterized as early as in binner to check if
/// they cover any pixels. If not - the triangles can be
/// culled in binner.
///
/// @param er_bbox - coordinates of ER tile for each triangle
/// @param vAi - A coefficients of triangle edges
/// @param vBi - B coefficients of triangle edges
/// @param vXi - X coordinates of triangle vertices
/// @param vYi - Y coordinates of triangle vertices
/// @param frontWindingTris - mask indicating CCW/CW triangles
/// @param triMask - mask for valid SIMD lanes (triangles)
/// @param oneTileMask - defines triangles for ER to work on
/// (tris that fit into ER tile)
template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
uint32_t SIMDCALL EarlyRasterizer(
SIMDBBOX_T<SIMD_T> &er_bbox,
typename SIMD_T::Integer (&vAi)[3],
typename SIMD_T::Integer (&vBi)[3],
typename SIMD_T::Integer (&vXi)[3],
typename SIMD_T::Integer (&vYi)[3],
uint32_t cwTrisMask,
uint32_t triMask,
uint32_t oneTileMask)
{
// step to pixel center of top-left pixel of the triangle bbox
typename SIMD_T::Integer vTopLeftX = SIMD_T::template slli_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(er_bbox.xmin);
vTopLeftX = SIMD_T::add_epi32(vTopLeftX, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
typename SIMD_T::Integer vTopLeftY = SIMD_T::template slli_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(er_bbox.ymin);
vTopLeftY = SIMD_T::add_epi32(vTopLeftY, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
// negate A and B for CW tris
typename SIMD_T::Integer vNegA0 = SIMD_T::mullo_epi32(vAi[0], SIMD_T::set1_epi32(-1));
typename SIMD_T::Integer vNegA1 = SIMD_T::mullo_epi32(vAi[1], SIMD_T::set1_epi32(-1));
typename SIMD_T::Integer vNegA2 = SIMD_T::mullo_epi32(vAi[2], SIMD_T::set1_epi32(-1));
typename SIMD_T::Integer vNegB0 = SIMD_T::mullo_epi32(vBi[0], SIMD_T::set1_epi32(-1));
typename SIMD_T::Integer vNegB1 = SIMD_T::mullo_epi32(vBi[1], SIMD_T::set1_epi32(-1));
typename SIMD_T::Integer vNegB2 = SIMD_T::mullo_epi32(vBi[2], SIMD_T::set1_epi32(-1));
RDTSC_EVENT(FEEarlyRastEnter, _mm_popcnt_u32(oneTileMask & triMask), 0);
typename SIMD_T::Integer vShiftCntrl = EarlyRastHelper <SIMD_T>::InitShiftCntrl();
typename SIMD_T::Integer vCwTris = SIMD_T::set1_epi32(cwTrisMask);
typename SIMD_T::Integer vMask = SIMD_T::sllv_epi32(vCwTris, vShiftCntrl);
vAi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[0]), SIMD_T::castsi_ps(vNegA0), SIMD_T::castsi_ps(vMask)));
vAi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[1]), SIMD_T::castsi_ps(vNegA1), SIMD_T::castsi_ps(vMask)));
vAi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[2]), SIMD_T::castsi_ps(vNegA2), SIMD_T::castsi_ps(vMask)));
vBi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[0]), SIMD_T::castsi_ps(vNegB0), SIMD_T::castsi_ps(vMask)));
vBi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[1]), SIMD_T::castsi_ps(vNegB1), SIMD_T::castsi_ps(vMask)));
vBi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[2]), SIMD_T::castsi_ps(vNegB2), SIMD_T::castsi_ps(vMask)));
// evaluate edge equations at top-left pixel
typename SIMD_T::Integer vDeltaX0 = SIMD_T::sub_epi32(vTopLeftX, vXi[0]);
typename SIMD_T::Integer vDeltaX1 = SIMD_T::sub_epi32(vTopLeftX, vXi[1]);
typename SIMD_T::Integer vDeltaX2 = SIMD_T::sub_epi32(vTopLeftX, vXi[2]);
typename SIMD_T::Integer vDeltaY0 = SIMD_T::sub_epi32(vTopLeftY, vYi[0]);
typename SIMD_T::Integer vDeltaY1 = SIMD_T::sub_epi32(vTopLeftY, vYi[1]);
typename SIMD_T::Integer vDeltaY2 = SIMD_T::sub_epi32(vTopLeftY, vYi[2]);
typename SIMD_T::Integer vAX0 = SIMD_T::mullo_epi32(vAi[0], vDeltaX0);
typename SIMD_T::Integer vAX1 = SIMD_T::mullo_epi32(vAi[1], vDeltaX1);
typename SIMD_T::Integer vAX2 = SIMD_T::mullo_epi32(vAi[2], vDeltaX2);
typename SIMD_T::Integer vBY0 = SIMD_T::mullo_epi32(vBi[0], vDeltaY0);
typename SIMD_T::Integer vBY1 = SIMD_T::mullo_epi32(vBi[1], vDeltaY1);
typename SIMD_T::Integer vBY2 = SIMD_T::mullo_epi32(vBi[2], vDeltaY2);
typename SIMD_T::Integer vEdge0 = SIMD_T::add_epi32(vAX0, vBY0);
typename SIMD_T::Integer vEdge1 = SIMD_T::add_epi32(vAX1, vBY1);
typename SIMD_T::Integer vEdge2 = SIMD_T::add_epi32(vAX2, vBY2);
vEdge0 = SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vEdge0);
vEdge1 = SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vEdge1);
vEdge2 = SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vEdge2);
// top left rule
typename SIMD_T::Integer vEdgeAdjust0 = SIMD_T::sub_epi32(vEdge0, SIMD_T::set1_epi32(1));
typename SIMD_T::Integer vEdgeAdjust1 = SIMD_T::sub_epi32(vEdge1, SIMD_T::set1_epi32(1));
typename SIMD_T::Integer vEdgeAdjust2 = SIMD_T::sub_epi32(vEdge2, SIMD_T::set1_epi32(1));
// vA < 0
vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vAi[0])));
vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vAi[1])));
vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vAi[2])));
// vA == 0 && vB < 0
typename SIMD_T::Integer vCmp0 = SIMD_T::cmpeq_epi32(vAi[0], SIMD_T::setzero_si());
typename SIMD_T::Integer vCmp1 = SIMD_T::cmpeq_epi32(vAi[1], SIMD_T::setzero_si());
typename SIMD_T::Integer vCmp2 = SIMD_T::cmpeq_epi32(vAi[2], SIMD_T::setzero_si());
vCmp0 = SIMD_T::and_si(vCmp0, vBi[0]);
vCmp1 = SIMD_T::and_si(vCmp1, vBi[1]);
vCmp2 = SIMD_T::and_si(vCmp2, vBi[2]);
vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vCmp0)));
vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vCmp1)));
vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vCmp2)));
#if ER_SIMD_TILE_X_DIM == 4 && ER_SIMD_TILE_Y_DIM == 4
// Go down
// coverage pixel 0
typename SIMD_T::Integer vMask0 = SIMD_T::and_si(vEdge0, vEdge1);
vMask0 = SIMD_T::and_si(vMask0, vEdge2);
// coverage pixel 1
typename SIMD_T::Integer vEdge0N = SIMD_T::add_epi32(vEdge0, vBi[0]);
typename SIMD_T::Integer vEdge1N = SIMD_T::add_epi32(vEdge1, vBi[1]);
typename SIMD_T::Integer vEdge2N = SIMD_T::add_epi32(vEdge2, vBi[2]);
typename SIMD_T::Integer vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
// coverage pixel 2
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
typename SIMD_T::Integer vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
// coverage pixel 3
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
typename SIMD_T::Integer vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
// One step to the right and then up
// coverage pixel 4
vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
typename SIMD_T::Integer vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
// coverage pixel 5
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
typename SIMD_T::Integer vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
// coverage pixel 6
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
typename SIMD_T::Integer vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
// coverage pixel 7
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
typename SIMD_T::Integer vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
typename SIMD_T::Integer vLit1 = SIMD_T::or_si(vMask0, vMask1);
vLit1 = SIMD_T::or_si(vLit1, vMask2);
vLit1 = SIMD_T::or_si(vLit1, vMask3);
vLit1 = SIMD_T::or_si(vLit1, vMask4);
vLit1 = SIMD_T::or_si(vLit1, vMask5);
vLit1 = SIMD_T::or_si(vLit1, vMask6);
vLit1 = SIMD_T::or_si(vLit1, vMask7);
// Step to the right and go down again
// coverage pixel 0
vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
vMask0 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask0 = SIMD_T::and_si(vMask0, vEdge2N);
// coverage pixel 1
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
// coverage pixel 2
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
// coverage pixel 3
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
// And for the last time - to the right and up
// coverage pixel 4
vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
// coverage pixel 5
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
// coverage pixel 6
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
// coverage pixel 7
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
typename SIMD_T::Integer vLit2 = SIMD_T::or_si(vMask0, vMask1);
vLit2 = SIMD_T::or_si(vLit2, vMask2);
vLit2 = SIMD_T::or_si(vLit2, vMask3);
vLit2 = SIMD_T::or_si(vLit2, vMask4);
vLit2 = SIMD_T::or_si(vLit2, vMask5);
vLit2 = SIMD_T::or_si(vLit2, vMask6);
vLit2 = SIMD_T::or_si(vLit2, vMask7);
typename SIMD_T::Integer vLit = SIMD_T::or_si(vLit1, vLit2);
#else
// Generic algorithm sweeping in row by row order
typename SIMD_T::Integer vRowMask[ER_SIMD_TILE_Y_DIM];
typename SIMD_T::Integer vEdge0N = vEdge0;
typename SIMD_T::Integer vEdge1N = vEdge1;
typename SIMD_T::Integer vEdge2N = vEdge2;
for (uint32_t row = 0; row < ER_SIMD_TILE_Y_DIM; row++)
{
// Store edge values at the beginning of the row
typename SIMD_T::Integer vRowEdge0 = vEdge0N;
typename SIMD_T::Integer vRowEdge1 = vEdge1N;
typename SIMD_T::Integer vRowEdge2 = vEdge2N;
typename SIMD_T::Integer vColMask[ER_SIMD_TILE_X_DIM];
for (uint32_t col = 0; col < ER_SIMD_TILE_X_DIM; col++)
{
vColMask[col] = SIMD_T::and_si(vEdge0N, vEdge1N);
vColMask[col] = SIMD_T::and_si(vColMask[col], vEdge2N);
vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
}
vRowMask[row] = vColMask[0];
for (uint32_t col = 1; col < ER_SIMD_TILE_X_DIM; col++)
{
vRowMask[row] = SIMD_T::or_si(vRowMask[row], vColMask[col]);
}
// Restore values and go to the next row
vEdge0N = vRowEdge0;
vEdge1N = vRowEdge1;
vEdge2N = vRowEdge2;
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
}
// compress all masks
typename SIMD_T::Integer vLit = vRowMask[0];
for (uint32_t row = 1; row < ER_SIMD_TILE_Y_DIM; row++)
{
vLit = SIMD_T::or_si(vLit, vRowMask[row]);
}
#endif
// Check which triangles has any pixel lit
uint32_t maskLit = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vLit));
uint32_t maskUnlit = ~maskLit & oneTileMask;
uint32_t oldTriMask = triMask;
triMask &= ~maskUnlit;
if (triMask ^ oldTriMask)
{
RDTSC_EVENT(FEEarlyRastExit, _mm_popcnt_u32(triMask & oneTileMask), 0);
}
return triMask;
}
#endif // Early rasterizer
//////////////////////////////////////////////////////////////////////////
/// @brief Bin triangle primitives to macro tiles. Performs setup, clipping
/// culling, viewport transform, etc.
@ -592,6 +927,45 @@ void SIMDCALL BinTrianglesImpl(
triMask = triMask & ~maskOutsideScissor;
}
#if KNOB_ENABLE_EARLY_RAST
if (rastState.sampleCount == SWR_MULTISAMPLE_1X && !CT::IsConservativeT::value)
{
// Try early rasterization - culling small triangles which do not cover any pixels
// convert to ER tiles
SIMDBBOX_T<SIMD_T> er_bbox;
er_bbox.xmin = SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmin);
er_bbox.xmax = SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmax);
er_bbox.ymin = SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymin);
er_bbox.ymax = SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymax);
typename SIMD_T::Integer vTileX = SIMD_T::cmpeq_epi32(er_bbox.xmin, er_bbox.xmax);
typename SIMD_T::Integer vTileY = SIMD_T::cmpeq_epi32(er_bbox.ymin, er_bbox.ymax);
// Take only triangles that fit into ER tile
uint32_t oneTileMask = triMask & SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(vTileX, vTileY)));
if (oneTileMask)
{
// determine CW tris (det > 0)
uint32_t maskCwLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
uint32_t maskCwHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
uint32_t cwTrisMask = maskCwLo | (maskCwHi << (SIMD_WIDTH / 2));
// Try early rasterization
triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
if (!triMask)
{
AR_END(FEBinTriangles, 1);
return;
}
}
}
#endif
endBinTriangles:

View File

@ -165,6 +165,16 @@
// enables cut-aware primitive assembler
#define KNOB_ENABLE_CUT_AWARE_PA TRUE
// enables early rasterization (useful for small triangles)
#if !defined(KNOB_ENABLE_EARLY_RAST)
#define KNOB_ENABLE_EARLY_RAST 0
#endif
#if KNOB_ENABLE_EARLY_RAST
#define ER_SIMD_TILE_X_SHIFT 2
#define ER_SIMD_TILE_Y_SHIFT 2
#endif
///////////////////////////////////////////////////////////////////////////////
// Debug knobs
///////////////////////////////////////////////////////////////////////////////

View File

@ -56,6 +56,8 @@ BUCKET_DESC gCoreBuckets[] = {
{ "FEClipTriangles", "", false, 0xffffffff },
{ "FECullZeroAreaAndBackface", "", false, 0xffffffff },
{ "FECullBetweenCenters", "", false, 0xffffffff },
{ "FEEarlyRastEnter", "", false, 0xffffffff },
{ "FEEarlyRastExit", "", false, 0xffffffff },
{ "FEProcessStoreTiles", "", true, 0xff39c864 },
{ "FEProcessInvalidateTiles", "", true, 0xffffffff },
{ "WorkerWorkOnFifoBE", "", false, 0xff40261c },

View File

@ -61,6 +61,8 @@ enum CORE_BUCKETS
FEClipTriangles,
FECullZeroAreaAndBackface,
FECullBetweenCenters,
FEEarlyRastEnter,
FEEarlyRastExit,
FEProcessStoreTiles,
FEProcessInvalidateTiles,
WorkerWorkOnFifoBE,