swr: [rasterizer core] only use Viewport/Scissors during SwrDraw* operations

Add explicit rects for:

- SwrClearRenderTarget
- SwrDiscardRect
- SwrInvalidateTiles
- SwrStoreTiles

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
Tim Rowley 2016-08-17 14:30:32 -05:00
parent 6209dbf5a4
commit 0ff57446e3
12 changed files with 398 additions and 413 deletions

View File

@ -46,6 +46,8 @@
#include "common/simdintrin.h"
#include "common/os.h"
static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
void SetupDefaultState(SWR_CONTEXT *pContext);
static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
@ -713,56 +715,46 @@ void SwrSetViewports(
void SwrSetScissorRects(
HANDLE hContext,
uint32_t numScissors,
const BBOX* pScissors)
const SWR_RECT* pScissors)
{
SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS,
"Invalid number of scissor rects.");
API_STATE* pState = GetDrawState(GetContext(hContext));
memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(BBOX));
memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
};
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
API_STATE *pState = &pDC->pState->state;
uint32_t left, right, top, bottom;
// Set up scissor dimensions based on scissor or viewport
if (pState->rastState.scissorEnable)
{
// scissor rect right/bottom edge are exclusive, core expects scissor dimensions to be inclusive, so subtract one pixel from right/bottom edges
left = pState->scissorRects[0].left;
right = pState->scissorRects[0].right;
top = pState->scissorRects[0].top;
bottom = pState->scissorRects[0].bottom;
pState->scissorInFixedPoint = pState->scissorRects[0];
}
else
{
// the vp width and height must be added to origin un-rounded then the result round to -inf.
// The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
left = (int32_t)pState->vp[0].x;
right = (int32_t)(pState->vp[0].x + pState->vp[0].width);
top = (int32_t)pState->vp[0].y;
bottom = (int32_t)(pState->vp[0].y + pState->vp[0].height);
pState->scissorInFixedPoint.xmin = (int32_t)pState->vp[0].x;
pState->scissorInFixedPoint.xmax = (int32_t)(pState->vp[0].x + pState->vp[0].width);
pState->scissorInFixedPoint.ymin = (int32_t)pState->vp[0].y;
pState->scissorInFixedPoint.ymax = (int32_t)(pState->vp[0].y + pState->vp[0].height);
}
right = std::min<uint32_t>(right, KNOB_MAX_SCISSOR_X);
bottom = std::min<uint32_t>(bottom, KNOB_MAX_SCISSOR_Y);
// Clamp to max rect
pState->scissorInFixedPoint &= g_MaxScissorRect;
if (left > KNOB_MAX_SCISSOR_X || top > KNOB_MAX_SCISSOR_Y)
{
pState->scissorInFixedPoint.left = 0;
pState->scissorInFixedPoint.right = 0;
pState->scissorInFixedPoint.top = 0;
pState->scissorInFixedPoint.bottom = 0;
}
else
{
pState->scissorInFixedPoint.left = left * FIXED_POINT_SCALE;
pState->scissorInFixedPoint.right = right * FIXED_POINT_SCALE - 1;
pState->scissorInFixedPoint.top = top * FIXED_POINT_SCALE;
pState->scissorInFixedPoint.bottom = bottom * FIXED_POINT_SCALE - 1;
}
// Scale to fixed point
pState->scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
pState->scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
pState->scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
pState->scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
// Make scissor inclusive
pState->scissorInFixedPoint.xmax -= 1;
pState->scissorInFixedPoint.ymax -= 1;
}
// templated backend function tables
@ -1303,9 +1295,12 @@ void SwrDrawIndexedInstanced(
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
void SwrInvalidateTiles(
/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
/// be hottile size-aligned.
void SWR_API SwrInvalidateTiles(
HANDLE hContext,
uint32_t attachmentMask)
uint32_t attachmentMask,
const SWR_RECT& invalidateRect)
{
if (KNOB_TOSS_DRAW)
{
@ -1318,7 +1313,8 @@ void SwrInvalidateTiles(
pDC->FeWork.type = DISCARDINVALIDATETILES;
pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT));
pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
@ -1331,11 +1327,12 @@ void SwrInvalidateTiles(
/// @brief SwrDiscardRect
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
/// @param rect - if rect is all zeros, the entire attachment surface will be discarded
void SwrDiscardRect(
/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
/// discarded.
void SWR_API SwrDiscardRect(
HANDLE hContext,
uint32_t attachmentMask,
SWR_RECT rect)
const SWR_RECT& rect)
{
if (KNOB_TOSS_DRAW)
{
@ -1350,6 +1347,7 @@ void SwrDiscardRect(
pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
@ -1398,10 +1396,11 @@ void SwrDispatch(
// Deswizzles, converts and stores current contents of the hot tiles to surface
// described by pState
void SwrStoreTiles(
void SWR_API SwrStoreTiles(
HANDLE hContext,
SWR_RENDERTARGET_ATTACHMENT attachment,
SWR_TILE_STATE postStoreTileState)
SWR_TILE_STATE postStoreTileState,
const SWR_RECT& storeRect)
{
if (KNOB_TOSS_DRAW)
{
@ -1413,12 +1412,12 @@ void SwrStoreTiles(
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
SetupMacroTileScissors(pDC);
pDC->FeWork.type = STORETILES;
pDC->FeWork.pfnWork = ProcessStoreTiles;
pDC->FeWork.desc.storeTiles.attachment = attachment;
pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
pDC->FeWork.desc.storeTiles.rect = storeRect;
pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect;
//enqueue
QueueDraw(pContext);
@ -1426,12 +1425,21 @@ void SwrStoreTiles(
RDTSC_STOP(APIStoreTiles, 0, 0);
}
void SwrClearRenderTarget(
//////////////////////////////////////////////////////////////////////////
/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
/// @param hContext - Handle passed back from SwrCreateContext
/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE)
/// @param clearColor - color use for clearing render targets
/// @param z - depth value use for clearing depth buffer
/// @param stencil - stencil value used for clearing stencil buffer
/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
void SWR_API SwrClearRenderTarget(
HANDLE hContext,
uint32_t clearMask,
const float clearColor[4],
float z,
uint8_t stencil)
uint8_t stencil,
const SWR_RECT& clearRect)
{
if (KNOB_TOSS_DRAW)
{
@ -1441,16 +1449,16 @@ void SwrClearRenderTarget(
RDTSC_START(APIClearRenderTarget);
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
SetupMacroTileScissors(pDC);
CLEAR_FLAGS flags;
flags.bits = 0;
flags.mask = clearMask;
pDC->FeWork.type = CLEAR;
pDC->FeWork.pfnWork = ProcessClear;
pDC->FeWork.desc.clear.rect = clearRect;
pDC->FeWork.desc.clear.rect &= g_MaxScissorRect;
pDC->FeWork.desc.clear.flags = flags;
pDC->FeWork.desc.clear.clearDepth = z;
pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];

View File

@ -32,17 +32,81 @@
#include "common/os.h"
#include <assert.h>
#include <vector>
#include <algorithm>
#include "common/simdintrin.h"
#include "common/formats.h"
#include "core/utils.h"
#include "core/state.h"
///@todo place all the API functions into the 'swr' namespace.
typedef void(SWR_API *PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
//////////////////////////////////////////////////////////////////////////
/// @brief Rectangle structure
struct SWR_RECT
{
int32_t xmin; ///< inclusive
int32_t ymin; ///< inclusive
int32_t xmax; ///< exclusive
int32_t ymax; ///< exclusive
bool operator == (const SWR_RECT& rhs)
{
return (this->ymin == rhs.ymin &&
this->ymax == rhs.ymax &&
this->xmin == rhs.xmin &&
this->xmax == rhs.xmax);
}
bool operator != (const SWR_RECT& rhs)
{
return !(*this == rhs);
}
SWR_RECT& Intersect(const SWR_RECT& other)
{
this->xmin = std::max(this->xmin, other.xmin);
this->ymin = std::max(this->ymin, other.ymin);
this->xmax = std::min(this->xmax, other.xmax);
this->ymax = std::min(this->ymax, other.ymax);
if (xmax - xmin < 0 ||
ymax - ymin < 0)
{
// Zero area
ymin = ymax = xmin = xmax = 0;
}
return *this;
}
SWR_RECT& operator &= (const SWR_RECT& other)
{
return Intersect(other);
}
SWR_RECT& Union(const SWR_RECT& other)
{
this->xmin = std::min(this->xmin, other.xmin);
this->ymin = std::min(this->ymin, other.ymin);
this->xmax = std::max(this->xmax, other.xmax);
this->ymax = std::max(this->ymax, other.ymax);
return *this;
}
SWR_RECT& operator |= (const SWR_RECT& other)
{
return Union(other);
}
void Translate(int32_t x, int32_t y)
{
xmin += x;
ymin += y;
xmax += x;
ymax += y;
}
};
//////////////////////////////////////////////////////////////////////////
/// @brief Function signature for load hot tiles
/// @param hPrivateContext - handle to private data
@ -105,6 +169,10 @@ typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
const SWR_STATS_FE* pStats);
//////////////////////////////////////////////////////////////////////////
/// BucketManager
/// Forward Declaration (see rdtsc_buckets.h for full definition)
/////////////////////////////////////////////////////////////////////////
class BucketManager;
//////////////////////////////////////////////////////////////////////////
@ -149,17 +217,6 @@ struct SWR_CREATECONTEXT_INFO
SWR_THREADING_INFO* pThreadInfo;
};
//////////////////////////////////////////////////////////////////////////
/// SWR_RECT
/////////////////////////////////////////////////////////////////////////
struct SWR_RECT
{
uint32_t left;
uint32_t right;
uint32_t top;
uint32_t bottom;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Create SWR Context.
/// @param pCreateInfo - pointer to creation info.
@ -445,19 +502,23 @@ void SWR_API SwrDrawIndexedInstanced(
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
/// be hottile size-aligned.
void SWR_API SwrInvalidateTiles(
HANDLE hContext,
uint32_t attachmentMask);
uint32_t attachmentMask,
const SWR_RECT& invalidateRect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDiscardRect
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
/// @param rect - if rect is all zeros, the entire attachment surface will be discarded
/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
/// discarded.
void SWR_API SwrDiscardRect(
HANDLE hContext,
uint32_t attachmentMask,
SWR_RECT rect);
const SWR_RECT& rect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDispatch
@ -483,15 +544,30 @@ enum SWR_TILE_STATE
void SWR_API SwrStoreTiles(
HANDLE hContext,
SWR_RENDERTARGET_ATTACHMENT attachment,
SWR_TILE_STATE postStoreTileState);
SWR_TILE_STATE postStoreTileState,
const SWR_RECT& storeRect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
/// @param hContext - Handle passed back from SwrCreateContext
/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE)
/// @param clearColor - color use for clearing render targets
/// @param z - depth value use for clearing depth buffer
/// @param stencil - stencil value used for clearing stencil buffer
/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
void SWR_API SwrClearRenderTarget(
HANDLE hContext,
uint32_t clearMask,
const float clearColor[4],
float z,
uint8_t stencil);
uint8_t stencil,
const SWR_RECT& clearRect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrSetRastyState
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands
void SWR_API SwrSetRastState(
HANDLE hContext,
const SWR_RASTSTATE *pRastState);
@ -516,7 +592,7 @@ void SWR_API SwrSetViewports(
void SWR_API SwrSetScissorRects(
HANDLE hContext,
uint32_t numScissors,
const BBOX* pScissors);
const SWR_RECT* pScissors);
//////////////////////////////////////////////////////////////////////////
/// @brief Returns a pointer to the private context state for the current
@ -555,4 +631,5 @@ void SWR_API SwrEnableStats(
/// @param hContext - Handle passed back from SwrCreateContext
void SWR_API SwrEndFrame(
HANDLE hContext);
#endif//__SWR_API_H__

View File

@ -37,7 +37,7 @@
#include <algorithm>
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4]);
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4], const SWR_RECT& rect);
static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
//////////////////////////////////////////////////////////////////////////
@ -88,7 +88,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
template<SWR_FORMAT format>
void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
{
auto lambda = [&](int comp)
auto lambda = [&](int32_t comp)
{
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
@ -102,7 +102,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
}
template<SWR_FORMAT format>
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4])
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4], const SWR_RECT& rect)
{
// convert clear color to hottile format
// clear color is in RGBA float/uint32
@ -122,32 +122,33 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
uint32_t tileX, tileY;
MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
const API_STATE& state = GetApiState(pDC);
int top = KNOB_MACROTILE_Y_DIM_FIXED * tileY;
int bottom = top + KNOB_MACROTILE_Y_DIM_FIXED - 1;
int left = KNOB_MACROTILE_X_DIM_FIXED * tileX;
int right = left + KNOB_MACROTILE_X_DIM_FIXED - 1;
// intersect with scissor
top = std::max(top, state.scissorInFixedPoint.top);
left = std::max(left, state.scissorInFixedPoint.left);
bottom = std::min(bottom, state.scissorInFixedPoint.bottom);
right = std::min(right, state.scissorInFixedPoint.right);
// Init to full macrotile
SWR_RECT clearTile =
{
KNOB_MACROTILE_X_DIM * int32_t(tileX),
KNOB_MACROTILE_Y_DIM * int32_t(tileY),
KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
};
// intersect with clear rect
clearTile &= rect;
// translate to local hottile origin
top -= KNOB_MACROTILE_Y_DIM_FIXED * tileY;
bottom -= KNOB_MACROTILE_Y_DIM_FIXED * tileY;
left -= KNOB_MACROTILE_X_DIM_FIXED * tileX;
right -= KNOB_MACROTILE_X_DIM_FIXED * tileX;
clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
// Make maximums inclusive (needed for convert to raster tiles)
clearTile.xmax -= 1;
clearTile.ymax -= 1;
// convert to raster tiles
top >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
bottom >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
left >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
right >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
// compute steps between raster tile samples / raster tiles / macro tile rows
const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
@ -155,16 +156,16 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples);
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, left, top)) * numSamples;
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
// loop over all raster tiles in the current hot tile
for (int y = top; y <= bottom; ++y)
for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
{
uint8_t* pRasterTile = pRasterTileRow;
for (int x = left; x <= right; ++x)
for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
{
for( int sampleNum = 0; sampleNum < numSamples; sampleNum++)
for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
{
ClearRasterTile<format>(pRasterTile, vClear);
pRasterTile += rasterTileSampleStep;
@ -241,7 +242,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData);
pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData, pClear->rect);
}
if (pClear->flags.mask & SWR_CLEAR_DEPTH)
@ -251,7 +252,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData);
pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData, pClear->rect);
}
if (pClear->flags.mask & SWR_CLEAR_STENCIL)
@ -261,7 +262,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
clearData[0] = *(DWORD*)&value;
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData);
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect);
}
RDTSC_STOP(BEClear, 0, 0);
@ -307,13 +308,13 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[srcFormat];
SWR_ASSERT(pfnClearTiles != nullptr);
pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData);
pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData, pDesc->rect);
}
if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
{
int destX = KNOB_MACROTILE_X_DIM * x;
int destY = KNOB_MACROTILE_Y_DIM * y;
int32_t destX = KNOB_MACROTILE_X_DIM * x;
int32_t destY = KNOB_MACROTILE_Y_DIM * y;
pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat,
pDesc->attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
@ -334,7 +335,7 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
SWR_CONTEXT *pContext = pDC->pContext;
const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
{

View File

@ -109,6 +109,7 @@ union CLEAR_FLAGS
struct CLEAR_DESC
{
SWR_RECT rect;
CLEAR_FLAGS flags;
float clearRTColor[4]; // RGBA_32F
float clearDepth; // [0..1]
@ -136,6 +137,7 @@ struct STORE_TILES_DESC
{
SWR_RENDERTARGET_ATTACHMENT attachment;
SWR_TILE_STATE postStoreTileState;
SWR_RECT rect;
};
struct COMPUTE_DESC
@ -271,8 +273,8 @@ OSALIGNLINE(struct) API_STATE
SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
SWR_VIEWPORT_MATRICES vpMatrices;
BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
BBOX scissorInFixedPoint;
SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
SWR_RECT scissorInFixedPoint;
// Backend state
SWR_BACKEND_STATE backendState;
@ -494,8 +496,5 @@ struct SWR_CONTEXT
TileSet singleThreadLockedTiles;
};
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
void WakeAllThreads(SWR_CONTEXT *pContext);
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }

View File

@ -93,26 +93,24 @@ void ProcessClear(
uint32_t workerId,
void *pUserData)
{
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
const API_STATE& state = GetApiState(pDC);
// queue a clear to each macro tile
// compute macro tile bounds for the current scissor/viewport
uint32_t macroTileLeft = state.scissorInFixedPoint.left / KNOB_MACROTILE_X_DIM_FIXED;
uint32_t macroTileRight = state.scissorInFixedPoint.right / KNOB_MACROTILE_X_DIM_FIXED;
uint32_t macroTileTop = state.scissorInFixedPoint.top / KNOB_MACROTILE_Y_DIM_FIXED;
uint32_t macroTileBottom = state.scissorInFixedPoint.bottom / KNOB_MACROTILE_Y_DIM_FIXED;
// compute macro tile bounds for the specified rect
uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
BE_WORK work;
work.type = CLEAR;
work.pfnWork = ProcessClearBE;
work.desc.clear = *pClear;
work.desc.clear = *pDesc;
for (uint32_t y = macroTileTop; y <= macroTileBottom; ++y)
for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
{
for (uint32_t x = macroTileLeft; x <= macroTileRight; ++x)
for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
{
pTileMgr->enqueue(x, y, &work);
}
@ -133,28 +131,25 @@ void ProcessStoreTiles(
void *pUserData)
{
RDTSC_START(FEProcessStoreTiles);
STORE_TILES_DESC *pStore = (STORE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
const API_STATE& state = GetApiState(pDC);
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
// queue a store to each macro tile
// compute macro tile bounds for the current render target
const uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
const uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
uint32_t numMacroTilesX = ((uint32_t)state.vp[0].width + (uint32_t)state.vp[0].x + (macroWidth - 1)) / macroWidth;
uint32_t numMacroTilesY = ((uint32_t)state.vp[0].height + (uint32_t)state.vp[0].y + (macroHeight - 1)) / macroHeight;
// compute macro tile bounds for the specified rect
uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
// store tiles
BE_WORK work;
work.type = STORETILES;
work.pfnWork = ProcessStoreTileBE;
work.desc.storeTiles = *pStore;
work.desc.storeTiles = *pDesc;
for (uint32_t x = 0; x < numMacroTilesX; ++x)
for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
{
for (uint32_t y = 0; y < numMacroTilesY; ++y)
for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
{
pTileMgr->enqueue(x, y, &work);
}
@ -177,64 +172,39 @@ void ProcessDiscardInvalidateTiles(
void *pUserData)
{
RDTSC_START(FEProcessInvalidateTiles);
DISCARD_INVALIDATE_TILES_DESC *pInv = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
SWR_RECT rect;
// compute macro tile bounds for the specified rect
uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM;
uint32_t macroTileXMax = (pDesc->rect.xmax / KNOB_MACROTILE_X_DIM) - 1;
uint32_t macroTileYMin = (pDesc->rect.ymin + KNOB_MACROTILE_Y_DIM - 1) / KNOB_MACROTILE_Y_DIM;
uint32_t macroTileYMax = (pDesc->rect.ymax / KNOB_MACROTILE_Y_DIM) - 1;
if (pInv->rect.top | pInv->rect.bottom | pInv->rect.right | pInv->rect.left)
{
// Valid rect
rect = pInv->rect;
}
else
{
// Use viewport dimensions
const API_STATE& state = GetApiState(pDC);
rect.left = (uint32_t)state.vp[0].x;
rect.right = (uint32_t)(state.vp[0].x + state.vp[0].width);
rect.top = (uint32_t)state.vp[0].y;
rect.bottom = (uint32_t)(state.vp[0].y + state.vp[0].height);
}
// queue a store to each macro tile
// compute macro tile bounds for the current render target
uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
// Setup region assuming full tiles
uint32_t macroTileStartX = (rect.left + (macroWidth - 1)) / macroWidth;
uint32_t macroTileStartY = (rect.top + (macroHeight - 1)) / macroHeight;
uint32_t macroTileEndX = rect.right / macroWidth;
uint32_t macroTileEndY = rect.bottom / macroHeight;
if (pInv->fullTilesOnly == false)
if (pDesc->fullTilesOnly == false)
{
// include partial tiles
macroTileStartX = rect.left / macroWidth;
macroTileStartY = rect.top / macroHeight;
macroTileEndX = (rect.right + macroWidth - 1) / macroWidth;
macroTileEndY = (rect.bottom + macroHeight - 1) / macroHeight;
macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
}
SWR_ASSERT(macroTileEndX <= KNOB_NUM_HOT_TILES_X);
SWR_ASSERT(macroTileEndY <= KNOB_NUM_HOT_TILES_Y);
SWR_ASSERT(macroTileXMax <= KNOB_NUM_HOT_TILES_X);
SWR_ASSERT(macroTileYMax <= KNOB_NUM_HOT_TILES_Y);
macroTileEndX = std::min<uint32_t>(macroTileEndX, KNOB_NUM_HOT_TILES_X);
macroTileEndY = std::min<uint32_t>(macroTileEndY, KNOB_NUM_HOT_TILES_Y);
macroTileXMax = std::min<int32_t>(macroTileXMax, KNOB_NUM_HOT_TILES_X);
macroTileYMax = std::min<int32_t>(macroTileYMax, KNOB_NUM_HOT_TILES_Y);
// load tiles
BE_WORK work;
work.type = DISCARDINVALIDATETILES;
work.pfnWork = ProcessDiscardInvalidateTilesBE;
work.desc.discardInvalidateTiles = *pInv;
work.desc.discardInvalidateTiles = *pDesc;
for (uint32_t x = macroTileStartX; x < macroTileEndX; ++x)
for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
{
for (uint32_t y = macroTileStartY; y < macroTileEndY; ++y)
for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
{
pTileMgr->enqueue(x, y, &work);
}
@ -587,7 +557,7 @@ static void StreamOut(
//////////////////////////////////////////////////////////////////////////
/// @brief Computes number of invocations. The current index represents
/// the start of the SIMD. The max index represents how much work
/// items are remaining. If there is less then a SIMD's left of work
/// items are remaining. If there is less then a SIMD's xmin of work
/// then return the remaining amount of work.
/// @param curIndex - The start index for the SIMD.
/// @param maxIndex - The last index for all work items.
@ -1694,10 +1664,10 @@ INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari
vMaxY = _simd_max_epi32(vMaxY, vY[1]);
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
bbox.left = vMinX;
bbox.right = vMaxX;
bbox.top = vMinY;
bbox.bottom = vMaxY;
bbox.xmin = vMinX;
bbox.xmax = vMaxX;
bbox.ymin = vMinY;
bbox.ymax = vMaxY;
}
//////////////////////////////////////////////////////////////////////////
@ -1727,10 +1697,10 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
/// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.xmin = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.xmax = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.ymin = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
bbox.ymax = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
}
//////////////////////////////////////////////////////////////////////////
@ -1940,27 +1910,27 @@ void BinTriangles(
// determine if triangle falls between pixel centers and discard
// only discard for non-MSAA case and when conservative rast is disabled
// (left + 127) & ~255
// (right + 128) & ~255
// (xmin + 127) & ~255
// (xmax + 128) & ~255
if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value))
{
origTriMask = triMask;
int cullCenterMask;
{
simdscalari left = _simd_add_epi32(bbox.left, _simd_set1_epi32(127));
left = _simd_and_si(left, _simd_set1_epi32(~255));
simdscalari right = _simd_add_epi32(bbox.right, _simd_set1_epi32(128));
right = _simd_and_si(right, _simd_set1_epi32(~255));
simdscalari xmin = _simd_add_epi32(bbox.xmin, _simd_set1_epi32(127));
xmin = _simd_and_si(xmin, _simd_set1_epi32(~255));
simdscalari xmax = _simd_add_epi32(bbox.xmax, _simd_set1_epi32(128));
xmax = _simd_and_si(xmax, _simd_set1_epi32(~255));
simdscalari vMaskH = _simd_cmpeq_epi32(left, right);
simdscalari vMaskH = _simd_cmpeq_epi32(xmin, xmax);
simdscalari top = _simd_add_epi32(bbox.top, _simd_set1_epi32(127));
top = _simd_and_si(top, _simd_set1_epi32(~255));
simdscalari bottom = _simd_add_epi32(bbox.bottom, _simd_set1_epi32(128));
bottom = _simd_and_si(bottom, _simd_set1_epi32(~255));
simdscalari ymin = _simd_add_epi32(bbox.ymin, _simd_set1_epi32(127));
ymin = _simd_and_si(ymin, _simd_set1_epi32(~255));
simdscalari ymax = _simd_add_epi32(bbox.ymax, _simd_set1_epi32(128));
ymax = _simd_and_si(ymax, _simd_set1_epi32(~255));
simdscalari vMaskV = _simd_cmpeq_epi32(top, bottom);
simdscalari vMaskV = _simd_cmpeq_epi32(ymin, ymax);
vMaskV = _simd_or_si(vMaskH, vMaskV);
cullCenterMask = _simd_movemask_ps(_simd_castsi_ps(vMaskV));
}
@ -1973,26 +1943,26 @@ void BinTriangles(
}
}
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
if(CT::IsConservativeT::value)
{
// in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
// some area. Bump the right/bottom edges out
simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom);
bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom);
simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right);
bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight);
// some area. Bump the xmax/ymax edges out
simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.ymin, bbox.ymax);
bbox.ymax = _simd_blendv_epi32(bbox.ymax, _simd_add_epi32(bbox.ymax, _simd_set1_epi32(1)), topEqualsBottom);
simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.xmin, bbox.xmax);
bbox.xmax = _simd_blendv_epi32(bbox.xmax, _simd_add_epi32(bbox.xmax, _simd_set1_epi32(1)), leftEqualsRight);
}
// Cull tris completely outside scissor
{
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
triMask = triMask & ~maskOutsideScissor;
@ -2004,16 +1974,16 @@ void BinTriangles(
}
// Convert triangle bbox to macrotile units.
bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
_simd_store_si((simdscalari*)aMTLeft, bbox.left);
_simd_store_si((simdscalari*)aMTRight, bbox.right);
_simd_store_si((simdscalari*)aMTTop, bbox.top);
_simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
_simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
_simd_store_si((simdscalari*)aMTRight, bbox.xmax);
_simd_store_si((simdscalari*)aMTTop, bbox.ymin);
_simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
// transpose verts needed for backend
/// @todo modify BE to take non-transformed verts
@ -2196,11 +2166,11 @@ void BinPoints(
if (CanUseSimplePoints(pDC))
{
// adjust for top-left rule
// adjust for ymin-xmin rule
vXi = _simd_sub_epi32(vXi, _simd_set1_epi32(1));
vYi = _simd_sub_epi32(vYi, _simd_set1_epi32(1));
// cull points off the top-left edge of the viewport
// cull points off the ymin-xmin edge of the viewport
primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vXi));
primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vYi));
@ -2325,40 +2295,40 @@ void BinPoints(
// bloat point to bbox
simdBBox bbox;
bbox.left = bbox.right = vXi;
bbox.top = bbox.bottom = vYi;
bbox.xmin = bbox.xmax = vXi;
bbox.ymin = bbox.ymax = vYi;
simdscalar vHalfWidth = _simd_mul_ps(vPointSize, _simd_set1_ps(0.5f));
simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth);
bbox.left = _simd_sub_epi32(bbox.left, vHalfWidthi);
bbox.right = _simd_add_epi32(bbox.right, vHalfWidthi);
bbox.top = _simd_sub_epi32(bbox.top, vHalfWidthi);
bbox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi);
bbox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi);
bbox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi);
bbox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi);
bbox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
// Cull bloated points completely outside scissor
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
primMask = primMask & ~maskOutsideScissor;
// Convert bbox to macrotile units.
bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
_simd_store_si((simdscalari*)aMTLeft, bbox.left);
_simd_store_si((simdscalari*)aMTRight, bbox.right);
_simd_store_si((simdscalari*)aMTTop, bbox.top);
_simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
_simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
_simd_store_si((simdscalari*)aMTRight, bbox.xmax);
_simd_store_si((simdscalari*)aMTTop, bbox.ymin);
_simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
// store render target array index
OSALIGNSIMD(uint32_t) aRTAI[KNOB_SIMD_WIDTH];
@ -2543,35 +2513,35 @@ void BinLines(
// Calc bounding box of lines
simdBBox bbox;
bbox.left = _simd_min_epi32(vXi[0], vXi[1]);
bbox.right = _simd_max_epi32(vXi[0], vXi[1]);
bbox.top = _simd_min_epi32(vYi[0], vYi[1]);
bbox.bottom = _simd_max_epi32(vYi[0], vYi[1]);
bbox.xmin = _simd_min_epi32(vXi[0], vXi[1]);
bbox.xmax = _simd_max_epi32(vXi[0], vXi[1]);
bbox.ymin = _simd_min_epi32(vYi[0], vYi[1]);
bbox.ymax = _simd_max_epi32(vYi[0], vYi[1]);
// bloat bbox by line width along minor axis
simdscalar vHalfWidth = _simd_set1_ps(rastState.lineWidth / 2.0f);
simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth);
simdBBox bloatBox;
bloatBox.left = _simd_sub_epi32(bbox.left, vHalfWidthi);
bloatBox.right = _simd_add_epi32(bbox.right, vHalfWidthi);
bloatBox.top = _simd_sub_epi32(bbox.top, vHalfWidthi);
bloatBox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi);
bloatBox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi);
bloatBox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi);
bloatBox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi);
bloatBox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
bbox.left = _simd_blendv_epi32(bbox.left, bloatBox.left, vYmajorMask);
bbox.right = _simd_blendv_epi32(bbox.right, bloatBox.right, vYmajorMask);
bbox.top = _simd_blendv_epi32(bloatBox.top, bbox.top, vYmajorMask);
bbox.bottom = _simd_blendv_epi32(bloatBox.bottom, bbox.bottom, vYmajorMask);
bbox.xmin = _simd_blendv_epi32(bbox.xmin, bloatBox.xmin, vYmajorMask);
bbox.xmax = _simd_blendv_epi32(bbox.xmax, bloatBox.xmax, vYmajorMask);
bbox.ymin = _simd_blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask);
bbox.ymax = _simd_blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
// Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
// Cull prims completely outside scissor
{
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
primMask = primMask & ~maskOutsideScissor;
@ -2583,16 +2553,16 @@ void BinLines(
}
// Convert triangle bbox to macrotile units.
bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
_simd_store_si((simdscalari*)aMTLeft, bbox.left);
_simd_store_si((simdscalari*)aMTRight, bbox.right);
_simd_store_si((simdscalari*)aMTTop, bbox.top);
_simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
_simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
_simd_store_si((simdscalari*)aMTRight, bbox.xmax);
_simd_store_si((simdscalari*)aMTTop, bbox.ymin);
_simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
// transpose verts needed for backend
/// @todo modify BE to take non-transformed verts

View File

@ -240,7 +240,7 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices,
}
INLINE
void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
{
// Need horizontal fp min here
__m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
@ -262,10 +262,10 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
__m128i vMaxY = _mm_max_epi32(vY, vY1);
vMaxY = _mm_max_epi32(vMaxY, vY2);
bbox.left = _mm_extract_epi32(vMinX, 0);
bbox.right = _mm_extract_epi32(vMaxX, 0);
bbox.top = _mm_extract_epi32(vMinY, 0);
bbox.bottom = _mm_extract_epi32(vMaxY, 0);
bbox.xmin = _mm_extract_epi32(vMinX, 0);
bbox.xmax = _mm_extract_epi32(vMaxX, 0);
bbox.ymin = _mm_extract_epi32(vMinY, 0);
bbox.ymax = _mm_extract_epi32(vMaxY, 0);
}
INLINE

View File

@ -591,7 +591,7 @@ INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* v
template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
struct ComputeScissorEdges
{
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){};
};
@ -604,20 +604,20 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
//////////////////////////////////////////////////////////////////////////
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
/// evaluate edge equations and offset them away from pixel center.
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
{
// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
BBOX scissor;
scissor.left = std::max(triBBox.left, scissorBBox.left);
scissor.right = std::min(triBBox.right, scissorBBox.right);
scissor.top = std::max(triBBox.top, scissorBBox.top);
scissor.bottom = std::min(triBBox.bottom, scissorBBox.bottom);
SWR_RECT scissor;
scissor.xmin = std::max(triBBox.xmin, scissorBBox.xmin);
scissor.xmax = std::min(triBBox.xmax, scissorBBox.xmax);
scissor.ymin = std::max(triBBox.ymin, scissorBBox.ymin);
scissor.ymax = std::min(triBBox.ymax, scissorBBox.ymax);
POS topLeft{scissor.left, scissor.top};
POS bottomLeft{scissor.left, scissor.bottom};
POS topRight{scissor.right, scissor.top};
POS bottomRight{scissor.right, scissor.bottom};
POS topLeft{scissor.xmin, scissor.ymin};
POS bottomLeft{scissor.xmin, scissor.ymax};
POS topRight{scissor.xmax, scissor.ymin};
POS bottomRight{scissor.xmax, scissor.ymax};
// construct 4 scissor edges in ccw direction
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@ -625,10 +625,10 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
@ -647,14 +647,14 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
{
//////////////////////////////////////////////////////////////////////////
/// @brief Compute scissor edge vectors and evaluate edge equations
INLINE ComputeScissorEdges(const BBOX &, const BBOX &scissorBBox, const int32_t x, const int32_t y,
INLINE ComputeScissorEdges(const SWR_RECT &, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
{
const BBOX &scissor = scissorBBox;
POS topLeft{scissor.left, scissor.top};
POS bottomLeft{scissor.left, scissor.bottom};
POS topRight{scissor.right, scissor.top};
POS bottomRight{scissor.right, scissor.bottom};
const SWR_RECT &scissor = scissorBBox;
POS topLeft{scissor.xmin, scissor.ymin};
POS bottomLeft{scissor.xmin, scissor.ymax};
POS topRight{scissor.xmax, scissor.ymin};
POS bottomRight{scissor.xmax, scissor.ymax};
// construct 4 scissor edges in ccw direction
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@ -662,10 +662,10 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
}
};
@ -964,23 +964,23 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
// Calc bounding box of triangle
OSALIGNSIMD(BBOX) bbox;
OSALIGNSIMD(SWR_RECT) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
bbox.left--; bbox.right++; bbox.top--; bbox.bottom++;
SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0,
bbox.xmin--; bbox.xmax++; bbox.ymin--; bbox.ymax++;
SWR_ASSERT(state.scissorInFixedPoint.xmin >= 0 && state.scissorInFixedPoint.ymin >= 0,
"Conservative rast degenerate handling requires a valid scissor rect");
}
// Intersect with scissor/viewport
OSALIGNSIMD(BBOX) intersect;
intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
intersect.right = std::min(bbox.right - 1, state.scissorInFixedPoint.right);
intersect.top = std::max(bbox.top, state.scissorInFixedPoint.top);
intersect.bottom = std::min(bbox.bottom - 1, state.scissorInFixedPoint.bottom);
OSALIGNSIMD(SWR_RECT) intersect;
intersect.xmin = std::max(bbox.xmin, state.scissorInFixedPoint.xmin);
intersect.xmax = std::min(bbox.xmax - 1, state.scissorInFixedPoint.xmax);
intersect.ymin = std::max(bbox.ymin, state.scissorInFixedPoint.ymin);
intersect.ymax = std::min(bbox.ymax - 1, state.scissorInFixedPoint.ymax);
triDesc.triFlags = workDesc.triFlags;
@ -992,20 +992,20 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
intersect.left = std::max(intersect.left, macroBoxLeft);
intersect.top = std::max(intersect.top, macroBoxTop);
intersect.right = std::min(intersect.right, macroBoxRight);
intersect.bottom = std::min(intersect.bottom, macroBoxBottom);
intersect.xmin = std::max(intersect.xmin, macroBoxLeft);
intersect.ymin = std::max(intersect.ymin, macroBoxTop);
intersect.xmax = std::min(intersect.xmax, macroBoxRight);
intersect.ymax = std::min(intersect.ymax, macroBoxBottom);
SWR_ASSERT(intersect.left <= intersect.right && intersect.top <= intersect.bottom && intersect.left >= 0 && intersect.right >= 0 && intersect.top >= 0 && intersect.bottom >= 0);
SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
RDTSC_STOP(BETriangleSetup, 0, pDC->drawId);
// update triangle desc
uint32_t minTileX = intersect.left >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t minTileY = intersect.top >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t maxTileX = intersect.right >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t maxTileY = intersect.bottom >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t numTilesX = maxTileX - minTileX + 1;
uint32_t numTilesY = maxTileY - minTileY + 1;
@ -1020,8 +1020,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// Step to pixel center of top-left pixel of the triangle bbox
// Align intersect bbox (top/left) to raster tile's (top/left).
int32_t x = AlignDown(intersect.left, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
int32_t y = AlignDown(intersect.top, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
int32_t x = AlignDown(intersect.xmin, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
int32_t y = AlignDown(intersect.ymin, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
// convenience typedef
typedef typename RT::NumRasterSamplesT NumRasterSamplesT;
@ -1663,17 +1663,17 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
__m128i vYai = fpToFixedPoint(vYa);
OSALIGNSIMD(BBOX) bboxA;
OSALIGNSIMD(SWR_RECT) bboxA;
calcBoundingBoxInt(vXai, vYai, bboxA);
if (!(bboxA.left > macroBoxRight ||
bboxA.left > state.scissorInFixedPoint.right ||
bboxA.right - 1 < macroBoxLeft ||
bboxA.right - 1 < state.scissorInFixedPoint.left ||
bboxA.top > macroBoxBottom ||
bboxA.top > state.scissorInFixedPoint.bottom ||
bboxA.bottom - 1 < macroBoxTop ||
bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
if (!(bboxA.xmin > macroBoxRight ||
bboxA.xmin > state.scissorInFixedPoint.xmax ||
bboxA.xmax - 1 < macroBoxLeft ||
bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
bboxA.ymin > macroBoxBottom ||
bboxA.ymin > state.scissorInFixedPoint.ymax ||
bboxA.ymax - 1 < macroBoxTop ||
bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
@ -1739,14 +1739,14 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
vYai = fpToFixedPoint(vYa);
calcBoundingBoxInt(vXai, vYai, bboxA);
if (!(bboxA.left > macroBoxRight ||
bboxA.left > state.scissorInFixedPoint.right ||
bboxA.right - 1 < macroBoxLeft ||
bboxA.right - 1 < state.scissorInFixedPoint.left ||
bboxA.top > macroBoxBottom ||
bboxA.top > state.scissorInFixedPoint.bottom ||
bboxA.bottom - 1 < macroBoxTop ||
bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
if (!(bboxA.xmin > macroBoxRight ||
bboxA.xmin > state.scissorInFixedPoint.xmax ||
bboxA.xmax - 1 < macroBoxLeft ||
bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
bboxA.ymin > macroBoxBottom ||
bboxA.ymin > state.scissorInFixedPoint.ymax ||
bboxA.ymax - 1 < macroBoxTop ||
bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}

View File

@ -33,6 +33,7 @@
#include "common/os.h"
#include "common/simdintrin.h"
#include "common/swr_assert.h"
#include "core/api.h"
#if defined(_WIN64) || defined(__x86_64__)
#define _MM_INSERT_EPI64 _mm_insert_epi64
@ -74,53 +75,12 @@ INLINE __m128i _MM_INSERT_EPI64(__m128i a, int64_t b, const int32_t ndx)
}
#endif
OSALIGNLINE(struct) BBOX
{
int top{ 0 };
int bottom{ 0 };
int left{ 0 };
int right{ 0 };
BBOX() {}
BBOX(int t, int b, int l, int r) : top(t), bottom(b), left(l), right(r) {}
bool operator==(const BBOX& rhs)
{
return (this->top == rhs.top &&
this->bottom == rhs.bottom &&
this->left == rhs.left &&
this->right == rhs.right);
}
bool operator!=(const BBOX& rhs)
{
return !(*this == rhs);
}
BBOX& Intersect(const BBOX& other)
{
this->top = std::max(this->top, other.top);
this->bottom = std::min(this->bottom, other.bottom);
this->left = std::max(this->left, other.left);
this->right = std::min(this->right, other.right);
if (right - left < 0 ||
bottom - top < 0)
{
// Zero area
top = bottom = left = right = 0;
}
return *this;
}
};
struct simdBBox
{
simdscalari top;
simdscalari bottom;
simdscalari left;
simdscalari right;
simdscalari ymin;
simdscalari ymax;
simdscalari xmin;
simdscalari xmax;
};
INLINE

View File

@ -67,17 +67,9 @@ swr_clear(struct pipe_context *pipe,
((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */
#endif
/* Reset viewport to full framebuffer width/height before clear, then
* restore it */
/* Scissor affects clear, viewport should not */
ctx->dirty |= SWR_NEW_VIEWPORT;
SWR_VIEWPORT vp = {0};
vp.width = ctx->framebuffer.width;
vp.height = ctx->framebuffer.height;
SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
swr_update_draw_context(ctx);
SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil);
SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil,
ctx->swr_scissor);
}

View File

@ -121,6 +121,7 @@ struct swr_context {
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
SWR_RECT swr_scissor;
struct pipe_sampler_view *
sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];

View File

@ -258,37 +258,13 @@ swr_store_render_target(struct pipe_context *pipe,
/* Only proceed if there's a valid surface to store to */
if (renderTarget->pBaseAddress) {
/* Set viewport to full renderTarget width/height and disable scissor
* before StoreTiles */
boolean change_viewport =
(ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f
|| ctx->derived.vp.width != renderTarget->width
|| ctx->derived.vp.height != renderTarget->height);
if (change_viewport) {
SWR_VIEWPORT vp = {0};
vp.width = renderTarget->width;
vp.height = renderTarget->height;
SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
}
boolean scissor_enable = ctx->derived.rastState.scissorEnable;
if (scissor_enable) {
ctx->derived.rastState.scissorEnable = FALSE;
SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
}
swr_update_draw_context(ctx);
SWR_RECT full_rect =
{0, 0, (int32_t)renderTarget->width, (int32_t)renderTarget->height};
SwrStoreTiles(ctx->swrContext,
(enum SWR_RENDERTARGET_ATTACHMENT)attachment,
post_tile_state);
/* Restore viewport and scissor enable */
if (change_viewport)
SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm);
if (scissor_enable) {
ctx->derived.rastState.scissorEnable = scissor_enable;
SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
}
post_tile_state,
full_rect);
}
}

View File

@ -575,6 +575,10 @@ swr_set_scissor_states(struct pipe_context *pipe,
struct swr_context *ctx = swr_context(pipe);
ctx->scissor = *scissor;
ctx->swr_scissor.xmin = scissor->minx;
ctx->swr_scissor.xmax = scissor->maxx;
ctx->swr_scissor.ymin = scissor->miny;
ctx->swr_scissor.ymax = scissor->maxy;
ctx->dirty |= SWR_NEW_SCISSOR;
}
@ -930,10 +934,7 @@ swr_update_derived(struct pipe_context *pipe,
/* Scissor */
if (ctx->dirty & SWR_NEW_SCISSOR) {
pipe_scissor_state *scissor = &ctx->scissor;
BBOX bbox(scissor->miny, scissor->maxy,
scissor->minx, scissor->maxx);
SwrSetScissorRects(ctx->swrContext, 1, &bbox);
SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
}
/* Viewport */