swr/rast: Split backend.cpp to improve compile time
Hardcode split to four files currently. Decreases swr build time on a quad-core by ~10%. Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com>
This commit is contained in:
parent
b89bd3694c
commit
cae53b24d7
|
@ -34,6 +34,7 @@ COMMON_CXXFLAGS = \
|
|||
$(LLVM_CXXFLAGS) \
|
||||
$(SWR_CXX11_CXXFLAGS) \
|
||||
-I$(builddir)/rasterizer/codegen \
|
||||
-I$(builddir)/rasterizer/core \
|
||||
-I$(builddir)/rasterizer/jitter \
|
||||
-I$(builddir)/rasterizer/archrast \
|
||||
-I$(srcdir)/rasterizer \
|
||||
|
@ -62,7 +63,11 @@ BUILT_SOURCES = \
|
|||
rasterizer/archrast/gen_ar_event.cpp \
|
||||
rasterizer/archrast/gen_ar_eventhandler.hpp \
|
||||
rasterizer/archrast/gen_ar_eventhandlerfile.hpp \
|
||||
rasterizer/core/gen_BackendPixelRate0.cpp
|
||||
rasterizer/core/backends/gen_BackendPixelRate0.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate1.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate2.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate3.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate.hpp
|
||||
|
||||
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
|
||||
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
|
||||
|
@ -140,20 +145,33 @@ rasterizer/archrast/gen_ar_eventhandlerfile.hpp: rasterizer/codegen/gen_archrast
|
|||
--output rasterizer/archrast/gen_ar_eventhandlerfile.hpp \
|
||||
--gen_eventhandlerfile_h
|
||||
|
||||
rasterizer/core/backends/gen_BackendPixelRate0.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate1.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate2.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate3.cpp \
|
||||
rasterizer/core/backends/gen_BackendPixelRate.hpp: \
|
||||
backend.intermediate
|
||||
|
||||
# 5 SWR_MULTISAMPLE_TYPE_COUNT
|
||||
# 2 SWR_MSAA_SAMPLE_PATTERN_COUNT
|
||||
# 3 SWR_INPUT_COVERAGE_COUNT
|
||||
# 2 centroid
|
||||
# 2 forcedSampleCount
|
||||
# 2 canEarlyZ
|
||||
rasterizer/core/gen_BackendPixelRate0.cpp: rasterizer/codegen/gen_backends.py rasterizer/codegen/templates/gen_backend.cpp
|
||||
|
||||
# use intermediate rule to tell make that all files can be
|
||||
# generated in one invocation of gen_backends.py (prevents
|
||||
# parallel make race condition)
|
||||
.INTERMEDIATE: backend.intermediate
|
||||
backend.intermediate: rasterizer/codegen/gen_backends.py rasterizer/codegen/templates/gen_backend.cpp rasterizer/codegen/templates/gen_header_init.hpp
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) \
|
||||
$(srcdir)/rasterizer/codegen/gen_backends.py \
|
||||
--outdir rasterizer/core \
|
||||
--outdir rasterizer/core/backends \
|
||||
--dim 5 2 3 2 2 2 \
|
||||
--split 0 \
|
||||
--cpp
|
||||
--numfiles 4 \
|
||||
--cpp \
|
||||
--hpp
|
||||
|
||||
COMMON_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
|
@ -227,5 +245,6 @@ EXTRA_DIST = \
|
|||
rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp \
|
||||
rasterizer/codegen/templates/gen_backend.cpp \
|
||||
rasterizer/codegen/templates/gen_builder.hpp \
|
||||
rasterizer/codegen/templates/gen_header_init.hpp \
|
||||
rasterizer/codegen/templates/gen_knobs.cpp \
|
||||
rasterizer/codegen/templates/gen_llvm.hpp
|
||||
|
|
|
@ -73,7 +73,11 @@ CORE_CXX_SOURCES := \
|
|||
rasterizer/core/api.h \
|
||||
rasterizer/core/arena.h \
|
||||
rasterizer/core/backend.cpp \
|
||||
rasterizer/core/backend_clear.cpp \
|
||||
rasterizer/core/backend_sample.cpp \
|
||||
rasterizer/core/backend_singlesample.cpp \
|
||||
rasterizer/core/backend.h \
|
||||
rasterizer/core/backend_impl.h \
|
||||
rasterizer/core/binner.cpp \
|
||||
rasterizer/core/binner.h \
|
||||
rasterizer/core/blend.h \
|
||||
|
|
|
@ -140,12 +140,22 @@ Depends('rasterizer/jitter/gen_state_llvm.h',
|
|||
# 2 centroid
|
||||
# 2 forcedSampleCount
|
||||
# 2 canEarlyZ
|
||||
backendPixelRateFileCount = 4
|
||||
backendPixelRateFilePat = "rasterizer/core/backends/gen_BackendPixelRate%s.cpp"
|
||||
backendPixelRateFiles = map(lambda x: backendPixelRateFilePat % x,
|
||||
range(0, backendPixelRateFileCount))
|
||||
env.CodeGenerate(
|
||||
target = 'rasterizer/core/gen_BackendPixelRate0.cpp',
|
||||
target = 'rasterizer/core/backends/gen_BackendPixelRate.hpp',
|
||||
script = swrroot + 'rasterizer/codegen/gen_backends.py',
|
||||
source = '',
|
||||
command = python_cmd + ' $SCRIPT --outdir ' + bldroot + '/rasterizer/core --dim 5 2 3 2 2 2 --split 0 --cpp'
|
||||
)
|
||||
command = python_cmd + ' $SCRIPT --outdir ' + bldroot + '/rasterizer/core/backends --dim 5 2 3 2 2 2 --numfiles ' + str(backendPixelRateFileCount) + ' --cpp --hpp'
|
||||
)
|
||||
Depends(backendPixelRateFiles,
|
||||
['rasterizer/core/backends/gen_BackendPixelRate.hpp',
|
||||
'rasterizer/archrast/gen_ar_event.hpp',
|
||||
'rasterizer/codegen/gen_knobs.h']
|
||||
)
|
||||
|
||||
Depends('rasterizer/jitter/gen_state_llvm.h',
|
||||
swrroot + 'rasterizer/codegen/templates/gen_backend.cpp')
|
||||
|
||||
|
@ -153,9 +163,10 @@ Depends('rasterizer/jitter/gen_state_llvm.h',
|
|||
built_sources = [
|
||||
'rasterizer/codegen/gen_knobs.cpp',
|
||||
'rasterizer/archrast/gen_ar_event.cpp',
|
||||
'rasterizer/core/gen_BackendPixelRate0.cpp',
|
||||
]
|
||||
|
||||
built_sources += backendPixelRateFiles
|
||||
|
||||
source = built_sources
|
||||
source += env.ParseSourceList(swrroot + 'Makefile.sources', [
|
||||
'CXX_SOURCES',
|
||||
|
|
|
@ -35,7 +35,9 @@ def main(args=sys.argv[1:]):
|
|||
parser.add_argument('--dim', help='gBackendPixelRateTable array dimensions', nargs='+', type=int, required=True)
|
||||
parser.add_argument('--outdir', help='output directory', nargs='?', type=str, default=thisDir)
|
||||
parser.add_argument('--split', help='how many lines of initialization per file [0=no split]', nargs='?', type=int, default='512')
|
||||
parser.add_argument('--numfiles', help='how many output files to generate', nargs='?', type=int, default='0')
|
||||
parser.add_argument('--cpp', help='Generate cpp file(s)', action='store_true', default=False)
|
||||
parser.add_argument('--hpp', help='Generate hpp file', action='store_true', default=False)
|
||||
parser.add_argument('--cmake', help='Generate cmake file', action='store_true', default=False)
|
||||
|
||||
args = parser.parse_args(args);
|
||||
|
@ -43,11 +45,14 @@ def main(args=sys.argv[1:]):
|
|||
class backendStrs :
|
||||
def __init__(self) :
|
||||
self.outFileName = 'gen_BackendPixelRate%s.cpp'
|
||||
self.outHeaderName = 'gen_BackendPixelRate.hpp'
|
||||
self.functionTableName = 'gBackendPixelRateTable'
|
||||
self.funcInstanceHeader = ' = BackendPixelRate<SwrBackendTraits<'
|
||||
self.template = 'gen_backend.cpp'
|
||||
self.hpp_template = 'gen_header_init.hpp'
|
||||
self.cmakeFileName = 'gen_backends.cmake'
|
||||
self.cmakeSrcVar = 'GEN_BACKEND_SOURCES'
|
||||
self.tableName = 'BackendPixelRate'
|
||||
|
||||
backend = backendStrs()
|
||||
|
||||
|
@ -77,6 +82,8 @@ def main(args=sys.argv[1:]):
|
|||
numFiles = 1
|
||||
else:
|
||||
numFiles = (len(output_list) + args.split - 1) // args.split
|
||||
if (args.numfiles != 0):
|
||||
numFiles = args.numfiles
|
||||
linesPerFile = (len(output_list) + numFiles - 1) // numFiles
|
||||
chunkedList = [output_list[x:x+linesPerFile] for x in range(0, len(output_list), linesPerFile)]
|
||||
|
||||
|
@ -94,6 +101,18 @@ def main(args=sys.argv[1:]):
|
|||
fileNum=fileNum,
|
||||
funcList=chunkedList[fileNum])
|
||||
|
||||
if args.hpp:
|
||||
baseHppName = os.path.join(args.outdir, backend.outHeaderName)
|
||||
templateHpp = os.path.join(thisDir, 'templates', backend.hpp_template)
|
||||
|
||||
MakoTemplateWriter.to_file(
|
||||
templateHpp,
|
||||
baseHppName,
|
||||
cmdline=sys.argv,
|
||||
numFiles=numFiles,
|
||||
filename=backend.outHeaderName,
|
||||
tableName=backend.tableName)
|
||||
|
||||
# generate gen_backend.cmake file
|
||||
if args.cmake:
|
||||
templateCmake = os.path.join(thisDir, 'templates', 'gen_backend.cmake')
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
//============================================================================
|
||||
|
||||
#include "core/backend.h"
|
||||
#include "core/backend_impl.h"
|
||||
|
||||
void InitBackendPixelRate${fileNum}()
|
||||
{
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
//============================================================================
|
||||
// Copyright (C) 2017 Intel Corporation. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||||
// copy of this software and associated documentation files (the "Software"),
|
||||
// to deal in the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice (including the next
|
||||
// paragraph) shall be included in all copies or substantial portions of the
|
||||
// Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
// IN THE SOFTWARE.
|
||||
//
|
||||
// @file ${filename}
|
||||
//
|
||||
// @brief auto-generated file
|
||||
//
|
||||
// DO NOT EDIT
|
||||
//
|
||||
// Generation Command Line:
|
||||
// ${'\n// '.join(cmdline)}
|
||||
//
|
||||
//============================================================================
|
||||
|
||||
%for num in range(numFiles):
|
||||
void Init${tableName}${num}();
|
||||
%endfor
|
||||
|
||||
static INLINE void Init${tableName}()
|
||||
{
|
||||
%for num in range(numFiles):
|
||||
Init${tableName}${num}();
|
||||
%endfor
|
||||
}
|
|
@ -784,9 +784,6 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
|
|||
|
||||
|
||||
// templated backend function tables
|
||||
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
|
||||
extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2];
|
||||
|
||||
void SetupPipeline(DRAW_CONTEXT *pDC)
|
||||
{
|
||||
|
@ -838,7 +835,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
SWR_ASSERT(backendFuncs.pfnBackend);
|
||||
|
||||
PFN_PROCESS_PRIMS pfnBinner;
|
||||
#if USE_SIMD16_FRONTEND
|
||||
PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16;
|
||||
|
|
|
@ -30,15 +30,14 @@
|
|||
#include <smmintrin.h>
|
||||
|
||||
#include "backend.h"
|
||||
#include "backend_impl.h"
|
||||
#include "tilemgr.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "core/multisample.h"
|
||||
#include "backends/gen_BackendPixelRate.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
|
||||
static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Process compute work.
|
||||
|
@ -103,238 +102,6 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
|||
SWR_ASSERT(x == 0 && y == 0);
|
||||
}
|
||||
|
||||
template<SWR_FORMAT format>
|
||||
void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
|
||||
{
|
||||
auto lambda = [&](int32_t comp)
|
||||
{
|
||||
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
|
||||
|
||||
pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
|
||||
};
|
||||
|
||||
const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
|
||||
|
||||
for (uint32_t i = 0; i < numIter; ++i)
|
||||
{
|
||||
UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
template<SWR_FORMAT format>
|
||||
void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
|
||||
{
|
||||
auto lambda = [&](int32_t comp)
|
||||
{
|
||||
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
|
||||
|
||||
pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
|
||||
};
|
||||
|
||||
const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
|
||||
|
||||
for (uint32_t i = 0; i < numIter; ++i)
|
||||
{
|
||||
UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
template<SWR_FORMAT format>
|
||||
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
|
||||
{
|
||||
// convert clear color to hottile format
|
||||
// clear color is in RGBA float/uint32
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
simd16vector vClear;
|
||||
for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
|
||||
{
|
||||
simd16scalar vComp;
|
||||
vComp = _simd16_load1_ps((const float*)&clear[comp]);
|
||||
if (FormatTraits<format>::isNormalized(comp))
|
||||
{
|
||||
vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
|
||||
vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
|
||||
}
|
||||
vComp = FormatTraits<format>::pack(comp, vComp);
|
||||
vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
|
||||
}
|
||||
|
||||
#else
|
||||
simdvector vClear;
|
||||
for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
|
||||
{
|
||||
simdscalar vComp;
|
||||
vComp = _simd_load1_ps((const float*)&clear[comp]);
|
||||
if (FormatTraits<format>::isNormalized(comp))
|
||||
{
|
||||
vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
|
||||
vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
|
||||
}
|
||||
vComp = FormatTraits<format>::pack(comp, vComp);
|
||||
vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
|
||||
}
|
||||
|
||||
#endif
|
||||
uint32_t tileX, tileY;
|
||||
MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
|
||||
|
||||
// Init to full macrotile
|
||||
SWR_RECT clearTile =
|
||||
{
|
||||
KNOB_MACROTILE_X_DIM * int32_t(tileX),
|
||||
KNOB_MACROTILE_Y_DIM * int32_t(tileY),
|
||||
KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
|
||||
KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
|
||||
};
|
||||
|
||||
// intersect with clear rect
|
||||
clearTile &= rect;
|
||||
|
||||
// translate to local hottile origin
|
||||
clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
|
||||
|
||||
// Make maximums inclusive (needed for convert to raster tiles)
|
||||
clearTile.xmax -= 1;
|
||||
clearTile.ymax -= 1;
|
||||
|
||||
// convert to raster tiles
|
||||
clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
|
||||
clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
|
||||
clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
|
||||
clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
|
||||
|
||||
const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
|
||||
// compute steps between raster tile samples / raster tiles / macro tile rows
|
||||
const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
|
||||
const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
|
||||
const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
|
||||
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
|
||||
|
||||
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
|
||||
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
|
||||
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
|
||||
|
||||
// loop over all raster tiles in the current hot tile
|
||||
for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
|
||||
{
|
||||
uint8_t* pRasterTile = pRasterTileRow;
|
||||
for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
|
||||
{
|
||||
for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
|
||||
{
|
||||
ClearRasterTile<format>(pRasterTile, vClear);
|
||||
pRasterTile += rasterTileSampleStep;
|
||||
}
|
||||
}
|
||||
pRasterTileRow += macroTileRowStep;
|
||||
}
|
||||
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
}
|
||||
|
||||
|
||||
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
if (KNOB_FAST_CLEAR)
|
||||
{
|
||||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
|
||||
SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
|
||||
uint32_t numSamples = GetNumSamples(sampleCount);
|
||||
|
||||
SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
|
||||
|
||||
AR_BEGIN(BEClear, pDC->drawId);
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
|
||||
{
|
||||
unsigned long rt = 0;
|
||||
uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
|
||||
while (_BitScanForward(&rt, mask))
|
||||
{
|
||||
mask &= ~(1 << rt);
|
||||
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
|
||||
// All we want to do here is to mark the hot tile as being in a "needs clear" state.
|
||||
pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
|
||||
pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
|
||||
pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
|
||||
pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
|
||||
{
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
|
||||
{
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
|
||||
pHotTile->clearData[0] = pClear->clearStencil;
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
|
||||
AR_END(BEClear, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Legacy clear
|
||||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
|
||||
AR_BEGIN(BEClear, pDC->drawId);
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
|
||||
{
|
||||
DWORD clearData[4];
|
||||
clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
|
||||
clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
|
||||
clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
|
||||
clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
|
||||
|
||||
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
unsigned long rt = 0;
|
||||
uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
|
||||
while (_BitScanForward(&rt, mask))
|
||||
{
|
||||
mask &= ~(1 << rt);
|
||||
|
||||
pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
|
||||
{
|
||||
DWORD clearData[4];
|
||||
clearData[0] = *(DWORD*)&pClear->clearDepth;
|
||||
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
|
||||
{
|
||||
DWORD clearData[4];
|
||||
clearData[0] = pClear->clearStencil;
|
||||
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
|
||||
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
AR_END(BEClear, 1);
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, STORE_TILES_DESC* pDesc,
|
||||
SWR_RENDERTARGET_ATTACHMENT attachment)
|
||||
{
|
||||
|
@ -368,7 +135,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
|
|||
// clear if clear is pending (i.e., not rendered to), then mark as dirty for store.
|
||||
if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[srcFormat];
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
pfnClearTiles(pDC, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
|
||||
|
@ -429,457 +196,6 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3
|
|||
}
|
||||
}
|
||||
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
const simdscalar vCenterOffsetsX = __m256{0.5, 1.5, 0.5, 1.5, 2.5, 3.5, 2.5, 3.5};
|
||||
const simdscalar vCenterOffsetsY = __m256{0.5, 0.5, 1.5, 1.5, 0.5, 0.5, 1.5, 1.5};
|
||||
const simdscalar vULOffsetsX = __m256{0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
|
||||
const simdscalar vULOffsetsY = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
|
||||
simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar vI, simdscalar vJ)
|
||||
{
|
||||
simdscalar vClipMask = _simd_setzero_ps();
|
||||
uint32_t numClipDistance = _mm_popcnt_u32(clipMask);
|
||||
|
||||
for (uint32_t i = 0; i < numClipDistance; ++i)
|
||||
{
|
||||
// pull triangle clip distance values from clip buffer
|
||||
simdscalar vA = _simd_broadcast_ss(pUserClipBuffer++);
|
||||
simdscalar vB = _simd_broadcast_ss(pUserClipBuffer++);
|
||||
simdscalar vC = _simd_broadcast_ss(pUserClipBuffer++);
|
||||
|
||||
// interpolate
|
||||
simdscalar vInterp = vplaneps(vA, vB, vC, vI, vJ);
|
||||
|
||||
// clip if interpolated clip distance is < 0 || NAN
|
||||
simdscalar vCull = _simd_cmp_ps(_simd_setzero_ps(), vInterp, _CMP_NLE_UQ);
|
||||
|
||||
vClipMask = _simd_or_ps(vClipMask, vCull);
|
||||
}
|
||||
|
||||
return _simd_movemask_ps(vClipMask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BESingleSampleBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
AR_END(BESetup, 1);
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
||||
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
|
||||
|
||||
for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
simdmask coverageMask = work.coverageMask[0] & MASK;
|
||||
|
||||
if (coverageMask)
|
||||
{
|
||||
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
|
||||
{
|
||||
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
|
||||
|
||||
const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthBuffer));
|
||||
|
||||
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
|
||||
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
|
||||
|
||||
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
|
||||
}
|
||||
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
|
||||
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
|
||||
AR_END(BEBarycentric, 1);
|
||||
|
||||
// interpolate user clip distance if available
|
||||
if (state.rastState.clipDistanceMask)
|
||||
{
|
||||
coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.center, psContext.vJ.center);
|
||||
}
|
||||
|
||||
simdscalar vCoverageMask = vMask(coverageMask);
|
||||
simdscalar depthPassMask = vCoverageMask;
|
||||
simdscalar stencilPassMask = vCoverageMask;
|
||||
|
||||
// Early-Z?
|
||||
if (T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
||||
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
goto Endtile;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
psContext.sampleIndex = 0;
|
||||
psContext.activeMask = _simd_castps_si(vCoverageMask);
|
||||
|
||||
// execute pixel shader
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
// late-Z
|
||||
if (!T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
||||
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BELateDepthTest, 0);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
// need to call depth/stencil write for stencil write
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
|
||||
goto Endtile;
|
||||
}
|
||||
} else {
|
||||
// for early z, consolidate discards from shader
|
||||
// into depthPassMask
|
||||
depthPassMask = _simd_and_ps(depthPassMask, vCoverageMask);
|
||||
}
|
||||
|
||||
uint32_t statMask = _simd_movemask_ps(depthPassMask);
|
||||
uint32_t statCount = _mm_popcnt_u32(statMask);
|
||||
UPDATE_STAT_BE(DepthPassCount, statCount);
|
||||
|
||||
// output merger
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
|
||||
// do final depth write after all pixel kills
|
||||
if (!state.psState.forceEarlyZ)
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
|
||||
}
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
|
||||
Endtile:
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
|
||||
work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
if (useAlternateOffset)
|
||||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
#endif
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
|
||||
AR_END(BEEndTile, 0);
|
||||
|
||||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
|
||||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
|
||||
}
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
|
||||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
|
||||
}
|
||||
|
||||
AR_END(BESingleSampleBackend, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BESampleRateBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
||||
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
|
||||
|
||||
for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
|
||||
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
|
||||
{
|
||||
simdmask coverageMask = work.coverageMask[sample] & MASK;
|
||||
|
||||
if (coverageMask)
|
||||
{
|
||||
// offset depth/stencil buffers current sample
|
||||
uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
|
||||
|
||||
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
|
||||
{
|
||||
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
|
||||
|
||||
const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
|
||||
|
||||
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
|
||||
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
|
||||
|
||||
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
|
||||
|
||||
CalcSampleBarycentrics(coeffs, psContext);
|
||||
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
// interpolate user clip distance if available
|
||||
if (state.rastState.clipDistanceMask)
|
||||
{
|
||||
coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
|
||||
}
|
||||
|
||||
simdscalar vCoverageMask = vMask(coverageMask);
|
||||
simdscalar depthPassMask = vCoverageMask;
|
||||
simdscalar stencilPassMask = vCoverageMask;
|
||||
|
||||
// Early-Z?
|
||||
if (T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
// early-exit if no samples passed depth or earlyZ is forced on.
|
||||
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
psContext.sampleIndex = sample;
|
||||
psContext.activeMask = _simd_castps_si(vCoverageMask);
|
||||
|
||||
// execute pixel shader
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
// late-Z
|
||||
if (!T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BELateDepthTest, 0);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
// need to call depth/stencil write for stencil write
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t statMask = _simd_movemask_ps(depthPassMask);
|
||||
uint32_t statCount = _mm_popcnt_u32(statMask);
|
||||
UPDATE_STAT_BE(DepthPassCount, statCount);
|
||||
|
||||
// output merger
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
|
||||
// do final depth write after all pixel kills
|
||||
if (!state.psState.forceEarlyZ)
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
}
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
Endtile:
|
||||
ATTR_UNUSED;
|
||||
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
if (useAlternateOffset)
|
||||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
#endif
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
|
||||
AR_END(BEEndTile, 0);
|
||||
|
||||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
|
||||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
|
||||
}
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
|
||||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
|
||||
}
|
||||
|
||||
AR_END(BESampleRateBackend, 0);
|
||||
}
|
||||
// optimized backend flow with NULL PS
|
||||
template<uint32_t sampleCountT>
|
||||
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
|
@ -977,7 +293,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
|||
UPDATE_STAT_BE(DepthPassCount, statCount);
|
||||
}
|
||||
|
||||
Endtile:
|
||||
Endtile:
|
||||
ATTR_UNUSED;
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
@ -994,17 +310,7 @@ Endtile:
|
|||
AR_END(BENullBackend, 0);
|
||||
}
|
||||
|
||||
void InitClearTilesTable()
|
||||
{
|
||||
memset(sClearTilesTable, 0, sizeof(sClearTilesTable));
|
||||
|
||||
sClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
|
||||
sClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
|
||||
sClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
|
||||
sClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
|
||||
sClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
|
||||
}
|
||||
|
||||
PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};
|
||||
PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
|
||||
PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
|
||||
[2] // centroid
|
||||
|
@ -1023,113 +329,10 @@ PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
|
|||
[2] // canEarlyZ
|
||||
= {};
|
||||
|
||||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
|
||||
// arguments to static template arguments.
|
||||
template <uint32_t... ArgsT>
|
||||
struct BEChooser
|
||||
{
|
||||
// Last Arg Terminator
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
|
||||
case SWR_BACKEND_MSAA_PIXEL_RATE: return BackendPixelRate<SwrBackendTraits<ArgsT...>>; break;
|
||||
case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid backend func\n");
|
||||
return nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_INPUT_COVERAGE_NONE: return BEChooser<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
|
||||
case SWR_INPUT_COVERAGE_NORMAL: return BEChooser<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
|
||||
case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooser<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample pattern\n");
|
||||
return BEChooser<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_MULTISAMPLE_1X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_2X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_4X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_8X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_16X: return BEChooser<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample count\n");
|
||||
return BEChooser<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
if(tArg == true)
|
||||
{
|
||||
return BEChooser<ArgsT..., 1>::GetFunc(remainingArgs...);
|
||||
}
|
||||
|
||||
return BEChooser<ArgsT..., 0>::GetFunc(remainingArgs...);
|
||||
}
|
||||
};
|
||||
|
||||
void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2])
|
||||
{
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
|
||||
{
|
||||
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
|
||||
{
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[inputCoverage][isCentroid][canEarlyZ] =
|
||||
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
|
||||
{
|
||||
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
|
||||
{
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
|
||||
{
|
||||
for(uint32_t centroid = 0; centroid < 2; centroid++)
|
||||
{
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitBackendPixelRate0();
|
||||
void InitBackendFuncTables()
|
||||
{
|
||||
InitBackendPixelRate();
|
||||
InitBackendSingleFuncTable(gBackendSingleSample);
|
||||
InitBackendPixelRate0();
|
||||
InitBackendSampleFuncTable(gBackendSampleRateTable);
|
||||
|
||||
gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,281 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file backend.cpp
|
||||
*
|
||||
* @brief Backend handles rasterization, pixel shading and output merger
|
||||
* operations.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "backend.h"
|
||||
#include "backend_impl.h"
|
||||
#include "tilemgr.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "core/multisample.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
template<SWR_FORMAT format>
|
||||
void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
|
||||
{
|
||||
auto lambda = [&](int32_t comp)
|
||||
{
|
||||
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
|
||||
|
||||
pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
|
||||
};
|
||||
|
||||
const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
|
||||
|
||||
for (uint32_t i = 0; i < numIter; ++i)
|
||||
{
|
||||
UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
template<SWR_FORMAT format>
|
||||
void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
|
||||
{
|
||||
auto lambda = [&](int32_t comp)
|
||||
{
|
||||
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
|
||||
|
||||
pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
|
||||
};
|
||||
|
||||
const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
|
||||
|
||||
for (uint32_t i = 0; i < numIter; ++i)
|
||||
{
|
||||
UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
template<SWR_FORMAT format>
|
||||
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
|
||||
{
|
||||
// convert clear color to hottile format
|
||||
// clear color is in RGBA float/uint32
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
simd16vector vClear;
|
||||
for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
|
||||
{
|
||||
simd16scalar vComp;
|
||||
vComp = _simd16_load1_ps((const float*)&clear[comp]);
|
||||
if (FormatTraits<format>::isNormalized(comp))
|
||||
{
|
||||
vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
|
||||
vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
|
||||
}
|
||||
vComp = FormatTraits<format>::pack(comp, vComp);
|
||||
vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
|
||||
}
|
||||
|
||||
#else
|
||||
simdvector vClear;
|
||||
for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
|
||||
{
|
||||
simdscalar vComp;
|
||||
vComp = _simd_load1_ps((const float*)&clear[comp]);
|
||||
if (FormatTraits<format>::isNormalized(comp))
|
||||
{
|
||||
vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
|
||||
vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
|
||||
}
|
||||
vComp = FormatTraits<format>::pack(comp, vComp);
|
||||
vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
|
||||
}
|
||||
|
||||
#endif
|
||||
uint32_t tileX, tileY;
|
||||
MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
|
||||
|
||||
// Init to full macrotile
|
||||
SWR_RECT clearTile =
|
||||
{
|
||||
KNOB_MACROTILE_X_DIM * int32_t(tileX),
|
||||
KNOB_MACROTILE_Y_DIM * int32_t(tileY),
|
||||
KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
|
||||
KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
|
||||
};
|
||||
|
||||
// intersect with clear rect
|
||||
clearTile &= rect;
|
||||
|
||||
// translate to local hottile origin
|
||||
clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
|
||||
|
||||
// Make maximums inclusive (needed for convert to raster tiles)
|
||||
clearTile.xmax -= 1;
|
||||
clearTile.ymax -= 1;
|
||||
|
||||
// convert to raster tiles
|
||||
clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
|
||||
clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
|
||||
clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
|
||||
clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
|
||||
|
||||
const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
|
||||
// compute steps between raster tile samples / raster tiles / macro tile rows
|
||||
const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
|
||||
const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
|
||||
const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
|
||||
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
|
||||
|
||||
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
|
||||
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
|
||||
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
|
||||
|
||||
// loop over all raster tiles in the current hot tile
|
||||
for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
|
||||
{
|
||||
uint8_t* pRasterTile = pRasterTileRow;
|
||||
for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
|
||||
{
|
||||
for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
|
||||
{
|
||||
ClearRasterTile<format>(pRasterTile, vClear);
|
||||
pRasterTile += rasterTileSampleStep;
|
||||
}
|
||||
}
|
||||
pRasterTileRow += macroTileRowStep;
|
||||
}
|
||||
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
}
|
||||
|
||||
|
||||
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
if (KNOB_FAST_CLEAR)
|
||||
{
|
||||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
|
||||
SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
|
||||
uint32_t numSamples = GetNumSamples(sampleCount);
|
||||
|
||||
SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
|
||||
|
||||
AR_BEGIN(BEClear, pDC->drawId);
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
|
||||
{
|
||||
unsigned long rt = 0;
|
||||
uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
|
||||
while (_BitScanForward(&rt, mask))
|
||||
{
|
||||
mask &= ~(1 << rt);
|
||||
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
|
||||
// All we want to do here is to mark the hot tile as being in a "needs clear" state.
|
||||
pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
|
||||
pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
|
||||
pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
|
||||
pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
|
||||
{
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
|
||||
{
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
|
||||
pHotTile->clearData[0] = pClear->clearStencil;
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
|
||||
AR_END(BEClear, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Legacy clear
|
||||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
|
||||
AR_BEGIN(BEClear, pDC->drawId);
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
|
||||
{
|
||||
DWORD clearData[4];
|
||||
clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
|
||||
clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
|
||||
clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
|
||||
clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
|
||||
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
unsigned long rt = 0;
|
||||
uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
|
||||
while (_BitScanForward(&rt, mask))
|
||||
{
|
||||
mask &= ~(1 << rt);
|
||||
|
||||
pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
|
||||
{
|
||||
DWORD clearData[4];
|
||||
clearData[0] = *(DWORD*)&pClear->clearDepth;
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
|
||||
{
|
||||
DWORD clearData[4];
|
||||
clearData[0] = pClear->clearStencil;
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
|
||||
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
AR_END(BEClear, 1);
|
||||
}
|
||||
}
|
||||
|
||||
void InitClearTilesTable()
|
||||
{
|
||||
memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
|
||||
|
||||
gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
|
||||
gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
|
||||
gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
|
||||
gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
|
||||
gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,345 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file backend.cpp
|
||||
*
|
||||
* @brief Backend handles rasterization, pixel shading and output merger
|
||||
* operations.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "backend.h"
|
||||
#include "backend_impl.h"
|
||||
#include "tilemgr.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "core/multisample.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
template<typename T>
|
||||
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BESampleRateBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
||||
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
|
||||
|
||||
for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
|
||||
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
|
||||
{
|
||||
simdmask coverageMask = work.coverageMask[sample] & MASK;
|
||||
|
||||
if (coverageMask)
|
||||
{
|
||||
// offset depth/stencil buffers current sample
|
||||
uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
|
||||
|
||||
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
|
||||
{
|
||||
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
|
||||
|
||||
const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
|
||||
|
||||
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
|
||||
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
|
||||
|
||||
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
|
||||
|
||||
CalcSampleBarycentrics(coeffs, psContext);
|
||||
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
// interpolate user clip distance if available
|
||||
if (state.rastState.clipDistanceMask)
|
||||
{
|
||||
coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
|
||||
}
|
||||
|
||||
simdscalar vCoverageMask = vMask(coverageMask);
|
||||
simdscalar depthPassMask = vCoverageMask;
|
||||
simdscalar stencilPassMask = vCoverageMask;
|
||||
|
||||
// Early-Z?
|
||||
if (T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
// early-exit if no samples passed depth or earlyZ is forced on.
|
||||
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
psContext.sampleIndex = sample;
|
||||
psContext.activeMask = _simd_castps_si(vCoverageMask);
|
||||
|
||||
// execute pixel shader
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
// late-Z
|
||||
if (!T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BELateDepthTest, 0);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
// need to call depth/stencil write for stencil write
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t statMask = _simd_movemask_ps(depthPassMask);
|
||||
uint32_t statCount = _mm_popcnt_u32(statMask);
|
||||
UPDATE_STAT_BE(DepthPassCount, statCount);
|
||||
|
||||
// output merger
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
|
||||
// do final depth write after all pixel kills
|
||||
if (!state.psState.forceEarlyZ)
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
}
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
Endtile:
|
||||
ATTR_UNUSED;
|
||||
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
|
||||
if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
if (useAlternateOffset)
|
||||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
#endif
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
|
||||
AR_END(BEEndTile, 0);
|
||||
|
||||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
|
||||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
|
||||
}
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
|
||||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
|
||||
}
|
||||
|
||||
AR_END(BESampleRateBackend, 0);
|
||||
}
|
||||
|
||||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
|
||||
// arguments to static template arguments.
|
||||
template <uint32_t... ArgsT>
|
||||
struct BEChooserSampleRate
|
||||
{
|
||||
// Last Arg Terminator
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
|
||||
{
|
||||
switch (tArg)
|
||||
{
|
||||
case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
|
||||
case SWR_BACKEND_SINGLE_SAMPLE:
|
||||
case SWR_BACKEND_MSAA_PIXEL_RATE:
|
||||
SWR_ASSERT(0 && "Invalid backend func\n");
|
||||
return nullptr;
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid backend func\n");
|
||||
return nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch (tArg)
|
||||
{
|
||||
case SWR_INPUT_COVERAGE_NONE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
|
||||
case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
|
||||
case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample pattern\n");
|
||||
return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch (tArg)
|
||||
{
|
||||
case SWR_MULTISAMPLE_1X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_2X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_4X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_8X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_16X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample count\n");
|
||||
return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
if (tArg == true)
|
||||
{
|
||||
return BEChooserSampleRate<ArgsT..., 1>::GetFunc(remainingArgs...);
|
||||
}
|
||||
|
||||
return BEChooserSampleRate<ArgsT..., 0>::GetFunc(remainingArgs...);
|
||||
}
|
||||
};
|
||||
|
||||
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
|
||||
{
|
||||
for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
|
||||
{
|
||||
for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
|
||||
{
|
||||
for (uint32_t centroid = 0; centroid < 2; centroid++)
|
||||
{
|
||||
for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
|
||||
BEChooserSampleRate<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,321 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file backend.cpp
|
||||
*
|
||||
* @brief Backend handles rasterization, pixel shading and output merger
|
||||
* operations.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "backend.h"
|
||||
#include "backend_impl.h"
|
||||
#include "tilemgr.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "core/multisample.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
template<typename T>
|
||||
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BESingleSampleBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], *pDepthBuffer, *pStencilBuffer;
|
||||
SetupRenderBuffers(pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.psState.numRenderTargets, renderBuffers);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
|
||||
AR_END(BESetup, 1);
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
|
||||
|
||||
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
|
||||
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
|
||||
|
||||
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
|
||||
|
||||
for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
|
||||
{
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
|
||||
|
||||
#endif
|
||||
simdmask coverageMask = work.coverageMask[0] & MASK;
|
||||
|
||||
if (coverageMask)
|
||||
{
|
||||
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
|
||||
{
|
||||
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
|
||||
|
||||
const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthBuffer));
|
||||
|
||||
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
|
||||
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
|
||||
|
||||
coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
|
||||
}
|
||||
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
|
||||
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
|
||||
}
|
||||
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
|
||||
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
|
||||
AR_END(BEBarycentric, 1);
|
||||
|
||||
// interpolate user clip distance if available
|
||||
if (state.rastState.clipDistanceMask)
|
||||
{
|
||||
coverageMask &= ~ComputeUserClipMask(state.rastState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.center, psContext.vJ.center);
|
||||
}
|
||||
|
||||
simdscalar vCoverageMask = vMask(coverageMask);
|
||||
simdscalar depthPassMask = vCoverageMask;
|
||||
simdscalar stencilPassMask = vCoverageMask;
|
||||
|
||||
// Early-Z?
|
||||
if (T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
||||
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
goto Endtile;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
psContext.sampleIndex = 0;
|
||||
psContext.activeMask = _simd_castps_si(vCoverageMask);
|
||||
|
||||
// execute pixel shader
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
// late-Z
|
||||
if (!T::bCanEarlyZ)
|
||||
{
|
||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
|
||||
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
|
||||
AR_END(BELateDepthTest, 0);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
// need to call depth/stencil write for stencil write
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
|
||||
goto Endtile;
|
||||
}
|
||||
} else {
|
||||
// for early z, consolidate discards from shader
|
||||
// into depthPassMask
|
||||
depthPassMask = _simd_and_ps(depthPassMask, vCoverageMask);
|
||||
}
|
||||
|
||||
uint32_t statMask = _simd_movemask_ps(depthPassMask);
|
||||
uint32_t statCount = _mm_popcnt_u32(statMask);
|
||||
UPDATE_STAT_BE(DepthPassCount, statCount);
|
||||
|
||||
// output merger
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
OutputMerger8x2(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets, state.colorHottileEnable, useAlternateOffset);
|
||||
#else
|
||||
OutputMerger4x2(psContext, pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.numRenderTargets);
|
||||
#endif
|
||||
|
||||
// do final depth write after all pixel kills
|
||||
if (!state.psState.forceEarlyZ)
|
||||
{
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
|
||||
}
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
|
||||
Endtile:
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
|
||||
work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
|
||||
#if USE_8x2_TILE_BACKEND
|
||||
if (useAlternateOffset)
|
||||
{
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (uint32_t rt = 0; rt < state.psState.numRenderTargets; ++rt)
|
||||
{
|
||||
pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
#endif
|
||||
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
|
||||
AR_END(BEEndTile, 0);
|
||||
|
||||
psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
|
||||
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
|
||||
}
|
||||
|
||||
psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
|
||||
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
|
||||
}
|
||||
|
||||
AR_END(BESingleSampleBackend, 0);
|
||||
}
|
||||
|
||||
// Recursive template used to auto-nest conditionals. Converts dynamic enum function
|
||||
// arguments to static template arguments.
|
||||
template <uint32_t... ArgsT>
|
||||
struct BEChooserSingleSample
|
||||
{
|
||||
// Last Arg Terminator
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
|
||||
case SWR_BACKEND_MSAA_PIXEL_RATE:
|
||||
case SWR_BACKEND_MSAA_SAMPLE_RATE:
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid backend func\n");
|
||||
return nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_INPUT_COVERAGE_NONE: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
|
||||
case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
|
||||
case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample pattern\n");
|
||||
return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
switch(tArg)
|
||||
{
|
||||
case SWR_MULTISAMPLE_1X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_2X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_4X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_8X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
|
||||
case SWR_MULTISAMPLE_16X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
|
||||
default:
|
||||
SWR_ASSERT(0 && "Invalid sample count\n");
|
||||
return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively parse args
|
||||
template <typename... TArgsT>
|
||||
static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
|
||||
{
|
||||
if(tArg == true)
|
||||
{
|
||||
return BEChooserSingleSample<ArgsT..., 1>::GetFunc(remainingArgs...);
|
||||
}
|
||||
|
||||
return BEChooserSingleSample<ArgsT..., 0>::GetFunc(remainingArgs...);
|
||||
}
|
||||
};
|
||||
|
||||
void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2])
|
||||
{
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
|
||||
{
|
||||
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
|
||||
{
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[inputCoverage][isCentroid][canEarlyZ] =
|
||||
BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
|
||||
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue