swr/rast: Thread locked tiles improvement

- Change tilemgr TILE_ID encoding to use Morton-order (Z-order).
- Change locked tiles set to bitset.  Makes clear, set, get much faster.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
George Kyriazis 2018-05-01 19:33:38 -05:00
parent 8238c791dc
commit 4e52cb51b5
7 changed files with 153 additions and 25 deletions

View File

@ -42,6 +42,7 @@
#include "core/tilemgr.h"
#include "core/clip.h"
#include "core/utils.h"
#include "core/tileset.h"
#include "common/os.h"
@ -139,6 +140,11 @@ HANDLE SwrCreateContext(
BindApiThread(pContext, 0);
}
if (pContext->threadInfo.SINGLE_THREADED)
{
pContext->pSingleThreadLockedTiles = new TileSet();
}
pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
pContext->pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
@ -245,7 +251,7 @@ void QueueWork(SWR_CONTEXT *pContext)
{
uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
WorkOnFifoFE(pContext, 0, curDraw[0]);
WorkOnFifoBE(pContext, 0, curDraw[1], pContext->singleThreadLockedTiles, 0, 0);
WorkOnFifoBE(pContext, 0, curDraw[1], *pContext->pSingleThreadLockedTiles, 0, 0);
}
else
{
@ -427,7 +433,8 @@ void SwrDestroyContext(HANDLE hContext)
delete[] pContext->ppScratch;
AlignedFree(pContext->pStats);
delete(pContext->pHotTileMgr);
delete pContext->pHotTileMgr;
delete pContext->pSingleThreadLockedTiles;
pContext->~SWR_CONTEXT();
AlignedFree(GetContext(hContext));

View File

@ -516,7 +516,7 @@ struct SWR_CONTEXT
uint32_t lastFrameChecked;
uint64_t lastDrawChecked;
TileSet singleThreadLockedTiles;
TileSet* pSingleThreadLockedTiles;
// ArchRast thread contexts.
HANDLE* pArContext;

View File

@ -49,6 +49,7 @@
#include "rasterizer.h"
#include "rdtsc_core.h"
#include "tilemgr.h"
#include "tileset.h"
@ -587,7 +588,7 @@ bool WorkOnFifoBE(
}
// can only work on this draw if it's not in use by other threads
if (lockedTiles.find(tileID) != lockedTiles.end())
if (lockedTiles.get(tileID))
{
continue;
}
@ -645,7 +646,7 @@ bool WorkOnFifoBE(
else
{
// This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
lockedTiles.insert(tileID);
lockedTiles.set(tileID);
}
}
}

View File

@ -62,7 +62,7 @@ struct THREAD_POOL
THREAD_DATA *pApiThreadData;
};
typedef std::unordered_set<uint32_t> TileSet;
struct TileSet;
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);

View File

@ -33,8 +33,6 @@
#include "core/multisample.h"
#include "rdtsc_core.h"
#define TILE_ID(x,y) ((x << 16 | y))
//////////////////////////////////////////////////////////////////////////
/// @brief Constructs a tile manager bound to the given arena.
/// @param arena - Stored by reference in mArena (not copied), so it must
///                remain valid for as long as this manager uses it.
MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
@ -50,26 +48,35 @@ void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
return;
}
uint32_t id = TILE_ID(x, y);
uint32_t id = getTileId(x, y);
MacroTileQueue &tile = mTiles[id];
tile.mWorkItemsFE++;
tile.mId = id;
if (tile.mWorkItemsFE == 1)
if (id >= mTiles.size())
{
tile.clear(mArena);
mDirtyTiles.push_back(&tile);
mTiles.resize((16 + id) * 2);
}
MacroTileQueue *pTile = mTiles[id];
if (!pTile)
{
pTile = mTiles[id] = new MacroTileQueue();
}
pTile->mWorkItemsFE++;
pTile->mId = id;
if (pTile->mWorkItemsFE == 1)
{
pTile->clear(mArena);
mDirtyTiles.push_back(pTile);
}
mWorkItemsProduced++;
tile.enqueue_try_nosync(mArena, pWork);
pTile->enqueue_try_nosync(mArena, pWork);
}
void MacroTileMgr::markTileComplete(uint32_t id)
{
SWR_ASSERT(mTiles.find(id) != mTiles.end());
MacroTileQueue &tile = mTiles[id];
SWR_ASSERT(mTiles.size() > id);
MacroTileQueue &tile = *mTiles[id];
uint32_t numTiles = tile.mWorkItemsFE;
InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);

View File

@ -31,6 +31,7 @@
#include <set>
#include <unordered_map>
#include "common/formats.h"
#include "common/intrin.h"
#include "fifo.hpp"
#include "context.h"
#include "format_traits.h"
@ -41,7 +42,7 @@
struct MacroTileQueue
{
MacroTileQueue() { }
~MacroTileQueue() { }
~MacroTileQueue() { destroy(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Returns number of work items queued for this tile.
@ -110,9 +111,9 @@ public:
MacroTileMgr(CachingArena& arena);
~MacroTileMgr()
{
for (auto &tile : mTiles)
for (auto *pTile : mTiles)
{
tile.second.destroy();
delete pTile;
}
}
@ -136,13 +137,20 @@ public:
static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
{
y = tileID & 0xffff;
x = (tileID >> 16) & 0xffff;
// Morton / Z order of tiles
x = pext_u32(tileID, 0x55555555);
y = pext_u32(tileID, 0xAAAAAAAA);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Combines a tile's x/y coordinates into a single tile id.
/// @param x - Tile x coordinate; routed through mask 0x55555555
///            (the even bit positions of the id).
/// @param y - Tile y coordinate; routed through mask 0xAAAAAAAA
///            (the odd bit positions of the id).
/// @return Tile id; getTileIndices() applies pext_u32 with the same two
///         masks to recover x and y.
/// @note NOTE(review): presumably pdep_u32 has BMI2 PDEP (parallel bit
///       deposit) semantics, in which case only the low 16 bits of each
///       coordinate can land in the 32-bit id - confirm in common/intrin.h.
static INLINE uint32_t getTileId(uint32_t x, uint32_t y)
{
// Morton / Z order of tiles
return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA);
}
private:
CachingArena& mArena;
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
std::vector<MacroTileQueue*> mTiles;
// Any tile that has work queued to it is a dirty tile.
std::vector<MacroTileQueue*> mDirtyTiles;

View File

@ -0,0 +1,105 @@
/****************************************************************************
* Copyright (C) 2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file tileset.h
*
* @brief Custom bitset class for managing locked tiles
*
******************************************************************************/
#pragma once
//////////////////////////////////////////////////////////////////////////
/// @brief Growable bitset indexed by tile id, used to track locked tiles.
///
/// Bits are packed into size_t words held in a single buffer obtained from
/// AlignedMalloc. The buffer grows on demand in set() and is released in
/// the destructor. The set performs no synchronization of its own.
struct TileSet
{
    TileSet() = default;

    // The set owns m_bits through a raw pointer. A member-wise copy would
    // leave two objects freeing the same buffer, so copying is disabled.
    // (Declaring these also suppresses the implicit move operations.)
    TileSet(const TileSet&) = delete;
    TileSet& operator=(const TileSet&) = delete;

    ~TileSet()
    {
        if (m_bits)
        {
            AlignedFree(m_bits);
        }
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Sets bit idx, growing the backing storage when idx is beyond
    ///        the current capacity.
    /// @param idx - Zero-based bit index. No overflow check is made on
    ///              idx + 1, so idx must be below SIZE_MAX.
    INLINE void set(size_t idx)
    {
        _grow(idx);
        size_t& word = _get_word(idx);
        word |= (size_t(1) << (idx & BITS_OFFSET));

        // Track one-past-the-highest bit ever set so clear() only has to
        // wipe the words that can actually contain set bits.
        if (m_maxSet < idx + 1)
        {
            m_maxSet = idx + 1;
        }
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Tests bit idx.
    /// @param idx - Zero-based bit index; any value is accepted.
    /// @return true if the bit is set; false if it is clear or lies beyond
    ///         the current capacity (the set never grows on a query).
    INLINE bool get(size_t idx) const
    {
        if (idx >= m_size)
        {
            return false;
        }

        const size_t word = m_bits[idx / BITS_PER_WORD];
        return 0 != (word & (size_t(1) << (idx & BITS_OFFSET)));
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Clears every bit. Capacity is retained; only the words up to
    ///        the highest bit set since the last clear() are zeroed.
    INLINE void clear()
    {
        if (m_maxSet)
        {
            // Round up to whole words covering bits [0, m_maxSet).
            size_t num_words = (m_maxSet + BITS_OFFSET) / BITS_PER_WORD;
            memset(m_bits, 0, sizeof(size_t) * num_words);
            m_maxSet = 0;
        }
    }

private:
    static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
    static const size_t BITS_OFFSET   = BITS_PER_WORD - 1; // mask for the bit position within a word

    size_t  m_size   = 0;       // capacity in bits; always a multiple of BITS_PER_WORD
    size_t  m_maxSet = 0;       // highest bit index set since the last clear(), plus one
    size_t* m_bits   = nullptr; // word storage; nullptr until the first set()

    /// @brief Returns the word holding bit idx. Caller guarantees idx < m_size.
    INLINE size_t& _get_word(size_t idx)
    {
        return m_bits[idx / BITS_PER_WORD];
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Ensures the storage can hold bit idx.
    ///
    /// Capacity becomes idx + 1 rounded up to a whole word. Existing words
    /// are copied into the new buffer and the newly added words are zeroed.
    /// The AlignedMalloc result is not checked before use.
    void _grow(size_t idx)
    {
        if (idx < m_size)
        {
            return;
        }

        size_t new_size  = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
        size_t num_words = new_size / BITS_PER_WORD;
        size_t* newBits  = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64);

        size_t copy_words = 0;
        if (m_bits)
        {
            copy_words = (m_size + BITS_OFFSET) / BITS_PER_WORD;
            num_words -= copy_words; // from here on: number of *new* words to zero
            memcpy(newBits, m_bits, copy_words * sizeof(size_t));

            AlignedFree(m_bits);
        }

        m_bits = newBits;
        m_size = new_size;

        // Zero only the tail that was not filled from the old buffer.
        memset(&m_bits[copy_words], 0, sizeof(size_t) * num_words);
    }
};