gallium/swr: add OpenSWR rasterizer
Acked-by: Roland Scheidegger <sroland@vmware.com> Acked-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
parent
2b2d3680bf
commit
c6e67f5a93
|
@ -0,0 +1,208 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef SWRLIB_CONTAINERS_HPP__
|
||||
#define SWRLIB_CONTAINERS_HPP__
|
||||
|
||||
#include <cstddef>
#include <cstring>
#include <functional>
#include "common/os.h"
|
||||
|
||||
namespace SWRL
|
||||
{
|
||||
|
||||
template <typename T, int NUM_ELEMENTS>
|
||||
struct UncheckedFixedVector
|
||||
{
|
||||
UncheckedFixedVector() : mSize(0)
|
||||
{
|
||||
}
|
||||
|
||||
UncheckedFixedVector(std::size_t size, T const& exemplar)
|
||||
{
|
||||
this->mSize = 0;
|
||||
for (std::size_t i = 0; i < size; ++i)
|
||||
this->push_back(exemplar);
|
||||
}
|
||||
|
||||
template <typename Iter>
|
||||
UncheckedFixedVector(Iter fst, Iter lst)
|
||||
{
|
||||
this->mSize = 0;
|
||||
for ( ; fst != lst; ++fst)
|
||||
this->push_back(*fst);
|
||||
}
|
||||
|
||||
UncheckedFixedVector(UncheckedFixedVector const& UFV)
|
||||
{
|
||||
this->mSize = 0;
|
||||
for (std::size_t i = 0, N = UFV.size(); i < N; ++i)
|
||||
(*this)[i] = UFV[i];
|
||||
this->mSize = UFV.size();
|
||||
}
|
||||
|
||||
UncheckedFixedVector& operator=(UncheckedFixedVector const& UFV)
|
||||
{
|
||||
for (std::size_t i = 0, N = UFV.size(); i < N; ++i)
|
||||
(*this)[i] = UFV[i];
|
||||
this->mSize = UFV.size();
|
||||
return *this;
|
||||
}
|
||||
|
||||
T* begin() { return &this->mElements[0]; }
|
||||
T* end() { return &this->mElements[0] + this->mSize; }
|
||||
T const* begin() const { return &this->mElements[0]; }
|
||||
T const* end() const { return &this->mElements[0] + this->mSize; }
|
||||
|
||||
friend bool operator==(UncheckedFixedVector const& L, UncheckedFixedVector const& R)
|
||||
{
|
||||
if (L.size() != R.size()) return false;
|
||||
for (std::size_t i = 0, N = L.size(); i < N; ++i)
|
||||
{
|
||||
if (L[i] != R[i]) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
friend bool operator!=(UncheckedFixedVector const& L, UncheckedFixedVector const& R)
|
||||
{
|
||||
if (L.size() != R.size()) return true;
|
||||
for (std::size_t i = 0, N = L.size(); i < N; ++i)
|
||||
{
|
||||
if (L[i] != R[i]) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
T& operator[](std::size_t idx)
|
||||
{
|
||||
return this->mElements[idx];
|
||||
}
|
||||
T const& operator[](std::size_t idx) const
|
||||
{
|
||||
return this->mElements[idx];
|
||||
}
|
||||
void push_back(T const& t)
|
||||
{
|
||||
this->mElements[this->mSize] = t;
|
||||
++this->mSize;
|
||||
}
|
||||
void pop_back()
|
||||
{
|
||||
SWR_ASSERT(this->mSize > 0);
|
||||
--this->mSize;
|
||||
}
|
||||
T& back()
|
||||
{
|
||||
return this->mElements[this->mSize-1];
|
||||
}
|
||||
T const& back() const
|
||||
{
|
||||
return this->mElements[this->mSize-1];
|
||||
}
|
||||
bool empty() const
|
||||
{
|
||||
return this->mSize == 0;
|
||||
}
|
||||
std::size_t size() const
|
||||
{
|
||||
return this->mSize;
|
||||
}
|
||||
void resize(std::size_t sz)
|
||||
{
|
||||
this->mSize = sz;
|
||||
}
|
||||
void clear()
|
||||
{
|
||||
this->resize(0);
|
||||
}
|
||||
private:
|
||||
std::size_t mSize;
|
||||
T mElements[NUM_ELEMENTS];
|
||||
};
|
||||
|
||||
template <typename T, int NUM_ELEMENTS>
|
||||
struct FixedStack : UncheckedFixedVector<T, NUM_ELEMENTS>
|
||||
{
|
||||
FixedStack() {}
|
||||
|
||||
void push(T const& t)
|
||||
{
|
||||
this->push_back(t);
|
||||
}
|
||||
|
||||
void pop()
|
||||
{
|
||||
this->pop_back();
|
||||
}
|
||||
|
||||
T& top()
|
||||
{
|
||||
return this->back();
|
||||
}
|
||||
|
||||
T const& top() const
|
||||
{
|
||||
return this->back();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct CRCHash
|
||||
{
|
||||
static_assert((sizeof(T) % sizeof(UINT)) == 0, "CRCHash expects templated type size is even multiple of 4B");
|
||||
UINT operator()(const T& k) const
|
||||
{
|
||||
UINT *pData = (UINT*)&k;
|
||||
UINT crc = 0;
|
||||
for (UINT i = 0; i < sizeof(T) / sizeof(UINT); ++i)
|
||||
{
|
||||
crc = _mm_crc32_u32(crc, pData[i]);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
};
|
||||
|
||||
}// end SWRL
|
||||
|
||||
namespace std
|
||||
{
|
||||
|
||||
template <typename T, int N>
|
||||
struct hash<SWRL::UncheckedFixedVector<T, N>>
|
||||
{
|
||||
size_t operator() (SWRL::UncheckedFixedVector<T, N> const& v) const
|
||||
{
|
||||
if (v.size() == 0) return 0;
|
||||
std::hash<T> H;
|
||||
size_t x = H(v[0]);
|
||||
if (v.size() == 1) return x;
|
||||
for (size_t i = 1; i < v.size(); ++i)
|
||||
x ^= H(v[i]) + 0x9e3779b9 + (x<<6) + (x>>2);
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}// end std.
|
||||
|
||||
#endif//SWRLIB_CONTAINERS_HPP__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,251 @@
|
|||
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file formats.h
|
||||
*
|
||||
* @brief auto-generated file
|
||||
*
|
||||
* DO NOT EDIT
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/os.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_TYPE - Format component type
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Enumerator values are spelled out; they equal the implicit 0..8 numbering.
enum SWR_TYPE
{
    SWR_TYPE_UNKNOWN = 0,
    SWR_TYPE_UNUSED  = 1,
    SWR_TYPE_UNORM   = 2,
    SWR_TYPE_SNORM   = 3,
    SWR_TYPE_UINT    = 4,
    SWR_TYPE_SINT    = 5,
    SWR_TYPE_FLOAT   = 6,
    SWR_TYPE_SSCALED = 7,
    SWR_TYPE_USCALED = 8,
};
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_FORMAT
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Format identifiers. The numbering is sparse; the separators below only
// mark value ranges. NUM_SWR_FORMATS is one past the largest identifier.
enum SWR_FORMAT
{
    // ---- 0x000 .. 0x03F ------------------------------------------------
    R32G32B32A32_FLOAT          = 0x000,
    R32G32B32A32_SINT           = 0x001,
    R32G32B32A32_UINT           = 0x002,
    R32G32B32X32_FLOAT          = 0x006,
    R32G32B32A32_SSCALED        = 0x007,
    R32G32B32A32_USCALED        = 0x008,

    // ---- 0x040 .. 0x07F ------------------------------------------------
    R32G32B32_FLOAT             = 0x040,
    R32G32B32_SINT              = 0x041,
    R32G32B32_UINT              = 0x042,
    R32G32B32_SSCALED           = 0x045,
    R32G32B32_USCALED           = 0x046,

    // ---- 0x080 .. 0x0BF ------------------------------------------------
    R16G16B16A16_UNORM          = 0x080,
    R16G16B16A16_SNORM          = 0x081,
    R16G16B16A16_SINT           = 0x082,
    R16G16B16A16_UINT           = 0x083,
    R16G16B16A16_FLOAT          = 0x084,
    R32G32_FLOAT                = 0x085,
    R32G32_SINT                 = 0x086,
    R32G32_UINT                 = 0x087,
    R32_FLOAT_X8X24_TYPELESS    = 0x088,
    X32_TYPELESS_G8X24_UINT     = 0x089,
    L32A32_FLOAT                = 0x08A,
    R16G16B16X16_UNORM          = 0x08E,
    R16G16B16X16_FLOAT          = 0x08F,
    L32X32_FLOAT                = 0x091,
    I32X32_FLOAT                = 0x092,
    R16G16B16A16_SSCALED        = 0x093,
    R16G16B16A16_USCALED        = 0x094,
    R32G32_SSCALED              = 0x095,
    R32G32_USCALED              = 0x096,
    R32_FLOAT_X8X24_TYPELESS_LD = 0x098,

    // ---- 0x0C0 .. 0x0FF ------------------------------------------------
    B8G8R8A8_UNORM              = 0x0C0,
    B8G8R8A8_UNORM_SRGB         = 0x0C1,
    R10G10B10A2_UNORM           = 0x0C2,
    R10G10B10A2_UNORM_SRGB      = 0x0C3,
    R10G10B10A2_UINT            = 0x0C4,
    R8G8B8A8_UNORM              = 0x0C7,
    R8G8B8A8_UNORM_SRGB         = 0x0C8,
    R8G8B8A8_SNORM              = 0x0C9,
    R8G8B8A8_SINT               = 0x0CA,
    R8G8B8A8_UINT               = 0x0CB,
    R16G16_UNORM                = 0x0CC,
    R16G16_SNORM                = 0x0CD,
    R16G16_SINT                 = 0x0CE,
    R16G16_UINT                 = 0x0CF,
    R16G16_FLOAT                = 0x0D0,
    B10G10R10A2_UNORM           = 0x0D1,
    B10G10R10A2_UNORM_SRGB      = 0x0D2,
    R11G11B10_FLOAT             = 0x0D3,
    R32_SINT                    = 0x0D6,
    R32_UINT                    = 0x0D7,
    R32_FLOAT                   = 0x0D8,
    R24_UNORM_X8_TYPELESS       = 0x0D9,
    R24_UNORM_X8_TYPELESS_LD    = 0x0DC,
    L32_UNORM                   = 0x0DD,
    L16A16_UNORM                = 0x0DF,
    I24X8_UNORM                 = 0x0E0,
    L24X8_UNORM                 = 0x0E1,
    I32_FLOAT                   = 0x0E3,
    L32_FLOAT                   = 0x0E4,
    A32_FLOAT                   = 0x0E5,
    B8G8R8X8_UNORM              = 0x0E9,
    B8G8R8X8_UNORM_SRGB         = 0x0EA,
    R8G8B8X8_UNORM              = 0x0EB,
    R8G8B8X8_UNORM_SRGB         = 0x0EC,
    R9G9B9E5_SHAREDEXP          = 0x0ED,
    B10G10R10X2_UNORM           = 0x0EE,
    L16A16_FLOAT                = 0x0F0,
    R10G10B10X2_USCALED         = 0x0F3,
    R8G8B8A8_SSCALED            = 0x0F4,
    R8G8B8A8_USCALED            = 0x0F5,
    R16G16_SSCALED              = 0x0F6,
    R16G16_USCALED              = 0x0F7,
    R32_SSCALED                 = 0x0F8,
    R32_USCALED                 = 0x0F9,

    // ---- 0x100 .. 0x13F ------------------------------------------------
    B5G6R5_UNORM                = 0x100,
    B5G6R5_UNORM_SRGB           = 0x101,
    B5G5R5A1_UNORM              = 0x102,
    B5G5R5A1_UNORM_SRGB         = 0x103,
    B4G4R4A4_UNORM              = 0x104,
    B4G4R4A4_UNORM_SRGB         = 0x105,
    R8G8_UNORM                  = 0x106,
    R8G8_SNORM                  = 0x107,
    R8G8_SINT                   = 0x108,
    R8G8_UINT                   = 0x109,
    R16_UNORM                   = 0x10A,
    R16_SNORM                   = 0x10B,
    R16_SINT                    = 0x10C,
    R16_UINT                    = 0x10D,
    R16_FLOAT                   = 0x10E,
    I16_UNORM                   = 0x111,
    L16_UNORM                   = 0x112,
    A16_UNORM                   = 0x113,
    L8A8_UNORM                  = 0x114,
    I16_FLOAT                   = 0x115,
    L16_FLOAT                   = 0x116,
    A16_FLOAT                   = 0x117,
    L8A8_UNORM_SRGB             = 0x118,
    B5G5R5X1_UNORM              = 0x11A,
    B5G5R5X1_UNORM_SRGB         = 0x11B,
    R8G8_SSCALED                = 0x11C,
    R8G8_USCALED                = 0x11D,
    R16_SSCALED                 = 0x11E,
    R16_USCALED                 = 0x11F,
    L8A8_UINT                   = 0x126,
    L8A8_SINT                   = 0x127,

    // ---- 0x140 .. 0x17F ------------------------------------------------
    R8_UNORM                    = 0x140,
    R8_SNORM                    = 0x141,
    R8_SINT                     = 0x142,
    R8_UINT                     = 0x143,
    A8_UNORM                    = 0x144,
    I8_UNORM                    = 0x145,
    L8_UNORM                    = 0x146,
    R8_SSCALED                  = 0x149,
    R8_USCALED                  = 0x14A,
    L8_UNORM_SRGB               = 0x14C,
    L8_UINT                     = 0x152,
    L8_SINT                     = 0x153,
    I8_UINT                     = 0x154,
    I8_SINT                     = 0x155,

    // ---- 0x180 .. 0x1C9 ------------------------------------------------
    YCRCB_SWAPUVY               = 0x183,
    BC1_UNORM                   = 0x186,
    BC2_UNORM                   = 0x187,
    BC3_UNORM                   = 0x188,
    BC4_UNORM                   = 0x189,
    BC5_UNORM                   = 0x18A,
    BC1_UNORM_SRGB              = 0x18B,
    BC2_UNORM_SRGB              = 0x18C,
    BC3_UNORM_SRGB              = 0x18D,
    YCRCB_SWAPUV                = 0x18F,
    R8G8B8_UNORM                = 0x193,
    R8G8B8_SNORM                = 0x194,
    R8G8B8_SSCALED              = 0x195,
    R8G8B8_USCALED              = 0x196,
    BC4_SNORM                   = 0x199,
    BC5_SNORM                   = 0x19A,
    R16G16B16_FLOAT             = 0x19B,
    R16G16B16_UNORM             = 0x19C,
    R16G16B16_SNORM             = 0x19D,
    R16G16B16_SSCALED           = 0x19E,
    R16G16B16_USCALED           = 0x19F,
    BC6H_SF16                   = 0x1A1,
    BC7_UNORM                   = 0x1A2,
    BC7_UNORM_SRGB              = 0x1A3,
    BC6H_UF16                   = 0x1A4,
    R8G8B8_UNORM_SRGB           = 0x1A8,
    R16G16B16_UINT              = 0x1B0,
    R16G16B16_SINT              = 0x1B1,
    R10G10B10A2_SNORM           = 0x1B3,
    R10G10B10A2_USCALED         = 0x1B4,
    R10G10B10A2_SSCALED         = 0x1B5,
    R10G10B10A2_SINT            = 0x1B6,
    B10G10R10A2_SNORM           = 0x1B7,
    B10G10R10A2_USCALED         = 0x1B8,
    B10G10R10A2_SSCALED         = 0x1B9,
    B10G10R10A2_UINT            = 0x1BA,
    B10G10R10A2_SINT            = 0x1BB,
    R8G8B8_UINT                 = 0x1C8,
    R8G8B8_SINT                 = 0x1C9,

    // One past the largest format value above.
    NUM_SWR_FORMATS             = 0x1CA,
};
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_FORMAT_INFO - Format information
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct SWR_FORMAT_INFO
|
||||
{
|
||||
const char* name;
|
||||
SWR_TYPE type[4];
|
||||
uint32_t defaults[4];
|
||||
uint32_t swizzle[4]; ///< swizzle per component
|
||||
uint32_t bpc[4]; ///< bits per component
|
||||
uint32_t bpp; ///< bits per pixel
|
||||
uint32_t Bpp; ///< bytes per pixel
|
||||
uint32_t numComps; ///< number of components
|
||||
bool isSRGB;
|
||||
bool isBC;
|
||||
bool isSubsampled;
|
||||
bool isNormalized[4];
|
||||
float toFloat[4];
|
||||
uint32_t bcWidth;
|
||||
uint32_t bcHeight;
|
||||
bool isLuminance;
|
||||
};
|
||||
|
||||
extern const SWR_FORMAT_INFO gFormatInfo[];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieves format info struct for given format.
|
||||
/// @param format - SWR format
|
||||
INLINE const SWR_FORMAT_INFO& GetFormatInfo(SWR_FORMAT format)
|
||||
{
|
||||
return gFormatInfo[format];
|
||||
}
|
||||
|
||||
// lookup table for unorm8 srgb -> float conversion
|
||||
extern const uint32_t srgb8Table[256];
|
|
@ -0,0 +1,235 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <bitset>
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <string.h>
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Snapshot of the x86 CPUID feature flags.
///
/// The constructor runs CPUID for every standard and extended leaf the CPU
/// reports and caches the registers; each accessor then just tests one bit
/// of the cached data. The snapshot never changes after construction, so
/// all accessors are const.
class InstructionSet
{
public:
    InstructionSet() : CPU_Rep() {}

    // Vendor and brand strings as reported by CPUID.
    // Brand() is empty when the extended brand leaves are not reported.
    std::string Vendor() const { return CPU_Rep.vendor_; }
    std::string Brand() const { return CPU_Rep.brand_; }

    // CPUID leaf 1, ECX
    bool SSE3() const { return CPU_Rep.f_1_ECX_[0]; }
    bool PCLMULQDQ() const { return CPU_Rep.f_1_ECX_[1]; }
    bool MONITOR() const { return CPU_Rep.f_1_ECX_[3]; }
    bool SSSE3() const { return CPU_Rep.f_1_ECX_[9]; }
    bool FMA() const { return CPU_Rep.f_1_ECX_[12]; }
    bool CMPXCHG16B() const { return CPU_Rep.f_1_ECX_[13]; }
    bool SSE41() const { return CPU_Rep.f_1_ECX_[19]; }
    bool SSE42() const { return CPU_Rep.f_1_ECX_[20]; }
    bool MOVBE() const { return CPU_Rep.f_1_ECX_[22]; }
    bool POPCNT() const { return CPU_Rep.f_1_ECX_[23]; }
    bool AES() const { return CPU_Rep.f_1_ECX_[25]; }
    bool XSAVE() const { return CPU_Rep.f_1_ECX_[26]; }
    bool OSXSAVE() const { return CPU_Rep.f_1_ECX_[27]; }
    bool RDRAND() const { return CPU_Rep.f_1_ECX_[30]; }

    // CPUID leaf 1, EDX
    bool MSR() const { return CPU_Rep.f_1_EDX_[5]; }
    bool CX8() const { return CPU_Rep.f_1_EDX_[8]; }
    bool SEP() const { return CPU_Rep.f_1_EDX_[11]; }
    bool CMOV() const { return CPU_Rep.f_1_EDX_[15]; }
    bool CLFSH() const { return CPU_Rep.f_1_EDX_[19]; }
    bool MMX() const { return CPU_Rep.f_1_EDX_[23]; }
    bool FXSR() const { return CPU_Rep.f_1_EDX_[24]; }
    bool SSE() const { return CPU_Rep.f_1_EDX_[25]; }
    bool SSE2() const { return CPU_Rep.f_1_EDX_[26]; }

    // CPUID leaf 7 (sub-leaf 0), EBX
    bool FSGSBASE() const { return CPU_Rep.f_7_EBX_[0]; }
    bool BMI1() const { return CPU_Rep.f_7_EBX_[3]; }
    bool HLE() const { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[4]; }
    bool BMI2() const { return CPU_Rep.f_7_EBX_[8]; }
    bool ERMS() const { return CPU_Rep.f_7_EBX_[9]; }
    bool INVPCID() const { return CPU_Rep.f_7_EBX_[10]; }
    bool RTM() const { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[11]; }
    bool RDSEED() const { return CPU_Rep.f_7_EBX_[18]; }
    bool ADX() const { return CPU_Rep.f_7_EBX_[19]; }
    bool SHA() const { return CPU_Rep.f_7_EBX_[29]; }

    // CPUID leaf 7 (sub-leaf 0), ECX
    bool PREFETCHWT1() const { return CPU_Rep.f_7_ECX_[0]; }

    // CPUID leaf 0x80000001, ECX
    // NOTE(review): several flags in this leaf are gated on the vendor
    // string (e.g. LZCNT/SYSCALL/RDTSCP only report true on "GenuineIntel").
    // Kept as-is; confirm this is intended for other vendors.
    bool LAHF() const { return CPU_Rep.f_81_ECX_[0]; }
    bool LZCNT() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_ECX_[5]; }
    bool ABM() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[5]; }
    bool SSE4a() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[6]; }
    bool XOP() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[11]; }
    bool TBM() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[21]; }

    // CPUID leaf 0x80000001, EDX
    bool SYSCALL() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[11]; }
    bool MMXEXT() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[22]; }
    bool RDTSCP() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[27]; }
    bool _3DNOWEXT() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[30]; }
    bool _3DNOW() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[31]; }

    // AVX family (leaf 1 ECX and leaf 7 EBX)
    bool AVX() const { return CPU_Rep.f_1_ECX_[28]; }
    bool F16C() const { return CPU_Rep.f_1_ECX_[29]; }
    bool AVX2() const { return CPU_Rep.f_7_EBX_[5]; }
    bool AVX512F() const { return CPU_Rep.f_7_EBX_[16]; }
    bool AVX512PF() const { return CPU_Rep.f_7_EBX_[26]; }
    bool AVX512ER() const { return CPU_Rep.f_7_EBX_[27]; }
    bool AVX512CD() const { return CPU_Rep.f_7_EBX_[28]; }

private:
    /// Performs the actual CPUID queries and stores the raw results.
    class InstructionSet_Internal
    {
    public:
        /// Register block of one CPUID call: [0]=EAX, [1]=EBX, [2]=ECX, [3]=EDX.
        typedef std::array<int, 4> CpuidRegs;

        InstructionSet_Internal()
            : nIds_{ 0 },
            nExIds_{ 0 },
            isIntel_{ false },
            isAMD_{ false },
            f_1_ECX_{ 0 },
            f_1_EDX_{ 0 },
            f_7_EBX_{ 0 },
            f_7_ECX_{ 0 },
            f_81_ECX_{ 0 },
            f_81_EDX_{ 0 },
            data_{},
            extdata_{}
        {
            // Leaf 0 reports the highest valid standard function ID.
            nIds_ = static_cast<int>(QueryMaxLeaf(0));
            for (int i = 0; i <= nIds_; ++i)
            {
                data_.push_back(QueryLeaf(static_cast<unsigned>(i)));
            }

            // Capture vendor string: leaf 0 returns it in EBX, EDX, ECX order.
            // memcpy is used instead of writing through int* into the char
            // buffer, which would be an aliasing/alignment hazard.
            char vendor[0x20];
            memset(vendor, 0, sizeof(vendor));
            memcpy(vendor + 0, &data_[0][1], sizeof(int));
            memcpy(vendor + 4, &data_[0][3], sizeof(int));
            memcpy(vendor + 8, &data_[0][2], sizeof(int));
            vendor_ = vendor;
            if (vendor_ == "GenuineIntel")
            {
                isIntel_ = true;
            }
            else if (vendor_ == "AuthenticAMD")
            {
                isAMD_ = true;
            }

            // load bitset with flags for function 0x00000001
            if (nIds_ >= 1)
            {
                f_1_ECX_ = data_[1][2];
                f_1_EDX_ = data_[1][3];
            }

            // load bitset with flags for function 0x00000007
            if (nIds_ >= 7)
            {
                f_7_EBX_ = data_[7][1];
                f_7_ECX_ = data_[7][2];
            }

            // Leaf 0x80000000 reports the highest valid extended function ID.
            nExIds_ = QueryMaxLeaf(0x80000000);
            for (unsigned i = 0x80000000; i <= nExIds_; ++i)
            {
                extdata_.push_back(QueryLeaf(i));
            }

            // load bitset with flags for function 0x80000001
            if (nExIds_ >= 0x80000001)
            {
                f_81_ECX_ = extdata_[1][2];
                f_81_EDX_ = extdata_[1][3];
            }

            // Interpret CPU brand string if reported: leaves
            // 0x80000002..0x80000004 each contribute 16 bytes.
            if (nExIds_ >= 0x80000004)
            {
                char brand[0x40];
                memset(brand, 0, sizeof(brand));
                memcpy(brand, extdata_[2].data(), sizeof(CpuidRegs));
                memcpy(brand + 16, extdata_[3].data(), sizeof(CpuidRegs));
                memcpy(brand + 32, extdata_[4].data(), sizeof(CpuidRegs));
                brand_ = brand;
            }
        }

        /// Highest function ID supported in the range starting at @p base
        /// (0 for standard leaves, 0x80000000 for extended leaves).
        static unsigned QueryMaxLeaf(unsigned base)
        {
#if defined(_WIN32)
            CpuidRegs regs;
            __cpuid(regs.data(), static_cast<int>(base));
            return static_cast<unsigned>(regs[0]);
#else
            return __get_cpuid_max(base, NULL);
#endif
        }

        /// Runs CPUID for @p leaf with sub-leaf 0 and returns the registers.
        static CpuidRegs QueryLeaf(unsigned leaf)
        {
            CpuidRegs regs;
#if defined(_WIN32)
            __cpuidex(regs.data(), static_cast<int>(leaf), 0);
#else
            int *r = regs.data();
            __cpuid_count(leaf, 0, r[0], r[1], r[2], r[3]);
#endif
            return regs;
        }

        int nIds_;                      ///< highest standard leaf
        unsigned nExIds_;               ///< highest extended leaf
        std::string vendor_;
        std::string brand_;
        bool isIntel_;
        bool isAMD_;
        std::bitset<32> f_1_ECX_;
        std::bitset<32> f_1_EDX_;
        std::bitset<32> f_7_EBX_;
        std::bitset<32> f_7_ECX_;
        std::bitset<32> f_81_ECX_;
        std::bitset<32> f_81_EDX_;
        std::vector<CpuidRegs> data_;     ///< raw registers of standard leaves 0..nIds_
        std::vector<CpuidRegs> extdata_;  ///< raw registers of extended leaves 0x80000000..nExIds_
    };

    const InstructionSet_Internal CPU_Rep;
};
|
|
@ -0,0 +1,221 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __SWR_OS_H__
|
||||
#define __SWR_OS_H__
|
||||
|
||||
#include "core/knobs.h"
|
||||
|
||||
#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
|
||||
|
||||
#define SWR_API __cdecl
|
||||
|
||||
#ifndef _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include "Windows.h"
|
||||
#include <intrin.h>
|
||||
#include <cstdint>
|
||||
|
||||
#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
|
||||
#define THREAD __declspec(thread)
|
||||
#define INLINE __forceinline
|
||||
#define DEBUGBREAK __debugbreak()
|
||||
|
||||
#define PRAGMA_WARNING_PUSH_DISABLE(...) \
|
||||
__pragma(warning(push));\
|
||||
__pragma(warning(disable:__VA_ARGS__));
|
||||
|
||||
#define PRAGMA_WARNING_POP() __pragma(warning(pop))
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(_WIN64)
|
||||
#define BitScanForwardSizeT BitScanForward64
|
||||
#define _mm_popcount_sizeT _mm_popcnt_u64
|
||||
#else
|
||||
#define BitScanForwardSizeT BitScanForward
|
||||
#define _mm_popcount_sizeT _mm_popcnt_u32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#elif defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
|
||||
|
||||
#define SWR_API
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <X11/Xmd.h>
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
typedef void VOID;
|
||||
typedef void* LPVOID;
|
||||
typedef CARD8 BOOL;
|
||||
typedef wchar_t WCHAR;
|
||||
typedef uint16_t UINT16;
|
||||
typedef int INT;
|
||||
typedef int INT32;
|
||||
typedef unsigned int UINT;
|
||||
typedef uint32_t UINT32;
|
||||
typedef uint64_t UINT64;
|
||||
typedef int64_t INT64;
|
||||
typedef void* HANDLE;
|
||||
typedef float FLOAT;
|
||||
typedef int LONG;
|
||||
typedef CARD8 BYTE;
|
||||
typedef unsigned char UCHAR;
|
||||
typedef unsigned int DWORD;
|
||||
|
||||
#undef FALSE
|
||||
#define FALSE 0
|
||||
|
||||
#undef TRUE
|
||||
#define TRUE 1
|
||||
|
||||
#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
|
||||
#define THREAD __thread
|
||||
#ifndef INLINE
|
||||
#define INLINE __inline
|
||||
#endif
|
||||
#define DEBUGBREAK asm ("int $3")
|
||||
#define __cdecl
|
||||
#define __declspec(X)
|
||||
|
||||
#define GCC_VERSION (__GNUC__ * 10000 \
|
||||
+ __GNUC_MINOR__ * 100 \
|
||||
+ __GNUC_PATCHLEVEL__)
|
||||
|
||||
#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
|
||||
// Fallback for compilers that do not provide __rdtsc: executes the RDTSC
// instruction and returns EDX:EAX combined into one 64-bit value.
// The halves are held in unsigned 32-bit variables so the low half cannot
// sign-extend into the high half when it is widened (which happened with
// a signed 32-bit 'long').
inline
uint64_t __rdtsc()
{
    uint32_t low, high;
    asm volatile("rdtsc" : "=a"(low), "=d"(high));
    return (static_cast<uint64_t>(high) << 32) | low;
}
|
||||
#endif
|
||||
|
||||
#ifndef __clang__
|
||||
// Intrinsic not defined in gcc
|
||||
// Stand-in for the intrinsic of the same name: stores the lower 128 bits of
// 'a' to *lo and the upper 128 bits to *hi, both as unaligned stores.
static INLINE
void _mm256_storeu2_m128i(__m128i *hi, __m128i *lo, __m256i a)
{
    const __m128i lowerHalf = _mm256_castsi256_si128(a);
    _mm_storeu_si128(lo, lowerHalf);

    const __m128i upperHalf = _mm256_extractf128_si256(a, 0x1);
    _mm_storeu_si128(hi, upperHalf);
}
|
||||
#endif
|
||||
|
||||
// Emulation of the MSVC intrinsic: finds the lowest set bit of Mask.
// Returns 1 and stores the bit index in *Index when Mask is non-zero;
// returns 0 and leaves *Index untouched when Mask is zero.
// Uses the 'long' builtin so the full width of Mask is scanned, and never
// calls the builtin with 0 (its result is undefined for that input).
inline
unsigned char _BitScanForward(unsigned long *Index, unsigned long Mask)
{
    if (Mask == 0)
    {
        return 0;
    }
    *Index = static_cast<unsigned long>(__builtin_ctzl(Mask));
    return 1;
}
|
||||
|
||||
// Emulation of the MSVC intrinsic for 32-bit masks: finds the lowest set bit.
// Returns 1 and stores the bit index in *Index when Mask is non-zero;
// returns 0 and leaves *Index untouched when Mask is zero (the builtin's
// result is undefined for 0, so it is not called in that case).
inline
unsigned char _BitScanForward(unsigned int *Index, unsigned int Mask)
{
    if (Mask == 0)
    {
        return 0;
    }
    *Index = static_cast<unsigned int>(__builtin_ctz(Mask));
    return 1;
}
|
||||
|
||||
// Emulation of the MSVC intrinsic: finds the highest set bit of Mask.
// Returns 1 and stores the bit index (0 = least significant bit) in *Index
// when Mask is non-zero; returns 0 and leaves *Index untouched otherwise.
// The builtin yields the number of leading zeros, so it is converted to a
// bit position; the 'long' variant is used so the full width is scanned.
inline
unsigned char _BitScanReverse(unsigned long *Index, unsigned long Mask)
{
    if (Mask == 0)
    {
        return 0;
    }
    const unsigned long topBit = sizeof(unsigned long) * 8 - 1;
    *Index = topBit - static_cast<unsigned long>(__builtin_clzl(Mask));
    return 1;
}
|
||||
|
||||
// Emulation of the MSVC intrinsic for 32-bit masks: finds the highest set bit.
// Returns 1 and stores the bit index (0 = least significant bit) in *Index
// when Mask is non-zero; returns 0 and leaves *Index untouched otherwise.
// __builtin_clz counts leading zeros, hence the 31 - clz conversion.
inline
unsigned char _BitScanReverse(unsigned int *Index, unsigned int Mask)
{
    if (Mask == 0)
    {
        return 0;
    }
    *Index = 31u - static_cast<unsigned int>(__builtin_clz(Mask));
    return 1;
}
|
||||
|
||||
// Emulation of the Windows CRT function on top of posix_memalign.
// Returns a block of at least 'size' bytes aligned to 'alignment', or a
// null pointer on failure. Release the block with _aligned_free.
// - Parameters are size_t so large requests are not truncated.
// - posix_memalign rejects alignments smaller than sizeof(void*), so small
//   alignments are rounded up to that minimum (still satisfies the request
//   for any power-of-two alignment).
inline
void *_aligned_malloc(size_t size, size_t alignment)
{
    if (alignment < sizeof(void*))
    {
        alignment = sizeof(void*);
    }

    void *ret = nullptr;
    if (posix_memalign(&ret, alignment, size) != 0)
    {
        return nullptr;
    }
    return ret;
}
|
||||
|
||||
inline
|
||||
unsigned char _bittest(const LONG *a, LONG b)
|
||||
{
|
||||
return ((*(unsigned *)(a) & (1 << b)) != 0);
|
||||
}
|
||||
|
||||
#define GetCurrentProcessId getpid
|
||||
|
||||
#define CreateDirectory(name, pSecurity) mkdir(name, 0777)
|
||||
|
||||
#if defined(_WIN32)
|
||||
// Population count of a 32-bit value, implemented with the compiler builtin.
static inline
unsigned int _mm_popcnt_u32(unsigned int v)
{
    const unsigned int setBits = __builtin_popcount(v);
    return setBits;
}
|
||||
#endif
|
||||
|
||||
#define _aligned_free free
|
||||
#define InterlockedCompareExchange(Dest, Exchange, Comparand) __sync_val_compare_and_swap(Dest, Comparand, Exchange)
|
||||
#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
|
||||
#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
|
||||
#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)
|
||||
#define _ReadWriteBarrier() asm volatile("" ::: "memory")
|
||||
#define __stdcall
|
||||
|
||||
#define PRAGMA_WARNING_PUSH_DISABLE(...)
|
||||
#define PRAGMA_WARNING_POP()
|
||||
|
||||
#else
|
||||
|
||||
#error Unsupported OS/system.
|
||||
|
||||
#endif
|
||||
|
||||
// Universal types
|
||||
typedef BYTE KILOBYTE[1024];
|
||||
typedef KILOBYTE MEGABYTE[1024];
|
||||
typedef MEGABYTE GIGABYTE[1024];
|
||||
|
||||
#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, 32)
|
||||
#endif
|
||||
|
||||
#include "common/swr_assert.h"
|
||||
|
||||
#endif//__SWR_OS_H__
|
|
@ -0,0 +1,188 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file rdtsc_buckets.cpp
|
||||
*
|
||||
* @brief implementation of rdtsc buckets.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "rdtsc_buckets.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
// Id of the calling thread as handed out by BucketManager::RegisterThread();
// used as the index into BucketManager::mThreads.
THREAD UINT tlsThreadId = 0;
|
||||
|
||||
void BucketManager::RegisterThread(const std::string& name)
|
||||
{
|
||||
BUCKET_THREAD newThread;
|
||||
newThread.name = name;
|
||||
newThread.root.children.reserve(mBuckets.size());
|
||||
newThread.root.id = 0;
|
||||
newThread.root.pParent = nullptr;
|
||||
newThread.pCurrent = &newThread.root;
|
||||
|
||||
mThreadMutex.lock();
|
||||
|
||||
// assign unique thread id for this thread
|
||||
size_t id = mThreads.size();
|
||||
newThread.id = (UINT)id;
|
||||
tlsThreadId = (UINT)id;
|
||||
|
||||
// open threadviz file if enabled
|
||||
if (mThreadViz)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << mThreadVizDir << "\\threadviz_thread." << newThread.id << ".dat";
|
||||
newThread.vizFile = fopen(ss.str().c_str(), "wb");
|
||||
}
|
||||
|
||||
// store new thread
|
||||
mThreads.push_back(newThread);
|
||||
|
||||
mThreadMutex.unlock();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Appends a bucket description to the manager.
/// @param desc - description of the bucket
/// @return id of the new bucket, i.e. its position in mBuckets
UINT BucketManager::RegisterBucket(const BUCKET_DESC& desc)
{
    // The id is the slot the description is about to occupy.
    const UINT newId = (UINT)mBuckets.size();
    mBuckets.push_back(desc);
    return newId;
}
|
||||
|
||||
void BucketManager::PrintBucket(FILE* f, UINT level, UINT64 threadCycles, UINT64 parentCycles, const BUCKET& bucket)
|
||||
{
|
||||
const char *arrows[] = {
|
||||
"",
|
||||
"|-> ",
|
||||
" |-> ",
|
||||
" |-> ",
|
||||
" |-> ",
|
||||
" |-> ",
|
||||
" |-> "
|
||||
};
|
||||
|
||||
// compute percent of total cycles used by this bucket
|
||||
float percentTotal = (float)((double)bucket.elapsed / (double)threadCycles * 100.0);
|
||||
|
||||
// compute percent of parent cycles used by this bucket
|
||||
float percentParent = (float)((double)bucket.elapsed / (double)parentCycles * 100.0);
|
||||
|
||||
// compute average cycle count per invocation
|
||||
UINT64 CPE = bucket.elapsed / bucket.count;
|
||||
|
||||
BUCKET_DESC &desc = mBuckets[bucket.id];
|
||||
|
||||
// construct hierarchy visualization
|
||||
char hier[80];
|
||||
strcpy(hier, arrows[level]);
|
||||
strcat(hier, desc.name.c_str());
|
||||
|
||||
// print out
|
||||
fprintf(f, "%6.2f %6.2f %-10" PRIu64 " %-10" PRIu64 " %-10u %-10lu %-10u %s\n",
|
||||
percentTotal,
|
||||
percentParent,
|
||||
bucket.elapsed,
|
||||
CPE,
|
||||
bucket.count,
|
||||
(unsigned long)0,
|
||||
(uint32_t)0,
|
||||
hier
|
||||
);
|
||||
|
||||
// dump all children of this bucket
|
||||
for (const BUCKET& child : bucket.children)
|
||||
{
|
||||
if (child.count)
|
||||
{
|
||||
PrintBucket(f, level + 1, threadCycles, bucket.elapsed, child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BucketManager::PrintThread(FILE* f, const BUCKET_THREAD& thread)
|
||||
{
|
||||
// print header
|
||||
fprintf(f, "\nThread %u (%s)\n", thread.id, thread.name.c_str());
|
||||
fprintf(f, " %%Tot %%Par Cycles CPE NumEvent CPE2 NumEvent2 Bucket\n");
|
||||
|
||||
// compute thread level total cycle counts across all buckets from root
|
||||
const BUCKET& root = thread.root;
|
||||
UINT64 totalCycles = 0;
|
||||
for (const BUCKET& child : root.children)
|
||||
{
|
||||
totalCycles += child.elapsed;
|
||||
}
|
||||
|
||||
for (const BUCKET& child : root.children)
|
||||
{
|
||||
if (child.count)
|
||||
{
|
||||
PrintBucket(f, 0, totalCycles, totalCycles, child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BucketManager::DumpThreadViz()
|
||||
{
|
||||
// ensure all thread data is flushed
|
||||
mThreadMutex.lock();
|
||||
for (auto& thread : mThreads)
|
||||
{
|
||||
fflush(thread.vizFile);
|
||||
fclose(thread.vizFile);
|
||||
}
|
||||
mThreadMutex.unlock();
|
||||
|
||||
// dump bucket descriptions
|
||||
std::stringstream ss;
|
||||
ss << mThreadVizDir << "\\threadviz_buckets.dat";
|
||||
|
||||
FILE* f = fopen(ss.str().c_str(), "wb");
|
||||
for (auto& bucket : mBuckets)
|
||||
{
|
||||
Serialize(f, bucket);
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void BucketManager::PrintReport(const std::string& filename)
|
||||
{
|
||||
if (mThreadViz)
|
||||
{
|
||||
DumpThreadViz();
|
||||
}
|
||||
else
|
||||
{
|
||||
FILE* f = fopen(filename.c_str(), "w");
|
||||
|
||||
mThreadMutex.lock();
|
||||
for (const BUCKET_THREAD& thread : mThreads)
|
||||
{
|
||||
PrintThread(f, thread);
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
mThreadMutex.unlock();
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,229 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file rdtsc_buckets.h
|
||||
*
|
||||
* @brief declaration for rdtsc buckets.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "os.h"
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
|
||||
#include "rdtsc_buckets_shared.h"
|
||||
|
||||
// unique thread id stored in thread local storage
|
||||
extern THREAD UINT tlsThreadId;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief BucketManager encapsulates a single instance of the buckets
|
||||
/// functionality. There can be one or many bucket managers active
|
||||
/// at any time. The manager owns all the threads and
|
||||
/// bucket information that have been registered to it.
|
||||
class BucketManager
|
||||
{
|
||||
public:
|
||||
BucketManager(bool enableThreadViz) : mThreadViz(enableThreadViz)
|
||||
{
|
||||
if (mThreadViz)
|
||||
{
|
||||
uint32_t pid = GetCurrentProcessId();
|
||||
std::stringstream str;
|
||||
str << "threadviz." << pid;
|
||||
mThreadVizDir = str.str();
|
||||
CreateDirectory(mThreadVizDir.c_str(), NULL);
|
||||
}
|
||||
}
|
||||
|
||||
// removes all registered thread data
|
||||
void ClearThreads()
|
||||
{
|
||||
mThreadMutex.lock();
|
||||
mThreads.clear();
|
||||
mThreadMutex.unlock();
|
||||
}
|
||||
|
||||
// removes all registered buckets
|
||||
void ClearBuckets()
|
||||
{
|
||||
mBuckets.clear();
|
||||
}
|
||||
|
||||
/// Registers a new thread with the manager.
|
||||
/// @param name - name of thread, used for labels in reports and threadviz
|
||||
void RegisterThread(const std::string& name);
|
||||
|
||||
/// Registers a new bucket type with the manager. Returns a unique
|
||||
/// id which should be used in subsequent calls to start/stop the bucket
|
||||
/// @param desc - description of the bucket
|
||||
/// @return unique id
|
||||
UINT RegisterBucket(const BUCKET_DESC& desc);
|
||||
|
||||
// dump threadviz data
|
||||
void DumpThreadViz();
|
||||
|
||||
// print report
|
||||
void PrintReport(const std::string& filename);
|
||||
|
||||
// start capturing
|
||||
INLINE void StartCapture()
|
||||
{
|
||||
mCapturing = true;
|
||||
}
|
||||
|
||||
// stop capturing
|
||||
INLINE void StopCapture()
|
||||
{
|
||||
mCapturing = false;
|
||||
|
||||
// wait for all threads to pop back to root bucket
|
||||
bool stillCapturing = true;
|
||||
while (stillCapturing)
|
||||
{
|
||||
stillCapturing = false;
|
||||
for (const BUCKET_THREAD& t : mThreads)
|
||||
{
|
||||
if (t.pCurrent != &t.root)
|
||||
{
|
||||
stillCapturing = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// start a bucket
|
||||
// @param id generated by RegisterBucket
|
||||
INLINE void StartBucket(UINT id)
|
||||
{
|
||||
if (!mCapturing) return;
|
||||
|
||||
SWR_ASSERT(tlsThreadId < mThreads.size());
|
||||
|
||||
BUCKET_THREAD& bt = mThreads[tlsThreadId];
|
||||
|
||||
// if threadviz is enabled, only need to dump start info to threads viz file
|
||||
if (mThreadViz)
|
||||
{
|
||||
SWR_ASSERT(bt.vizFile != nullptr);
|
||||
if (mBuckets[id].enableThreadViz)
|
||||
{
|
||||
VIZ_START_DATA data{ VIZ_START, id, __rdtsc() };
|
||||
Serialize(bt.vizFile, data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bt.pCurrent->children.size() < mBuckets.size())
|
||||
{
|
||||
bt.pCurrent->children.resize(mBuckets.size());
|
||||
}
|
||||
BUCKET &child = bt.pCurrent->children[id];
|
||||
child.pParent = bt.pCurrent;
|
||||
child.id = id;
|
||||
child.start = __rdtsc();
|
||||
|
||||
// update thread's currently executing bucket
|
||||
bt.pCurrent = &child;
|
||||
}
|
||||
|
||||
bt.level++;
|
||||
}
|
||||
|
||||
// stop the currently executing bucket
|
||||
INLINE void StopBucket(UINT id)
|
||||
{
|
||||
SWR_ASSERT(tlsThreadId < mThreads.size());
|
||||
BUCKET_THREAD &bt = mThreads[tlsThreadId];
|
||||
|
||||
if (bt.level == 0) return;
|
||||
|
||||
if (mThreadViz)
|
||||
{
|
||||
SWR_ASSERT(bt.vizFile != nullptr);
|
||||
if (mBuckets[id].enableThreadViz)
|
||||
{
|
||||
VIZ_STOP_DATA data{ VIZ_STOP, __rdtsc() };
|
||||
Serialize(bt.vizFile, data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bt.pCurrent->start == 0) return;
|
||||
SWR_ASSERT(bt.pCurrent->id == id, "Mismatched buckets detected");
|
||||
|
||||
bt.pCurrent->elapsed += (__rdtsc() - bt.pCurrent->start);
|
||||
bt.pCurrent->count++;
|
||||
|
||||
// pop to parent
|
||||
bt.pCurrent = bt.pCurrent->pParent;
|
||||
}
|
||||
|
||||
bt.level--;
|
||||
}
|
||||
|
||||
INLINE void AddEvent(uint32_t id, uint32_t count)
|
||||
{
|
||||
if (!mCapturing) return;
|
||||
|
||||
SWR_ASSERT(tlsThreadId < mThreads.size());
|
||||
|
||||
BUCKET_THREAD& bt = mThreads[tlsThreadId];
|
||||
|
||||
// don't record events for threadviz
|
||||
if (!mThreadViz)
|
||||
{
|
||||
if (bt.pCurrent->children.size() < mBuckets.size())
|
||||
{
|
||||
bt.pCurrent->children.resize(mBuckets.size());
|
||||
}
|
||||
BUCKET &child = bt.pCurrent->children[id];
|
||||
child.pParent = bt.pCurrent;
|
||||
child.id = id;
|
||||
child.count += count;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void PrintBucket(FILE* f, UINT level, UINT64 threadCycles, UINT64 parentCycles, const BUCKET& bucket);
|
||||
void PrintThread(FILE* f, const BUCKET_THREAD& thread);
|
||||
|
||||
// list of active threads that have registered with this manager
|
||||
std::vector<BUCKET_THREAD> mThreads;
|
||||
|
||||
// list of buckets registered with this manager
|
||||
std::vector<BUCKET_DESC> mBuckets;
|
||||
|
||||
// is capturing currently enabled
|
||||
volatile bool mCapturing{ false };
|
||||
|
||||
std::mutex mThreadMutex;
|
||||
|
||||
// enable threadviz
|
||||
bool mThreadViz{ false };
|
||||
std::string mThreadVizDir;
|
||||
};
|
|
@ -0,0 +1,167 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file rdtsc_buckets_shared.h
|
||||
*
|
||||
* @brief shared data types and (de)serialization helpers for rdtsc buckets.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
// One node of a per-thread bucket hierarchy.
struct BUCKET
{
    // id of the bucket; indexes the manager's list of bucket descriptions
    uint32_t id = 0;

    // timestamp recorded when the bucket was last started
    uint64_t start = 0;

    // cycles accumulated over all completed start/stop pairs
    uint64_t elapsed = 0;

    // number of completed invocations plus any explicitly added event counts
    uint32_t count = 0;

    // enclosing bucket, nullptr when there is none
    BUCKET* pParent = nullptr;

    // nested buckets, indexed by bucket id
    std::vector<BUCKET> children;
};
|
||||
|
||||
// Static description of a bucket type. Kept as a plain aggregate so that it can
// be brace-initialized member by member.
struct BUCKET_DESC
{
    // bucket name; shown in the text report
    std::string name;

    // longer description; used by threadviz
    std::string description;

    // whether start/stop events of this bucket are written to the threadviz files
    bool enableThreadViz;

    // color of the bucket in threadviz, RGBA8_UNORM format
    uint32_t color;
};
|
||||
|
||||
struct BUCKET_THREAD
|
||||
{
|
||||
// name of thread, used in reports
|
||||
std::string name;
|
||||
|
||||
// id for this thread, assigned by the thread manager
|
||||
uint32_t id;
|
||||
|
||||
// root of the bucket hierarchy for this thread
|
||||
BUCKET root;
|
||||
|
||||
// currently executing bucket somewhere in the hierarchy
|
||||
BUCKET* pCurrent;
|
||||
|
||||
// currently executing hierarchy level
|
||||
uint32_t level{ 0 };
|
||||
|
||||
// threadviz file object
|
||||
FILE* vizFile{ nullptr };
|
||||
|
||||
BUCKET_THREAD() {}
|
||||
BUCKET_THREAD(const BUCKET_THREAD& that)
|
||||
{
|
||||
name = that.name;
|
||||
id = that.id;
|
||||
root = that.root;
|
||||
pCurrent = &root;
|
||||
vizFile = that.vizFile;
|
||||
}
|
||||
};
|
||||
|
||||
// Record tags stored in the 'type' field of the serialized threadviz records.
// The numeric values are written to file, so they are spelled out explicitly.
enum VIZ_TYPE
{
    VIZ_START = 0, // tag of VIZ_START_DATA records
    VIZ_STOP  = 1, // tag of VIZ_STOP_DATA records
    VIZ_DATA  = 2
};
|
||||
|
||||
// Threadviz record emitted when a bucket is started. It is written to file as
// raw bytes, so member order and types define the file format.
struct VIZ_START_DATA
{
    uint8_t type;       // record tag, VIZ_START
    uint32_t bucketId;  // id of the bucket that was started
    uint64_t timestamp; // timestamp taken at the start of the bucket
};
|
||||
|
||||
// Threadviz record emitted when a bucket is stopped. It is written to file as
// raw bytes, so member order and types define the file format.
struct VIZ_STOP_DATA
{
    uint8_t type;       // record tag, VIZ_STOP
    uint64_t timestamp; // timestamp taken when the bucket was stopped
};
|
||||
|
||||
// Writes a start record to 'f' as the raw bytes of the struct.
inline void Serialize(FILE* f, const VIZ_START_DATA& data)
{
    fwrite(&data, sizeof(data), 1, f);
}
|
||||
|
||||
inline void Deserialize(FILE* f, VIZ_START_DATA& data)
|
||||
{
|
||||
fread(&data, sizeof(VIZ_START_DATA), 1, f);
|
||||
assert(data.type == VIZ_START);
|
||||
}
|
||||
|
||||
// Writes a stop record to 'f' as the raw bytes of the struct.
inline void Serialize(FILE* f, const VIZ_STOP_DATA& data)
{
    fwrite(&data, sizeof(data), 1, f);
}
|
||||
|
||||
inline void Deserialize(FILE* f, VIZ_STOP_DATA& data)
|
||||
{
|
||||
fread(&data, sizeof(VIZ_STOP_DATA), 1, f);
|
||||
assert(data.type == VIZ_STOP);
|
||||
}
|
||||
|
||||
// Writes 'string' to 'f' as a one-byte length followed by that many characters.
// The length prefix is a single byte, so at most 255 characters can be stored.
inline void Serialize(FILE* f, const std::string& string)
{
    assert(string.size() <= 255);

    // With asserts compiled out, clamp so that the prefix and the payload
    // always agree and the stream stays readable.
    const uint8_t length = (uint8_t)(string.size() < 255 ? string.size() : 255);
    fwrite(&length, sizeof(length), 1, f);
    if (length != 0)
    {
        fwrite(string.data(), 1, length, f);
    }
}
|
||||
|
||||
// Reads a string written by the matching Serialize: a one-byte length followed
// by that many characters. On end of file 'string' is cleared; if the payload
// is truncated, 'string' receives the characters that could be read.
inline void Deserialize(FILE* f, std::string& string)
{
    char cstr[256];
    uint8_t length = 0;
    if (fread(&length, sizeof(length), 1, f) != 1)
    {
        string.clear();
        return;
    }

    // Assign by count so that embedded NUL characters survive the round trip.
    const size_t numRead = fread(cstr, 1, length, f);
    string.assign(cstr, numRead);
}
|
||||
|
||||
inline void Serialize(FILE* f, const BUCKET_DESC& desc)
|
||||
{
|
||||
Serialize(f, desc.name);
|
||||
Serialize(f, desc.description);
|
||||
fwrite(&desc.enableThreadViz, sizeof(desc.enableThreadViz), 1, f);
|
||||
fwrite(&desc.color, sizeof(desc.color), 1, f);
|
||||
}
|
||||
|
||||
inline void Deserialize(FILE* f, BUCKET_DESC& desc)
|
||||
{
|
||||
Deserialize(f, desc.name);
|
||||
Deserialize(f, desc.description);
|
||||
fread(&desc.enableThreadViz, sizeof(desc.enableThreadViz), 1, f);
|
||||
fread(&desc.color, sizeof(desc.color), 1, f);
|
||||
}
|
|
@ -0,0 +1,787 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __SWR_SIMDINTRIN_H__
|
||||
#define __SWR_SIMDINTRIN_H__
|
||||
|
||||
#include "os.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
// Width-specific SIMD register types. Only the 8-wide (256-bit) configuration is supported.
#if KNOB_SIMD_WIDTH == 8
using simdscalar  = __m256;   // packed float lanes
using simdscalari = __m256i;  // packed integer lanes
using simdmask    = uint8_t;  // bitmask type
#else
#error Unsupported vector width
#endif
|
||||
|
||||
// simd vector
|
||||
OSALIGNSIMD(union) simdvector
|
||||
{
|
||||
simdscalar v[4];
|
||||
struct
|
||||
{
|
||||
simdscalar x, y, z, w;
|
||||
};
|
||||
|
||||
simdscalar& operator[] (const int i) { return v[i]; }
|
||||
const simdscalar& operator[] (const int i) const { return v[i]; }
|
||||
};
|
||||
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
// ---- float operations: direct aliases of the 128/256-bit intrinsics ----

// load / store / set
#define _simd128_maskstore_ps _mm_maskstore_ps
#define _simd_load_ps _mm256_load_ps
#define _simd_load1_ps _mm256_broadcast_ss
#define _simd_loadu_ps _mm256_loadu_ps
#define _simd_setzero_ps _mm256_setzero_ps
#define _simd_set1_ps _mm256_set1_ps
#define _simd_blend_ps _mm256_blend_ps
#define _simd_blendv_ps _mm256_blendv_ps
#define _simd_store_ps _mm256_store_ps

// arithmetic
#define _simd_mul_ps _mm256_mul_ps
#define _simd_add_ps _mm256_add_ps
#define _simd_sub_ps _mm256_sub_ps
#define _simd_rsqrt_ps _mm256_rsqrt_ps
#define _simd_min_ps _mm256_min_ps
#define _simd_max_ps _mm256_max_ps
#define _simd_movemask_ps _mm256_movemask_ps

// conversions
#define _simd_cvtps_epi32 _mm256_cvtps_epi32
#define _simd_cvttps_epi32 _mm256_cvttps_epi32
#define _simd_cvtepi32_ps _mm256_cvtepi32_ps

// comparisons; all fixed predicates are the ordered, quiet (_OQ) variants
#define _simd_cmplt_ps(a, b) _mm256_cmp_ps(a, b, _CMP_LT_OQ)
#define _simd_cmpgt_ps(a, b) _mm256_cmp_ps(a, b, _CMP_GT_OQ)
#define _simd_cmpneq_ps(a, b) _mm256_cmp_ps(a, b, _CMP_NEQ_OQ)
#define _simd_cmpeq_ps(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_OQ)
#define _simd_cmpge_ps(a, b) _mm256_cmp_ps(a, b, _CMP_GE_OQ)
#define _simd_cmple_ps(a, b) _mm256_cmp_ps(a, b, _CMP_LE_OQ)
#define _simd_cmp_ps(a, b, imm) _mm256_cmp_ps(a, b, imm)

// bitwise
#define _simd_and_ps _mm256_and_ps
#define _simd_or_ps _mm256_or_ps

#define _simd_rcp_ps _mm256_rcp_ps
#define _simd_div_ps _mm256_div_ps
#define _simd_castsi_ps _mm256_castsi256_ps
#define _simd_andnot_ps _mm256_andnot_ps
#define _simd_round_ps _mm256_round_ps
#define _simd_castpd_ps _mm256_castpd_ps
#define _simd_broadcast_ps(a) _mm256_broadcast_ps((const __m128*)(a))

// double precision
// NOTE(review): _mm256_load_sd does not appear to be an existing intrinsic -- verify
// before using _simd_load_sd.
#define _simd_load_sd _mm256_load_sd
#define _simd_movemask_pd _mm256_movemask_pd
#define _simd_castsi_pd _mm256_castsi256_pd
|
||||
// emulated integer simd
// SIMD_EMU_EPI(func, intrin) defines 'func', a 256-bit binary integer operation
// built from the 128-bit intrinsic 'intrin': both operands are split into their
// lower and upper 128-bit halves, 'intrin' is applied to each pair of halves and
// the two results are joined again.
#define SIMD_EMU_EPI(func, intrin) \
INLINE \
__m256i func(__m256i a, __m256i b)\
{\
    const __m128i upperA = _mm256_extractf128_si256(a, 1);\
    const __m128i upperB = _mm256_extractf128_si256(b, 1);\
    const __m128i lowerA = _mm256_castsi256_si128(a);\
    const __m128i lowerB = _mm256_castsi256_si128(b);\
\
    const __m128i lowerRes = intrin(lowerA, lowerB);\
    const __m128i upperRes = intrin(upperA, upperB);\
\
    return _mm256_insertf128_si256(_mm256_castsi128_si256(lowerRes), upperRes, 1);\
}
|
||||
|
||||
#if (KNOB_ARCH == KNOB_ARCH_AVX)
|
||||
#define _simd_mul_epi32 _simdemu_mul_epi32
|
||||
#define _simd_mullo_epi32 _simdemu_mullo_epi32
|
||||
#define _simd_sub_epi32 _simdemu_sub_epi32
|
||||
#define _simd_sub_epi64 _simdemu_sub_epi64
|
||||
#define _simd_min_epi32 _simdemu_min_epi32
|
||||
#define _simd_min_epu32 _simdemu_min_epu32
|
||||
#define _simd_max_epi32 _simdemu_max_epi32
|
||||
#define _simd_max_epu32 _simdemu_max_epu32
|
||||
#define _simd_add_epi32 _simdemu_add_epi32
|
||||
#define _simd_and_si _simdemu_and_si
|
||||
#define _simd_andnot_si _simdemu_andnot_si
|
||||
#define _simd_cmpeq_epi32 _simdemu_cmpeq_epi32
|
||||
#define _simd_cmplt_epi32 _simdemu_cmplt_epi32
|
||||
#define _simd_cmpgt_epi32 _simdemu_cmpgt_epi32
|
||||
#define _simd_or_si _simdemu_or_si
|
||||
#define _simd_castps_si _mm256_castps_si256
|
||||
#define _simd_adds_epu8 _simdemu_adds_epu8
|
||||
#define _simd_subs_epu8 _simdemu_subs_epu8
|
||||
#define _simd_add_epi8 _simdemu_add_epi8
|
||||
#define _simd_cmpeq_epi64 _simdemu_cmpeq_epi64
|
||||
#define _simd_cmpgt_epi64 _simdemu_cmpgt_epi64
|
||||
#define _simd_movemask_epi8 _simdemu_movemask_epi8
|
||||
|
||||
SIMD_EMU_EPI(_simdemu_mul_epi32, _mm_mul_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_mullo_epi32, _mm_mullo_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_sub_epi32, _mm_sub_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_sub_epi64, _mm_sub_epi64)
|
||||
SIMD_EMU_EPI(_simdemu_min_epi32, _mm_min_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_min_epu32, _mm_min_epu32)
|
||||
SIMD_EMU_EPI(_simdemu_max_epi32, _mm_max_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_max_epu32, _mm_max_epu32)
|
||||
SIMD_EMU_EPI(_simdemu_add_epi32, _mm_add_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_and_si, _mm_and_si128)
|
||||
SIMD_EMU_EPI(_simdemu_andnot_si, _mm_andnot_si128)
|
||||
SIMD_EMU_EPI(_simdemu_cmpeq_epi32, _mm_cmpeq_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_cmplt_epi32, _mm_cmplt_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_cmpgt_epi32, _mm_cmpgt_epi32)
|
||||
SIMD_EMU_EPI(_simdemu_or_si, _mm_or_si128)
|
||||
SIMD_EMU_EPI(_simdemu_adds_epu8, _mm_adds_epu8)
|
||||
SIMD_EMU_EPI(_simdemu_subs_epu8, _mm_subs_epu8)
|
||||
SIMD_EMU_EPI(_simdemu_add_epi8, _mm_add_epi8)
|
||||
SIMD_EMU_EPI(_simdemu_cmpeq_epi64, _mm_cmpeq_epi64)
|
||||
SIMD_EMU_EPI(_simdemu_cmpgt_epi64, _mm_cmpgt_epi64)
|
||||
|
||||
#define _simd_unpacklo_epi32(a, b) _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)))
|
||||
#define _simd_unpackhi_epi32(a, b) _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)))
|
||||
|
||||
#define _simd_slli_epi32(a,i) _simdemu_slli_epi32(a,i)
|
||||
#define _simd_srai_epi32(a,i) _simdemu_srai_epi32(a,i)
|
||||
#define _simd_srli_epi32(a,i) _simdemu_srli_epi32(a,i)
|
||||
#define _simd_srlisi_ps(a,i) _mm256_castsi256_ps(_simdemu_srli_si128<i>(_mm256_castps_si256(a)))
|
||||
|
||||
#define _simd128_fmadd_ps _mm_fmaddemu_ps
|
||||
#define _simd_fmadd_ps _mm_fmaddemu256_ps
|
||||
#define _simd_fmsub_ps _mm_fmsubemu256_ps
|
||||
#define _simd_shuffle_epi8 _simdemu_shuffle_epi8
|
||||
SIMD_EMU_EPI(_simdemu_shuffle_epi8, _mm_shuffle_epi8)
|
||||
|
||||
// Emulated 128-bit multiply-add: (a * b) + c, computed as a separate multiply and add.
INLINE
__m128 _mm_fmaddemu_ps(__m128 a, __m128 b, __m128 c)
{
    const __m128 product = _mm_mul_ps(a, b);
    return _mm_add_ps(product, c);
}
|
||||
|
||||
// Emulated 256-bit multiply-add: (a * b) + c, computed as a separate multiply and add.
INLINE
__m256 _mm_fmaddemu256_ps(__m256 a, __m256 b, __m256 c)
{
    const __m256 product = _mm256_mul_ps(a, b);
    return _mm256_add_ps(product, c);
}
|
||||
|
||||
// Emulated 256-bit multiply-subtract: (a * b) - c, computed as a separate multiply and subtract.
INLINE
__m256 _mm_fmsubemu256_ps(__m256 a, __m256 b, __m256 c)
{
    const __m256 product = _mm256_mul_ps(a, b);
    return _mm256_sub_ps(product, c);
}
|
||||
|
||||
// Emulated gather: for every lane i, loads the float located at byte address
// pBase + vOffsets[i] * scale.
// @param pBase - base address of the gather
// @param vOffsets - per-lane 32-bit offsets; treated as signed, as the hardware
//                   gather instruction does
// @param scale - factor applied to each offset to obtain a byte offset
// @return the gathered floats, one per lane
INLINE
__m256 _simd_i32gather_ps(const float* pBase, __m256i vOffsets, const int scale)
{
    const int32_t *pOffsets = (const int32_t*)&vOffsets;
    simdscalar vResult;
    float* pResult = (float*)&vResult;
    for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
    {
        // Widen before scaling so that negative offsets address memory below
        // pBase and the product cannot wrap at 32 bits.
        const int64_t byteOffset = (int64_t)pOffsets[i] * scale;
        pResult[i] = *(const float*)((const uint8_t*)pBase + byteOffset);
    }

    return vResult;
}
|
||||
|
||||
// Emulated masked gather: lanes selected by vMask load the float located at byte
// address pBase + vOffsets[lane] * scale; all other lanes keep the value of vSrc.
// @param vSrc - values returned for lanes that are not selected
// @param pBase - base address of the gather
// @param vOffsets - per-lane 32-bit offsets; treated as signed, as the hardware
//                   gather instruction does
// @param vMask - lane selection, reduced to a bitmask with _simd_movemask_ps
// @param scale - factor applied to each offset to obtain a byte offset
INLINE
__m256 _simd_mask_i32gather_ps(__m256 vSrc, const float* pBase, __m256i vOffsets, __m256 vMask, const int scale)
{
    const int32_t *pOffsets = (const int32_t*)&vOffsets;
    simdscalar vResult = vSrc;
    float* pResult = (float*)&vResult;
    DWORD index;
    uint32_t mask = _simd_movemask_ps(vMask);

    // visit the selected lanes from lowest to highest, clearing each bit once handled
    while (_BitScanForward(&index, mask))
    {
        mask &= ~(1 << index);

        // Widen before scaling so that negative offsets address memory below
        // pBase and the product cannot wrap at 32 bits.
        const int64_t byteOffset = (int64_t)pOffsets[index] * scale;
        pResult[index] = *(const float*)((const uint8_t*)pBase + byteOffset);
    }

    return vResult;
}
|
||||
|
||||
// Emulated 256-bit absolute value of 32-bit integers, computed on the two
// 128-bit halves separately.
INLINE
__m256i _simd_abs_epi32(__m256i a)
{
    const __m128i upperAbs = _mm_abs_epi32(_mm256_extractf128_si256(a, 1));
    const __m128i lowerAbs = _mm_abs_epi32(_mm256_castsi256_si128(a));
    return _mm256_insertf128_si256(_mm256_castsi128_si256(lowerAbs), upperAbs, 1);
}
|
||||
|
||||
// Emulated 256-bit byte movemask: the 16-bit mask of the upper 128-bit half is
// placed in bits 16..31 and the mask of the lower half in bits 0..15.
INLINE
int _simdemu_movemask_epi8(__m256i a)
{
    const uint32_t upperMask = (uint32_t)_mm_movemask_epi8(_mm256_extractf128_si256(a, 1));
    const uint32_t lowerMask = (uint32_t)_mm_movemask_epi8(_mm256_castsi256_si128(a));

    // combined in unsigned arithmetic, then handed back as int like the intrinsic does
    return (int)((upperMask << 16) | lowerMask);
}
|
||||
#else
|
||||
|
||||
// ---- other KNOB_ARCH values: 256-bit integer operations map straight to intrinsics ----
#define _simd_mul_epi32 _mm256_mul_epi32
#define _simd_mullo_epi32 _mm256_mullo_epi32
#define _simd_sub_epi32 _mm256_sub_epi32
#define _simd_sub_epi64 _mm256_sub_epi64
#define _simd_min_epi32 _mm256_min_epi32
#define _simd_max_epi32 _mm256_max_epi32
#define _simd_min_epu32 _mm256_min_epu32
#define _simd_max_epu32 _mm256_max_epu32
#define _simd_add_epi32 _mm256_add_epi32
#define _simd_and_si _mm256_and_si256
#define _simd_andnot_si _mm256_andnot_si256
#define _simd_cmpeq_epi32 _mm256_cmpeq_epi32
// less-than is expressed as greater-than with swapped operands
#define _simd_cmplt_epi32(a,b) _mm256_cmpgt_epi32(b,a)
#define _simd_cmpgt_epi32(a,b) _mm256_cmpgt_epi32(a,b)
#define _simd_or_si _mm256_or_si256
#define _simd_castps_si _mm256_castps_si256

#define _simd_unpacklo_epi32 _mm256_unpacklo_epi32
#define _simd_unpackhi_epi32 _mm256_unpackhi_epi32

// NOTE(review): _simdemu_srli_si128 is not defined in the portion of the file visible here.
#define _simd_srli_si(a,i) _simdemu_srli_si128<i>(a)
#define _simd_slli_epi32 _mm256_slli_epi32
#define _simd_srai_epi32 _mm256_srai_epi32
#define _simd_srli_epi32 _mm256_srli_epi32
#define _simd_srlisi_ps(a,i) _mm256_castsi256_ps(_simdemu_srli_si128<i>(_mm256_castps_si256(a)))
#define _simd128_fmadd_ps _mm_fmadd_ps
#define _simd_fmadd_ps _mm256_fmadd_ps
#define _simd_fmsub_ps _mm256_fmsub_ps
#define _simd_shuffle_epi8 _mm256_shuffle_epi8
#define _simd_adds_epu8 _mm256_adds_epu8
#define _simd_subs_epu8 _mm256_subs_epu8
#define _simd_add_epi8 _mm256_add_epi8
#define _simd_i32gather_ps _mm256_i32gather_ps
#define _simd_mask_i32gather_ps _mm256_mask_i32gather_ps
#define _simd_abs_epi32 _mm256_abs_epi32

#define _simd_cmpeq_epi64 _mm256_cmpeq_epi64
#define _simd_cmpgt_epi64 _mm256_cmpgt_epi64
#define _simd_movemask_epi8 _mm256_movemask_epi8
|
||||
#endif
|
||||
|
||||
// ---- aliases shared by both KNOB_ARCH branches ----

// integer shuffle expressed through the float shuffle intrinsic
#define _simd_shuffleps_epi32(vA, vB, imm) _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(vA), _mm256_castsi256_ps(vB), imm))
#define _simd_shuffle_ps _mm256_shuffle_ps
#define _simd_set1_epi32 _mm256_set1_epi32
#define _simd_set1_epi8 _mm256_set1_epi8
#define _simd_setzero_si _mm256_setzero_si256
#define _simd_cvttps_epi32 _mm256_cvttps_epi32
#define _simd_store_si _mm256_store_si256
#define _simd_broadcast_ss _mm256_broadcast_ss
#define _simd_maskstore_ps _mm256_maskstore_ps
#define _simd_load_si _mm256_load_si256
#define _simd_loadu_si _mm256_loadu_si256
#define _simd_sub_ps _mm256_sub_ps
#define _simd_testz_ps _mm256_testz_ps
#define _simd_xor_ps _mm256_xor_ps
|
||||
|
||||
|
||||
// Integer variant of _simd_blendv_ps: a and b are reinterpreted as float
// vectors, blended under 'mask' and the result is reinterpreted back.
INLINE
simdscalari _simd_blendv_epi32(simdscalari a, simdscalari b, simdscalar mask)
{
    const simdscalar aAsFloat = _simd_castsi_ps(a);
    const simdscalar bAsFloat = _simd_castsi_ps(b);
    const simdscalar blended = _simd_blendv_ps(aAsFloat, bAsFloat, mask);
    return _simd_castps_si(blended);
}
|
||||
|
||||
// convert bitmask to vector mask
|
||||
INLINE
|
||||
simdscalar vMask(int32_t mask)
|
||||
{
|
||||
__m256i vec = _mm256_set1_epi32(mask);
|
||||
const __m256i bit = _mm256_set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
|
||||
vec = _simd_and_si(vec, bit);
|
||||
vec = _simd_cmplt_epi32(_mm256_setzero_si256(), vec);
|
||||
return _simd_castsi_ps(vec);
|
||||
}
|
||||
|
||||
// Copies lane 'slane' of s into lane 'rlane' of r; all other lanes of r are unchanged.
INLINE
void _simd_mov(simdscalar &r, unsigned int rlane, simdscalar& s, unsigned int slane)
{
    // spill both registers to aligned arrays so a single lane can be addressed
    OSALIGNSIMD(float) dstLanes[KNOB_SIMD_WIDTH], srcLanes[KNOB_SIMD_WIDTH];
    _mm256_store_ps(dstLanes, r);
    _mm256_store_ps(srcLanes, s);

    dstLanes[rlane] = srcLanes[slane];

    r = _mm256_load_ps(dstLanes);
}
|
||||
|
||||
// Emulated 256-bit logical left shift of 32-bit integers by i bits, applied to
// each 128-bit half separately.
INLINE __m256i _simdemu_slli_epi32(__m256i a, uint32_t i)
{
    const __m128i upper = _mm_slli_epi32(_mm256_extractf128_si256(a, 1), i);
    const __m128i lower = _mm_slli_epi32(_mm256_castsi256_si128(a), i);

    return _mm256_insertf128_si256(_mm256_castsi128_si256(lower), upper, 1);
}
|
||||
|
||||
// Emulated 256-bit arithmetic right shift of 32-bit integers by i bits, applied
// to each 128-bit half separately.
INLINE __m256i _simdemu_srai_epi32(__m256i a, uint32_t i)
{
    const __m128i upper = _mm_srai_epi32(_mm256_extractf128_si256(a, 1), i);
    const __m128i lower = _mm_srai_epi32(_mm256_castsi256_si128(a), i);

    return _mm256_insertf128_si256(_mm256_castsi128_si256(lower), upper, 1);
}
|
||||
|
||||
// Emulated 256-bit logical right shift of 32-bit integers by i bits, applied to
// each 128-bit half separately.
INLINE __m256i _simdemu_srli_epi32(__m256i a, uint32_t i)
{
    const __m128i upper = _mm_srli_epi32(_mm256_extractf128_si256(a, 1), i);
    const __m128i lower = _mm_srli_epi32(_mm256_castsi256_si128(a), i);

    return _mm256_insertf128_si256(_mm256_castsi128_si256(lower), upper, 1);
}
|
||||
|
||||
// Placeholder: the 8-wide transpose is not implemented. Calling it trips the
// assert and leaves v untouched.
INLINE
void _simdvec_transpose(simdvector &v)
{
    (void)v; // intentionally unused until an implementation exists
    SWR_ASSERT(false, "Need to implement 8 wide version");
}
|
||||
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
|
||||
// Populates a simdvector from a vector. So p = xyzw becomes xxxx yyyy zzzz wwww.
|
||||
INLINE
|
||||
void _simdvec_load_ps(simdvector& r, const float *p)
|
||||
{
|
||||
r[0] = _simd_set1_ps(p[0]);
|
||||
r[1] = _simd_set1_ps(p[1]);
|
||||
r[2] = _simd_set1_ps(p[2]);
|
||||
r[3] = _simd_set1_ps(p[3]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Assigns the same simd register to all four components of r.
/// @param r - destination vector
/// @param s - register copied into r[0]..r[3]
INLINE
void _simdvec_mov(simdvector& r, const simdscalar& s)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = s;
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Copies all four components of v into r, in component order.
/// @param r - destination vector
/// @param v - source vector
INLINE
void _simdvec_mov(simdvector& r, const simdvector& v)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = v[comp];
    }
}
|
||||
|
||||
// just move a lane from the source simdvector to dest simdvector
|
||||
INLINE
|
||||
void _simdvec_mov(simdvector &r, unsigned int rlane, simdvector& s, unsigned int slane)
|
||||
{
|
||||
_simd_mov(r[0], rlane, s[0], slane);
|
||||
_simd_mov(r[1], rlane, s[1], slane);
|
||||
_simd_mov(r[2], rlane, s[2], slane);
|
||||
_simd_mov(r[3], rlane, s[3], slane);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Per-lane 3-component dot product: x*x + y*y + z*z terms of v0 and v1.
/// @param r - receives the dot product; it is written after each term, so it
///            should not alias a component that is still to be read
/// @param v0 - first operand (w is ignored)
/// @param v1 - second operand (w is ignored)
INLINE
void _simdvec_dp3_ps(simdscalar& r, const simdvector& v0, const simdvector& v1)
{
    r = _simd_mul_ps(v0[0], v1[0]);                     // (v0.x*v1.x)
    r = _simd_add_ps(r, _simd_mul_ps(v0[1], v1[1]));    // + (v0.y*v1.y)
    r = _simd_add_ps(r, _simd_mul_ps(v0[2], v1[2]));    // + (v0.z*v1.z)
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Per-lane 4-component dot product of v0 and v1.
/// @param r - receives the dot product; it is written after each term, so it
///            should not alias a component that is still to be read
/// @param v0 - first operand
/// @param v1 - second operand
INLINE
void _simdvec_dp4_ps(simdscalar& r, const simdvector& v0, const simdvector& v1)
{
    r = _simd_mul_ps(v0[0], v1[0]);                     // (v0.x*v1.x)
    r = _simd_add_ps(r, _simd_mul_ps(v0[1], v1[1]));    // + (v0.y*v1.y)
    r = _simd_add_ps(r, _simd_mul_ps(v0[2], v1[2]));    // + (v0.z*v1.z)
    r = _simd_add_ps(r, _simd_mul_ps(v0[3], v1[3]));    // + (v0.w*v1.w)
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Returns _simd_rsqrt_ps of the 4-component dot product of v with
///        itself, i.e. the reciprocal length of v. The w component takes
///        part in the sum.
/// @param v - vector whose reciprocal length is wanted
INLINE
simdscalar _simdvec_rcp_length_ps(const simdvector& v)
{
    simdscalar lengthSquared;
    _simdvec_dp4_ps(lengthSquared, v, v);

    return _simd_rsqrt_ps(lengthSquared);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Scales all four components of v by its reciprocal length (as
///        computed by _simdvec_rcp_length_ps) and stores the result in r.
/// @param r - receives the scaled vector
/// @param v - vector to normalize
INLINE
void _simdvec_normalize_ps(simdvector& r, const simdvector& v)
{
    // Computed once up front, before any component of r is written.
    const simdscalar invLength = _simdvec_rcp_length_ps(v);

    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = _simd_mul_ps(v[comp], invLength);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Multiplies every component of v by the register s.
/// @param r - receives v[i] * s for i = 0..3
/// @param v - vector operand
/// @param s - scalar operand; re-read for every component
INLINE
void _simdvec_mul_ps(simdvector& r, const simdvector& v, const simdscalar& s)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = _simd_mul_ps(v[comp], s);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Component-wise product of two simdvectors.
/// @param r - receives v0[i] * v1[i] for i = 0..3
/// @param v0 - first operand
/// @param v1 - second operand
INLINE
void _simdvec_mul_ps(simdvector& r, const simdvector& v0, const simdvector& v1)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = _simd_mul_ps(v0[comp], v1[comp]);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Component-wise sum of two simdvectors.
/// @param r - receives v0[i] + v1[i] for i = 0..3
/// @param v0 - first operand
/// @param v1 - second operand
INLINE
void _simdvec_add_ps(simdvector& r, const simdvector& v0, const simdvector& v1)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = _simd_add_ps(v0[comp], v1[comp]);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Applies _simd_min_ps between every component of v0 and s.
/// @param r - receives min(v0[i], s) for i = 0..3
/// @param v0 - vector operand
/// @param s - scalar operand; re-read for every component
INLINE
void _simdvec_min_ps(simdvector& r, const simdvector& v0, const simdscalar& s)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = _simd_min_ps(v0[comp], s);
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Applies _simd_max_ps between every component of v0 and s.
/// @param r - receives max(v0[i], s) for i = 0..3
/// @param v0 - vector operand
/// @param s - scalar operand; re-read for every component
INLINE
void _simdvec_max_ps(simdvector& r, const simdvector& v0, const simdscalar& s)
{
    for (int comp = 0; comp < 4; ++comp)
    {
        r[comp] = _simd_max_ps(v0[comp], s);
    }
}
|
||||
|
||||
// Matrix4x4 * Vector4
|
||||
// outVec.x = (m00 * v.x) + (m01 * v.y) + (m02 * v.z) + (m03 * v.w)
|
||||
// outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * v.w)
|
||||
// outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * v.w)
|
||||
// outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * v.w)
|
||||
INLINE
|
||||
void _simd_mat4x4_vec4_multiply(
|
||||
simdvector& result,
|
||||
const float *pMatrix,
|
||||
const simdvector& v)
|
||||
{
|
||||
simdscalar m;
|
||||
simdscalar r0;
|
||||
simdscalar r1;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 3); // m[row][3]
|
||||
r1 = _simd_mul_ps(m, v[3]); // (m3 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
|
||||
result[0] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 3); // m[row][3]
|
||||
r1 = _simd_mul_ps(m, v[3]); // (m3 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
|
||||
result[1] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 3); // m[row][3]
|
||||
r1 = _simd_mul_ps(m, v[3]); // (m3 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
|
||||
result[2] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 3); // m[row][3]
|
||||
r1 = _simd_mul_ps(m, v[3]); // (m3 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
|
||||
result[3] = r0;
|
||||
}
|
||||
|
||||
// Matrix4x4 * Vector3 - Direction Vector where w = 0.
|
||||
// outVec.x = (m00 * v.x) + (m01 * v.y) + (m02 * v.z) + (m03 * 0)
|
||||
// outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 0)
|
||||
// outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 0)
|
||||
// outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * 0)
|
||||
INLINE
|
||||
void _simd_mat3x3_vec3_w0_multiply(
|
||||
simdvector& result,
|
||||
const float *pMatrix,
|
||||
const simdvector& v)
|
||||
{
|
||||
simdscalar m;
|
||||
simdscalar r0;
|
||||
simdscalar r1;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
result[0] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
result[1] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
result[2] = r0;
|
||||
|
||||
result[3] = _simd_setzero_ps();
|
||||
}
|
||||
|
||||
// Matrix4x4 * Vector3 - Position vector where w = 1.
|
||||
// outVec.x = (m00 * v.x) + (m01 * v.y) + (m02 * v.z) + (m03 * 1)
|
||||
// outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 1)
|
||||
// outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 1)
|
||||
// outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * 1)
|
||||
INLINE
|
||||
void _simd_mat4x4_vec3_w1_multiply(
|
||||
simdvector& result,
|
||||
const float *pMatrix,
|
||||
const simdvector& v)
|
||||
{
|
||||
simdscalar m;
|
||||
simdscalar r0;
|
||||
simdscalar r1;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 0*4 + 3); // m[row][3]
|
||||
r0 = _simd_add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
|
||||
result[0] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 1*4 + 3); // m[row][3]
|
||||
r0 = _simd_add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
|
||||
result[1] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 2*4 + 3); // m[row][3]
|
||||
r0 = _simd_add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
|
||||
result[2] = r0;
|
||||
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 0); // m[row][0]
|
||||
r0 = _simd_mul_ps(m, v[0]); // (m00 * v.x)
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 1); // m[row][1]
|
||||
r1 = _simd_mul_ps(m, v[1]); // (m1 * v.y)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 2); // m[row][2]
|
||||
r1 = _simd_mul_ps(m, v[2]); // (m2 * v.z)
|
||||
r0 = _simd_add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
|
||||
m = _simd_load1_ps(pMatrix + 3*4 + 3); // m[row][3]
|
||||
result[3] = _simd_add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Matrix (three rows of four) * Vector3 - Position vector where w = 1.
///   outVec.x = (m00 * v.x) + (m01 * v.y) + (m02 * v.z) + (m03 * 1)
///   outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 1)
///   outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 1)
///   outVec.w = 1
/// @param result - receives the transformed vector; each component is stored
///                 as soon as its row is finished
/// @param pMatrix - floats indexed as pMatrix[row * 4 + col]; rows 0..2 are read
/// @param v - vector to transform; v.w is not read (treated as 1)
INLINE
void _simd_mat4x3_vec3_w1_multiply(
    simdvector& result,
    const float *pMatrix,
    const simdvector& v)
{
    for (int row = 0; row < 3; ++row)
    {
        const float *pRow = pMatrix + row * 4;

        simdscalar sum = _simd_mul_ps(_simd_load1_ps(pRow + 0), v[0]);          // (m[row][0] * v.x)
        sum = _simd_add_ps(sum, _simd_mul_ps(_simd_load1_ps(pRow + 1), v[1]));  // + (m[row][1] * v.y)
        sum = _simd_add_ps(sum, _simd_mul_ps(_simd_load1_ps(pRow + 2), v[2]));  // + (m[row][2] * v.z)
        sum = _simd_add_ps(sum, _simd_load1_ps(pRow + 3));                      // + (m[row][3] * 1)

        result[row] = sum;
    }

    result[3] = _simd_set1_ps(1.0f);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Compute plane equation vA * vX + vB * vY + vC
|
||||
INLINE simdscalar vplaneps(simdscalar vA, simdscalar vB, simdscalar vC, simdscalar &vX, simdscalar &vY)
|
||||
{
|
||||
simdscalar vOut = _simd_fmadd_ps(vA, vX, vC);
|
||||
vOut = _simd_fmadd_ps(vB, vY, vOut);
|
||||
return vOut;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Interpolates a single component.
|
||||
/// @param vI - barycentric I
|
||||
/// @param vJ - barycentric J
|
||||
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
|
||||
template<UINT Attrib, UINT Comp>
|
||||
static INLINE simdscalar InterpolateComponent(simdscalar vI, simdscalar vJ, const float *pInterpBuffer)
|
||||
{
|
||||
const float *pInterpA = &pInterpBuffer[Attrib * 12 + 0 + Comp];
|
||||
const float *pInterpB = &pInterpBuffer[Attrib * 12 + 4 + Comp];
|
||||
const float *pInterpC = &pInterpBuffer[Attrib * 12 + 8 + Comp];
|
||||
|
||||
simdscalar vA = _simd_broadcast_ss(pInterpA);
|
||||
simdscalar vB = _simd_broadcast_ss(pInterpB);
|
||||
simdscalar vC = _simd_broadcast_ss(pInterpC);
|
||||
|
||||
simdscalar vk = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);
|
||||
vC = _simd_mul_ps(vk, vC);
|
||||
|
||||
return vplaneps(vA, vB, vC, vI, vJ);
|
||||
}
|
||||
|
||||
|
||||
#endif//__SWR_SIMDINTRIN_H__
|
|
@ -0,0 +1,238 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#include "common/os.h"
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
|
||||
|
||||
#if defined(_WIN32)
|
||||
#pragma comment(lib, "user32.lib")
|
||||
#endif // _WIN32
|
||||
|
||||
// Console text colors. The three primaries are single bits, so every other
// color is the OR of the primaries it contains.
enum TextColor
{
    TEXT_BLACK  = 0,
    TEXT_RED    = 1 << 0,
    TEXT_GREEN  = 1 << 1,
    TEXT_BLUE   = 1 << 2,
    TEXT_YELLOW = TEXT_RED | TEXT_GREEN,
    TEXT_PURPLE = TEXT_RED | TEXT_BLUE,
    TEXT_CYAN   = TEXT_GREEN | TEXT_BLUE,
    TEXT_WHITE  = TEXT_RED | TEXT_GREEN | TEXT_BLUE,
};
|
||||
|
||||
// Console text style: plain, or the brighter "intensity" variant.
enum TextStyle
{
    TEXT_NORMAL,        // 0
    TEXT_INTENSITY,     // 1
};
|
||||
|
||||
// Sets the foreground color used for subsequent output on a stream.
//   stream - on Windows only stderr and stdout are handled, any other stream
//            is left alone; elsewhere an ANSI escape sequence is written to
//            whatever stream is given
//   color  - bitwise combination of TEXT_RED / TEXT_GREEN / TEXT_BLUE
//   style  - TEXT_INTENSITY selects the bright variant
void SetTextColor(FILE* stream, TextColor color = TEXT_WHITE, TextStyle style = TEXT_NORMAL)
{
#if defined(_WIN32)

    // Map the stream onto its console handle id.
    DWORD stdHandleId = 0;
    if (stream == stderr)
    {
        stdHandleId = STD_ERROR_HANDLE;
    }
    else if (stream == stdout)
    {
        stdHandleId = STD_OUTPUT_HANDLE;
    }
    else
    {
        // Not a console stream, do nothing
        return;
    }
    HANDLE hConsole = GetStdHandle(stdHandleId);

    // Translate the color/style bits into console attribute bits.
    WORD attributes = 0;
    attributes |= (color & TEXT_RED)       ? FOREGROUND_RED       : 0;
    attributes |= (color & TEXT_GREEN)     ? FOREGROUND_GREEN     : 0;
    attributes |= (color & TEXT_BLUE)      ? FOREGROUND_BLUE      : 0;
    attributes |= (style & TEXT_INTENSITY) ? FOREGROUND_INTENSITY : 0;

    SetConsoleTextAttribute(hConsole, attributes);

#else // !_WIN32

    // Print ANSI codes: 30..37 are the normal foreground colors, 90..97 the
    // bright ones; the color bits line up with the low digit.
    const uint32_t ansiColor = 30 + (style ? 60 : 0) + color;
    fprintf(stream, "\033[0m\033[%d;%dm", style, ansiColor);

#endif
}
|
||||
|
||||
// Restores the default text color on a stream: on Windows by calling
// SetTextColor with its default arguments, elsewhere by writing the ANSI
// "reset attributes" sequence.
void ResetTextColor(FILE* stream)
{
#if defined(_WIN32)

    SetTextColor(stream);

#else // !_WIN32

    // Print ANSI codes
    fputs("\033[0m", stream);

#endif
}
|
||||
|
||||
bool SwrAssert(
|
||||
bool chkDebugger,
|
||||
bool& enabled,
|
||||
const char* pExpression,
|
||||
const char* pFileName,
|
||||
uint32_t lineNum,
|
||||
const char* pFunction,
|
||||
const char* pFmtString /* = nullptr */,
|
||||
...)
|
||||
{
|
||||
if (!enabled) return false;
|
||||
|
||||
SetTextColor(stderr, TEXT_CYAN, TEXT_NORMAL);
|
||||
|
||||
fprintf(stderr, "%s(%d): ", pFileName, lineNum);
|
||||
|
||||
SetTextColor(stderr, TEXT_RED, TEXT_INTENSITY);
|
||||
|
||||
fprintf(stderr, "ASSERT: %s\n", pExpression);
|
||||
|
||||
SetTextColor(stderr, TEXT_CYAN, TEXT_INTENSITY);
|
||||
fprintf(stderr, "\t%s\n", pFunction);
|
||||
|
||||
if (pFmtString)
|
||||
{
|
||||
SetTextColor(stderr, TEXT_YELLOW, TEXT_INTENSITY);
|
||||
fprintf(stderr, "\t");
|
||||
va_list args;
|
||||
va_start(args, pFmtString);
|
||||
vfprintf(stderr, pFmtString, args);
|
||||
va_end(args);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
ResetTextColor(stderr);
|
||||
fflush(stderr);
|
||||
|
||||
#if defined(_WIN32)
|
||||
static const int MAX_MESSAGE_LEN = 2048;
|
||||
char msgBuf[MAX_MESSAGE_LEN];
|
||||
|
||||
sprintf_s(msgBuf, "%s(%d): ASSERT: %s\n", pFileName, lineNum, pExpression);
|
||||
msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
|
||||
msgBuf[MAX_MESSAGE_LEN - 1] = 0;
|
||||
OutputDebugStringA(msgBuf);
|
||||
|
||||
sprintf_s(msgBuf, "\t%s\n", pFunction);
|
||||
msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
|
||||
msgBuf[MAX_MESSAGE_LEN - 1] = 0;
|
||||
OutputDebugStringA(msgBuf);
|
||||
|
||||
int offset = 0;
|
||||
|
||||
if (pFmtString)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, pFmtString);
|
||||
offset = _vsnprintf_s(
|
||||
msgBuf,
|
||||
sizeof(msgBuf),
|
||||
sizeof(msgBuf),
|
||||
pFmtString,
|
||||
args);
|
||||
va_end(args);
|
||||
|
||||
if (offset < 0) { return true; }
|
||||
|
||||
OutputDebugStringA("\t");
|
||||
OutputDebugStringA(msgBuf);
|
||||
OutputDebugStringA("\n");
|
||||
}
|
||||
|
||||
if (KNOB_ENABLE_ASSERT_DIALOGS)
|
||||
{
|
||||
int retval = sprintf_s(
|
||||
&msgBuf[offset],
|
||||
MAX_MESSAGE_LEN - offset,
|
||||
"\n\n"
|
||||
"File: %s\n"
|
||||
"Line: %d\n"
|
||||
"\n"
|
||||
"Expression: %s\n\n"
|
||||
"Cancel: Disable this assert for the remainder of the process\n"
|
||||
"Try Again: Break into the debugger\n"
|
||||
"Continue: Continue execution (but leave assert enabled)",
|
||||
pFileName,
|
||||
lineNum,
|
||||
pExpression);
|
||||
|
||||
if (retval < 0) { return true; }
|
||||
|
||||
offset += retval;
|
||||
|
||||
if (!IsDebuggerPresent())
|
||||
{
|
||||
sprintf_s(
|
||||
&msgBuf[offset],
|
||||
MAX_MESSAGE_LEN - offset,
|
||||
"\n\n*** NO DEBUGGER DETECTED ***\n\nPressing \"Try Again\" will cause a program crash!");
|
||||
}
|
||||
|
||||
retval = MessageBoxA(nullptr, msgBuf, "Assert Failed", MB_CANCELTRYCONTINUE | MB_ICONEXCLAMATION);
|
||||
|
||||
switch (retval)
|
||||
{
|
||||
case IDCANCEL:
|
||||
enabled = false;
|
||||
return false;
|
||||
|
||||
case IDTRYAGAIN:
|
||||
return true;
|
||||
|
||||
case IDCONTINUE:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return IsDebuggerPresent() || !chkDebugger;
|
||||
}
|
||||
#endif // _WIN32
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // SWR_ENABLE_ASSERTS
|
|
@ -0,0 +1,109 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __SWR_ASSERT_H__
|
||||
#define __SWR_ASSERT_H__
|
||||
|
||||
#if !defined(__SWR_OS_H__)
|
||||
#error swr_assert.h should not be included directly, please include "common/os.h" instead.
|
||||
#endif
|
||||
|
||||
// Default for SWR_ENABLE_ASSERTS when the build has not set it already:
// 1 unless NDEBUG is defined, 0 otherwise.
#ifndef SWR_ENABLE_ASSERTS

#ifndef NDEBUG
#define SWR_ENABLE_ASSERTS 1
#else
#define SWR_ENABLE_ASSERTS 0
#endif // !NDEBUG

#endif // !SWR_ENABLE_ASSERTS
|
||||
|
||||
// Release asserts default to enabled unless the build has set
// SWR_ENABLE_REL_ASSERTS itself.
#ifndef SWR_ENABLE_REL_ASSERTS
#define SWR_ENABLE_REL_ASSERTS 1
#endif // !SWR_ENABLE_REL_ASSERTS
|
||||
|
||||
#if SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
|
||||
#include "assert.h"
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
|
||||
#pragma message("C++ is required for SWR Asserts, falling back to assert.h")
|
||||
|
||||
#if SWR_ENABLE_ASSERTS
|
||||
#define SWR_ASSERT(e, ...) assert(e)
|
||||
#endif
|
||||
|
||||
#if SWR_ENABLE_REL_ASSERTS
|
||||
#define SWR_REL_ASSERT(e, ...) assert(e)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if SWR_ENABLE_ASSERTS
|
||||
#if defined(assert)
|
||||
#undef assert
|
||||
#endif
|
||||
#define assert(exp) SWR_ASSERT(exp)
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Reports a failed assertion; invoked by the _SWR_ASSERT macro.
/// @param chkDebugger - true for SWR_ASSERT, false for SWR_REL_ASSERT
/// @param enabled - per-call-site enable flag owned by the macro expansion
/// @param pExpression - text of the failed expression
/// @param pFileName - source file of the assert
/// @param lineNum - source line of the assert
/// @param function - name of the enclosing function
/// @param pFmtString - optional printf-style message, followed by its arguments
/// @return true when the caller should execute DEBUGBREAK
bool SwrAssert(
    bool        chkDebugger,
    bool&       enabled,
    const char* pExpression,
    const char* pFileName,
    uint32_t    lineNum,
    const char* function,
    const char* pFmtString = nullptr,
    ...);
|
||||
|
||||
// Core assert macro. Evaluates e once; when it is false, SwrAssert is called
// with a per-call-site enable flag plus the expression text and source
// location, and DEBUGBREAK runs if SwrAssert returns true.
// NOTE: this expands to a plain braced block, not do { } while (0), so
// "if (x) SWR_ASSERT(y); else ..." does not compile.
#define _SWR_ASSERT(chkDebugger, e, ...) {\
    bool expFailed = !(e);\
    if (expFailed) {\
        static bool swrAssertEnabled = true;\
        expFailed = SwrAssert(chkDebugger, swrAssertEnabled, #e, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__);\
        if (expFailed) { DEBUGBREAK; }\
    }\
}
|
||||
|
||||
#if SWR_ENABLE_ASSERTS
|
||||
#define SWR_ASSERT(e, ...) _SWR_ASSERT(true, e, ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#if SWR_ENABLE_REL_ASSERTS
|
||||
#define SWR_REL_ASSERT(e, ...) _SWR_ASSERT(false, e, ##__VA_ARGS__)
|
||||
#endif
|
||||
#endif // C++
|
||||
|
||||
#endif // SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
|
||||
|
||||
#if !SWR_ENABLE_ASSERTS
|
||||
#define SWR_ASSERT(e, ...)
|
||||
#endif
|
||||
|
||||
#if !SWR_ENABLE_REL_ASSERTS
|
||||
#define SWR_REL_ASSERT(e, ...)
|
||||
#endif
|
||||
|
||||
#define SWR_NOT_IMPL SWR_ASSERT(0, "%s not implemented", __FUNCTION__)
|
||||
|
||||
#endif//__SWR_ASSERT_H__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,500 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file api.h
|
||||
*
|
||||
* @brief API definitions
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __SWR_API_H__
|
||||
#define __SWR_API_H__
|
||||
|
||||
#include "common/os.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <vector>
|
||||
|
||||
#include "common/simdintrin.h"
|
||||
#include "common/formats.h"
|
||||
#include "core/utils.h"
|
||||
#include "core/state.h"
|
||||
|
||||
///@todo place all the API functions into the 'swr' namespace.
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Function signature for the sync callback (see SwrSync).
/// @param data - first user value
/// @param data2 - second user value
/// @param data3 - third user value
typedef void(SWR_API *PFN_CALLBACK_FUNC)(
    uint64_t data,
    uint64_t data2,
    uint64_t data3);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Function signature for load hot tiles
|
||||
/// @param hPrivateContext - handle to private data
|
||||
/// @param dstFormat - format of the hot tile
|
||||
/// @param renderTargetIndex - render target to store, can be color, depth or stencil
|
||||
/// @param x - destination x coordinate
|
||||
/// @param y - destination y coordinate
|
||||
/// @param pDstHotTile - pointer to the hot tile surface
|
||||
typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, BYTE *pDstHotTile);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Function signature for store hot tiles
|
||||
/// @param hPrivateContext - handle to private data
|
||||
/// @param srcFormat - format of the hot tile
|
||||
/// @param renderTargetIndex - render target to store, can be color, depth or stencil
|
||||
/// @param x - destination x coordinate
|
||||
/// @param y - destination y coordinate
|
||||
/// @param pSrcHotTile - pointer to the hot tile surface
|
||||
typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, BYTE *pSrcHotTile);
|
||||
|
||||
/// @brief Function signature for clearing from the hot tiles clear value
|
||||
/// @param hPrivateContext - handle to private data
|
||||
/// @param renderTargetIndex - render target to store, can be color, depth or stencil
|
||||
/// @param x - destination x coordinate
|
||||
/// @param y - destination y coordinate
|
||||
/// @param pClearColor - pointer to the hot tile's clear value
|
||||
typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
|
||||
SWR_RENDERTARGET_ATTACHMENT rtIndex,
|
||||
uint32_t x, uint32_t y, const float* pClearColor);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_CREATECONTEXT_INFO
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
struct SWR_CREATECONTEXT_INFO
|
||||
{
|
||||
DRIVER_TYPE driver;
|
||||
|
||||
// External functions (e.g. sampler) need per draw context state.
|
||||
// Use SwrGetPrivateContextState() to access private state.
|
||||
uint32_t privateStateSize;
|
||||
|
||||
// Each SWR context can have multiple sets of active state
|
||||
uint32_t maxSubContexts;
|
||||
|
||||
// tile manipulation functions
|
||||
PFN_LOAD_TILE pfnLoadTile;
|
||||
PFN_STORE_TILE pfnStoreTile;
|
||||
PFN_CLEAR_TILE pfnClearTile;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_RECT
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
struct SWR_RECT
{
    uint32_t left;    ///< left extent of the rectangle
    uint32_t right;   ///< right extent of the rectangle
    uint32_t top;     ///< top extent of the rectangle
    uint32_t bottom;  ///< bottom extent of the rectangle
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Create SWR Context.
|
||||
/// @param pCreateInfo - pointer to creation info.
|
||||
HANDLE SWR_API SwrCreateContext(
|
||||
const SWR_CREATECONTEXT_INFO* pCreateInfo);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Destroys SWR Context.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
void SWR_API SwrDestroyContext(
|
||||
HANDLE hContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set currently active state context
|
||||
/// @param subContextIndex - value from 0 to
|
||||
/// SWR_CREATECONTEXT_INFO.maxSubContexts. Defaults to 0.
|
||||
void SWR_API SwrSetActiveSubContext(
|
||||
HANDLE hContext,
|
||||
uint32_t subContextIndex);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Sync cmd. Executes the callback func when all rendering up to this sync
|
||||
/// has been completed
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnFunc - pointer to callback function
|
||||
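/// @param userData - user data to pass back
/// @param userData2 - second user data value to pass back
/// @param userData3 - third user data value to pass back (defaults to 0)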
|
||||
void SWR_API SwrSync(
|
||||
HANDLE hContext,
|
||||
PFN_CALLBACK_FUNC pfnFunc,
|
||||
uint64_t userData,
|
||||
uint64_t userData2,
|
||||
uint64_t userData3 = 0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Blocks until all rendering has been completed.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
void SWR_API SwrWaitForIdle(
|
||||
HANDLE hContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set vertex buffer state.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param numBuffers - Number of vertex buffer state descriptors.
|
||||
/// @param pVertexBuffers - Array of vertex buffer state descriptors.
|
||||
void SWR_API SwrSetVertexBuffers(
|
||||
HANDLE hContext,
|
||||
uint32_t numBuffers,
|
||||
const SWR_VERTEX_BUFFER_STATE* pVertexBuffers);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set index buffer
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pIndexBuffer - Index buffer.
|
||||
void SWR_API SwrSetIndexBuffer(
|
||||
HANDLE hContext,
|
||||
const SWR_INDEX_BUFFER_STATE* pIndexBuffer);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set fetch shader pointer.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnFetchFunc - Pointer to shader.
|
||||
void SWR_API SwrSetFetchFunc(
|
||||
HANDLE hContext,
|
||||
PFN_FETCH_FUNC pfnFetchFunc);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set streamout shader pointer.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnSoFunc - Pointer to shader.
|
||||
/// @param streamIndex - specifies stream
|
||||
void SWR_API SwrSetSoFunc(
|
||||
HANDLE hContext,
|
||||
PFN_SO_FUNC pfnSoFunc,
|
||||
uint32_t streamIndex);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set streamout state
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pSoState - Pointer to streamout state.
|
||||
void SWR_API SwrSetSoState(
|
||||
HANDLE hContext,
|
||||
SWR_STREAMOUT_STATE* pSoState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set streamout buffer state
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pSoBuffer - Pointer to streamout buffer.
|
||||
/// @param slot - Slot to bind SO buffer to.
|
||||
void SWR_API SwrSetSoBuffers(
|
||||
HANDLE hContext,
|
||||
SWR_STREAMOUT_BUFFER* pSoBuffer,
|
||||
uint32_t slot);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set vertex shader pointer.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnVertexFunc - Pointer to shader.
|
||||
void SWR_API SwrSetVertexFunc(
|
||||
HANDLE hContext,
|
||||
PFN_VERTEX_FUNC pfnVertexFunc);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set frontend state.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state
|
||||
void SWR_API SwrSetFrontendState(
|
||||
HANDLE hContext,
|
||||
SWR_FRONTEND_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set geometry shader state.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state
|
||||
void SWR_API SwrSetGsState(
|
||||
HANDLE hContext,
|
||||
SWR_GS_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set geometry shader
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnGsFunc - Pointer to geometry shader function
|
||||
void SWR_API SwrSetGsFunc(
|
||||
HANDLE hContext,
|
||||
PFN_GS_FUNC pfnGsFunc);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set compute shader
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnCsFunc - Pointer to compute shader function
|
||||
/// @param totalThreadsInGroup - product of thread group dimensions.
|
||||
void SWR_API SwrSetCsFunc(
|
||||
HANDLE hContext,
|
||||
PFN_CS_FUNC pfnCsFunc,
|
||||
uint32_t totalThreadsInGroup);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set tessellation state.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state
|
||||
void SWR_API SwrSetTsState(
|
||||
HANDLE hContext,
|
||||
SWR_TS_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set hull shader
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnFunc - Pointer to shader function
|
||||
void SWR_API SwrSetHsFunc(
|
||||
HANDLE hContext,
|
||||
PFN_HS_FUNC pfnFunc);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set domain shader
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pfnFunc - Pointer to shader function
|
||||
void SWR_API SwrSetDsFunc(
|
||||
HANDLE hContext,
|
||||
PFN_DS_FUNC pfnFunc);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set depth stencil state
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state.
|
||||
void SWR_API SwrSetDepthStencilState(
|
||||
HANDLE hContext,
|
||||
SWR_DEPTH_STENCIL_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set backend state
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state.
|
||||
void SWR_API SwrSetBackendState(
|
||||
HANDLE hContext,
|
||||
SWR_BACKEND_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set pixel shader state
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state.
|
||||
void SWR_API SwrSetPixelShaderState(
|
||||
HANDLE hContext,
|
||||
SWR_PS_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set blend state
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pState - Pointer to state.
|
||||
void SWR_API SwrSetBlendState(
|
||||
HANDLE hContext,
|
||||
SWR_BLEND_STATE *pState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set blend function
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param renderTarget - render target index
|
||||
/// @param pfnBlendFunc - function pointer
|
||||
void SWR_API SwrSetBlendFunc(
|
||||
HANDLE hContext,
|
||||
uint32_t renderTarget,
|
||||
PFN_BLEND_JIT_FUNC pfnBlendFunc);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Set linkage mask
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param mask - Specifies which vertex outputs are needed by PS.
|
||||
/// @param pMap - (Optional)Linkage map to specify where FE attributes are
|
||||
/// gathered from to supply PS attribute values. The length
|
||||
/// of the map buffer needs to match the number of set bits
|
||||
/// in "mask".
|
||||
void SWR_API SwrSetLinkage(
|
||||
HANDLE hContext,
|
||||
uint32_t mask,
|
||||
const uint8_t* pMap);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrDraw
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param topology - Specifies topology for draw.
|
||||
/// @param startVertex - Specifies start vertex in vertex buffer for draw.
|
||||
/// @param primCount - Number of vertices.
|
||||
void SWR_API SwrDraw(
|
||||
HANDLE hContext,
|
||||
PRIMITIVE_TOPOLOGY topology,
|
||||
uint32_t startVertex,
|
||||
uint32_t primCount);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrDrawInstanced
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param topology - Specifies topology for draw.
|
||||
/// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
|
||||
/// @param numInstances - How many instances to render.
|
||||
/// @param startVertex - Specifies start vertex for draw. (vertex data)
|
||||
/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
|
||||
void SWR_API SwrDrawInstanced(
|
||||
HANDLE hContext,
|
||||
PRIMITIVE_TOPOLOGY topology,
|
||||
uint32_t numVertsPerInstance,
|
||||
uint32_t numInstances,
|
||||
uint32_t startVertex,
|
||||
uint32_t startInstance);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrDrawIndexed
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param topology - Specifies topology for draw.
|
||||
/// @param numIndices - Number of indices to read sequentially from index buffer.
|
||||
/// @param indexOffset - Starting index into index buffer.
|
||||
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
|
||||
void SWR_API SwrDrawIndexed(
|
||||
HANDLE hContext,
|
||||
PRIMITIVE_TOPOLOGY topology,
|
||||
uint32_t numIndices,
|
||||
uint32_t indexOffset,
|
||||
int32_t baseVertex);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrDrawIndexedInstanced
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param topology - Specifies topology for draw.
|
||||
/// @param numIndices - Number of indices to read sequentially from index buffer.
|
||||
/// @param numInstances - Number of instances to render.
|
||||
/// @param indexOffset - Starting index into index buffer.
|
||||
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
|
||||
/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
|
||||
void SWR_API SwrDrawIndexedInstanced(
|
||||
HANDLE hContext,
|
||||
PRIMITIVE_TOPOLOGY topology,
|
||||
uint32_t numIndices,
|
||||
uint32_t numInstances,
|
||||
uint32_t indexOffset,
|
||||
int32_t baseVertex,
|
||||
uint32_t startInstance);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrInvalidateTiles
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
|
||||
void SWR_API SwrInvalidateTiles(
|
||||
HANDLE hContext,
|
||||
uint32_t attachmentMask);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrDispatch
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param threadGroupCountX - Number of thread groups dispatched in X direction
|
||||
/// @param threadGroupCountY - Number of thread groups dispatched in Y direction
|
||||
/// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
|
||||
void SWR_API SwrDispatch(
|
||||
HANDLE hContext,
|
||||
uint32_t threadGroupCountX,
|
||||
uint32_t threadGroupCountY,
|
||||
uint32_t threadGroupCountZ);
|
||||
|
||||
|
||||
enum SWR_TILE_STATE
{
    /// Tile is in uninitialized state and should be loaded with surface
    /// contents before rendering.
    SWR_TILE_INVALID = 0,

    /// Tile contains newer data than the surface it represents.
    SWR_TILE_DIRTY = 2,

    /// Tile is in sync with the surface it represents.
    SWR_TILE_RESOLVED = 3,
};
|
||||
|
||||
/// @todo Add a good description for what attachments are and when and why you would use the different SWR_TILE_STATEs.
|
||||
void SWR_API SwrStoreTiles(
|
||||
HANDLE hContext,
|
||||
SWR_RENDERTARGET_ATTACHMENT attachment,
|
||||
SWR_TILE_STATE postStoreTileState);
|
||||
|
||||
void SWR_API SwrClearRenderTarget(
|
||||
HANDLE hContext,
|
||||
uint32_t clearMask,
|
||||
const FLOAT clearColor[4],
|
||||
float z,
|
||||
BYTE stencil);
|
||||
|
||||
void SWR_API SwrSetRastState(
|
||||
HANDLE hContext,
|
||||
const SWR_RASTSTATE *pRastState);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrSetViewports
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param numViewports - number of viewports passed in
|
||||
/// @param pViewports - Specifies extents of viewport.
|
||||
/// @param pMatrices - If not specified then SWR computes a default one.
|
||||
void SWR_API SwrSetViewports(
|
||||
HANDLE hContext,
|
||||
uint32_t numViewports,
|
||||
const SWR_VIEWPORT* pViewports,
|
||||
const SWR_VIEWPORT_MATRIX* pMatrices);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief SwrSetScissorRects
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param numScissors - number of scissors passed in
|
||||
/// @param pScissors - array of scissors
|
||||
void SWR_API SwrSetScissorRects(
|
||||
HANDLE hContext,
|
||||
uint32_t numScissors,
|
||||
const BBOX* pScissors);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Returns a pointer to the private context state for the current
|
||||
/// draw operation. This is used for external components such as the
|
||||
/// sampler.
|
||||
///
|
||||
/// @note Client needs to resend private state prior to each draw call.
|
||||
/// Also, SWR is responsible for the private state memory.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
VOID* SWR_API SwrGetPrivateContextState(
|
||||
HANDLE hContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Clients can use this to allocate memory for draw/dispatch
|
||||
/// operations. The memory will automatically be freed once operation
|
||||
/// has completed. Client can use this to allocate binding tables,
|
||||
/// etc. needed for shader execution.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param size - Size of allocation
|
||||
/// @param align - Alignment needed for allocation.
|
||||
VOID* SWR_API SwrAllocDrawContextMemory(
|
||||
HANDLE hContext,
|
||||
uint32_t size,
|
||||
uint32_t align);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Fills in the caller-supplied SWR stats structure.
|
||||
/// @note The counters are incremented by multiple threads.
|
||||
/// When calling this, you need to ensure all previous operations
|
||||
/// have completed.
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param pStats - SWR will fill this out for caller.
|
||||
void SWR_API SwrGetStats(
|
||||
HANDLE hContext,
|
||||
SWR_STATS* pStats);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Enables stats counting
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
/// @param enable - If true then counts are incremented.
|
||||
void SWR_API SwrEnableStats(
|
||||
HANDLE hContext,
|
||||
bool enable);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Mark end of frame - used for performance profiling
|
||||
/// @param hContext - Handle passed back from SwrCreateContext
|
||||
void SWR_API SwrEndFrame(
|
||||
HANDLE hContext);
|
||||
#endif//__SWR_API_H__
|
|
@ -0,0 +1,166 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file arena.cpp
|
||||
*
|
||||
* @brief Arena memory manager
|
||||
* The arena is convenient and fast for managing allocations for any of
|
||||
* our allocations that are associated with operations and can all be freed
|
||||
* once when their operation has completed. Allocations are cheap since
|
||||
* most of the time it's simply an increment of an offset. Also, no need to
|
||||
* free individual allocations. All of the arena memory can be freed at once.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "context.h"
|
||||
#include "arena.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
/// @brief Constructs an empty arena: no current block, zero bytes handed out,
///        and a freshly allocated mutex for the *Sync allocation functions.
Arena::Arena()
    : m_pCurBlock(nullptr)
    , m_size(0)
    , m_pMutex(new std::mutex())
{
}
|
||||
|
||||
/// @brief Releases every block owned by the arena and then the mutex.
Arena::~Arena()
{
    // Reset(true) frees the chained blocks as well as the current block,
    // so nothing is leaked regardless of whether the owner reset the arena.
    Reset(true);

    delete m_pMutex;
}
|
||||
|
||||
///@todo Remove this when all users have stopped using this.
|
||||
void Arena::Init()
|
||||
{
|
||||
m_size = 0;
|
||||
m_pCurBlock = nullptr;
|
||||
|
||||
m_pMutex = new std::mutex();
|
||||
}
|
||||
|
||||
void* Arena::AllocAligned(size_t size, size_t align)
|
||||
{
|
||||
if (m_pCurBlock)
|
||||
{
|
||||
ArenaBlock* pCurBlock = m_pCurBlock;
|
||||
pCurBlock->offset = AlignUp(pCurBlock->offset, align);
|
||||
|
||||
if ((pCurBlock->offset + size) <= pCurBlock->blockSize)
|
||||
{
|
||||
void* pMem = PtrAdd(pCurBlock->pMem, pCurBlock->offset);
|
||||
pCurBlock->offset += size;
|
||||
m_size += size;
|
||||
return pMem;
|
||||
}
|
||||
|
||||
// Not enough memory in this block, fall through to allocate
|
||||
// a new block
|
||||
}
|
||||
|
||||
static const size_t ArenaBlockSize = 1024*1024;
|
||||
size_t blockSize = std::max(m_size + ArenaBlockSize, std::max(size, ArenaBlockSize));
|
||||
blockSize = AlignUp(blockSize, KNOB_SIMD_WIDTH*4);
|
||||
|
||||
void *pMem = _aligned_malloc(blockSize, KNOB_SIMD_WIDTH*4); // Arena blocks are always simd byte aligned.
|
||||
SWR_ASSERT(pMem != nullptr);
|
||||
|
||||
ArenaBlock* pNewBlock = new (std::nothrow) ArenaBlock();
|
||||
SWR_ASSERT(pNewBlock != nullptr);
|
||||
|
||||
if (pNewBlock != nullptr)
|
||||
{
|
||||
pNewBlock->pNext = m_pCurBlock;
|
||||
|
||||
m_pCurBlock = pNewBlock;
|
||||
m_pCurBlock->pMem = pMem;
|
||||
m_pCurBlock->blockSize = blockSize;
|
||||
|
||||
}
|
||||
|
||||
return AllocAligned(size, align);
|
||||
}
|
||||
|
||||
void* Arena::Alloc(size_t size)
|
||||
{
|
||||
return AllocAligned(size, 1);
|
||||
}
|
||||
|
||||
void* Arena::AllocAlignedSync(size_t size, size_t align)
|
||||
{
|
||||
void* pAlloc = nullptr;
|
||||
|
||||
SWR_ASSERT(m_pMutex != nullptr);
|
||||
|
||||
m_pMutex->lock();
|
||||
pAlloc = AllocAligned(size, align);
|
||||
m_pMutex->unlock();
|
||||
|
||||
return pAlloc;
|
||||
}
|
||||
|
||||
void* Arena::AllocSync(size_t size)
|
||||
{
|
||||
void* pAlloc = nullptr;
|
||||
|
||||
SWR_ASSERT(m_pMutex != nullptr);
|
||||
|
||||
m_pMutex->lock();
|
||||
pAlloc = Alloc(size);
|
||||
m_pMutex->unlock();
|
||||
|
||||
return pAlloc;
|
||||
}
|
||||
|
||||
void Arena::Reset(bool removeAll)
|
||||
{
|
||||
if (m_pCurBlock)
|
||||
{
|
||||
m_pCurBlock->offset = 0;
|
||||
|
||||
ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
|
||||
m_pCurBlock->pNext = nullptr;
|
||||
while(pUsedBlocks)
|
||||
{
|
||||
ArenaBlock* pBlock = pUsedBlocks;
|
||||
pUsedBlocks = pBlock->pNext;
|
||||
|
||||
_aligned_free(pBlock->pMem);
|
||||
delete pBlock;
|
||||
}
|
||||
|
||||
if (removeAll)
|
||||
{
|
||||
_aligned_free(m_pCurBlock->pMem);
|
||||
delete m_pCurBlock;
|
||||
m_pCurBlock = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
m_size = 0;
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file arena.h
|
||||
*
|
||||
* @brief Arena memory manager
|
||||
* The arena is convenient and fast for managing allocations for any of
|
||||
* our allocations that are associated with operations and can all be freed
|
||||
* once when their operation has completed. Allocations are cheap since
|
||||
* most of the time it's simply an increment of an offset. Also, no need to
|
||||
* free individual allocations. All of the arena memory can be freed at once.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
|
||||
/// @brief Arena memory manager. Allocations are carved out of blocks by
///        advancing an offset; individual allocations are never freed, the
///        arena is rewound as a whole with Reset().
/// @note NOTE(review): the class holds raw pointers and relies on the
///       compiler-generated copy operations; copying an Arena looks unsafe.
///       Confirm no caller copies one.
class Arena
{
public:
    Arena();
    ~Arena();

    /// @todo Legacy initialization entry point kept for existing users.
    void Init();

    /// Allocate 'size' bytes at an offset aligned to 'align'.
    void* AllocAligned(size_t size, size_t align);
    /// Allocate 'size' bytes.
    void* Alloc(size_t size);

    /// Mutex-protected variant of AllocAligned.
    void* AllocAlignedSync(size_t size, size_t align);
    /// Mutex-protected variant of Alloc.
    void* AllocSync(size_t size);

    /// Rewind the arena; with removeAll the current block is released too.
    void Reset(bool removeAll = false);

    /// Current value of the arena's size counter.
    size_t Size() const { return m_size; }

private:

    /// One contiguous chunk of arena memory; blocks form a singly linked list.
    struct ArenaBlock
    {
        void*       pMem = nullptr;      ///< start of the block's memory
        size_t      blockSize = 0;       ///< size of the block
        size_t      offset = 0;          ///< offset of the next allocation
        ArenaBlock* pNext = nullptr;     ///< next block in the list
    };

    ArenaBlock* m_pCurBlock = nullptr;   ///< block allocations are served from
    size_t      m_size = 0;              ///< value reported by Size()

    /// @note Mutex is only used by sync allocation functions.
    std::mutex* m_pMutex = nullptr;
};
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,59 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file backend.h
|
||||
*
|
||||
* @brief Backend handles rasterization, pixel shading and output merger
|
||||
* operations.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "common/os.h"
|
||||
#include "core/context.h"
|
||||
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId);
|
||||
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
|
||||
void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
|
||||
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
|
||||
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void ProcessInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers);
|
||||
void InitClearTilesTable();
|
||||
|
||||
/// Identifiers for the backend function variants.
enum SWR_BACKEND_FUNCS
{
    SWR_BACKEND_SINGLE_SAMPLE    = 0,
    SWR_BACKEND_MSAA_PIXEL_RATE  = 1,
    SWR_BACKEND_MSAA_SAMPLE_RATE = 2,
    SWR_BACKEND_FUNCS_MAX        = 3,   ///< number of entries above; keep last
};
|
||||
void InitBackendFuncTables();
|
||||
|
||||
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_MAX];
|
||||
extern PFN_BACKEND_FUNC gBackendSingleSample[2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX][2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX][2];
|
||||
extern PFN_OUTPUT_MERGER gBackendOutputMergerTable[SWR_NUM_RENDERTARGETS+1][SWR_MULTISAMPLE_TYPE_MAX];
|
||||
extern PFN_CALC_PIXEL_BARYCENTRICS gPixelBarycentricTable[2];
|
||||
extern PFN_CALC_SAMPLE_BARYCENTRICS gSampleBarycentricTable[2];
|
||||
extern PFN_CALC_CENTROID_BARYCENTRICS gCentroidBarycentricTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2];
|
|
@ -0,0 +1,318 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file blend.cpp
|
||||
*
|
||||
* @brief Implementation for blending operations.
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "state.h"
|
||||
|
||||
template<bool Color, bool Alpha>
|
||||
INLINE
|
||||
void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out)
|
||||
{
|
||||
simdvector result;
|
||||
|
||||
switch (func)
|
||||
{
|
||||
case BLENDFACTOR_ZERO:
|
||||
result.x = _simd_setzero_ps();
|
||||
result.y = _simd_setzero_ps();
|
||||
result.z = _simd_setzero_ps();
|
||||
result.w = _simd_setzero_ps();
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_ONE:
|
||||
result.x = _simd_set1_ps(1.0);
|
||||
result.y = _simd_set1_ps(1.0);
|
||||
result.z = _simd_set1_ps(1.0);
|
||||
result.w = _simd_set1_ps(1.0);
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_SRC_COLOR:
|
||||
result = src;
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_DST_COLOR:
|
||||
result = dst;
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_SRC_COLOR:
|
||||
result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
|
||||
result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
|
||||
result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
|
||||
result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_DST_COLOR:
|
||||
result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
|
||||
result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
|
||||
result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
|
||||
result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_SRC_ALPHA: result.x = src.w;
|
||||
result.y = src.w;
|
||||
result.z = src.w;
|
||||
result.w = src.w;
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_SRC_ALPHA:
|
||||
{
|
||||
simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
|
||||
result.x = oneMinusSrcA;
|
||||
result.y = oneMinusSrcA;
|
||||
result.z = oneMinusSrcA;
|
||||
result.w = oneMinusSrcA;
|
||||
break;
|
||||
}
|
||||
|
||||
case BLENDFACTOR_DST_ALPHA: result.x = dst.w;
|
||||
result.y = dst.w;
|
||||
result.z = dst.w;
|
||||
result.w = dst.w;
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_DST_ALPHA:
|
||||
{
|
||||
simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
|
||||
result.x = oneMinusDstA;
|
||||
result.y = oneMinusDstA;
|
||||
result.z = oneMinusDstA;
|
||||
result.w = oneMinusDstA;
|
||||
break;
|
||||
}
|
||||
|
||||
case BLENDFACTOR_SRC_ALPHA_SATURATE:
|
||||
{
|
||||
simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
|
||||
result.x = sat;
|
||||
result.y = sat;
|
||||
result.z = sat;
|
||||
result.w = _simd_set1_ps(1.0);
|
||||
break;
|
||||
}
|
||||
|
||||
case BLENDFACTOR_CONST_COLOR:
|
||||
result.x = constantColor[0];
|
||||
result.y = constantColor[1];
|
||||
result.z = constantColor[2];
|
||||
result.w = constantColor[3];
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_CONST_ALPHA:
|
||||
result.x = result.y = result.z = result.w = constantColor[3];
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_CONST_COLOR:
|
||||
{
|
||||
result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
|
||||
result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
|
||||
result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
|
||||
result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
|
||||
break;
|
||||
}
|
||||
|
||||
case BLENDFACTOR_INV_CONST_ALPHA:
|
||||
{
|
||||
result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
|
||||
break;
|
||||
}
|
||||
|
||||
case BLENDFACTOR_SRC1_COLOR:
|
||||
result.x = src1.x;
|
||||
result.y = src1.y;
|
||||
result.z = src1.z;
|
||||
result.w = src1.w;
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_SRC1_ALPHA:
|
||||
result.x = result.y = result.z = result.w = src1.w;
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_SRC1_COLOR:
|
||||
result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
|
||||
result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
|
||||
result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
|
||||
result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
|
||||
break;
|
||||
|
||||
case BLENDFACTOR_INV_SRC1_ALPHA:
|
||||
result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
|
||||
break;
|
||||
|
||||
default: SWR_ASSERT(false, "Unimplemented blend factor: %d", func);
|
||||
}
|
||||
|
||||
if (Color)
|
||||
{
|
||||
out.x = result.x;
|
||||
out.y = result.y;
|
||||
out.z = result.z;
|
||||
}
|
||||
if (Alpha)
|
||||
{
|
||||
out.w = result.w;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<bool Color, bool Alpha>
|
||||
INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out)
|
||||
{
|
||||
simdvector result;
|
||||
|
||||
switch (blendOp)
|
||||
{
|
||||
case BLENDOP_ADD:
|
||||
result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
|
||||
result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
|
||||
result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
|
||||
result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
|
||||
break;
|
||||
|
||||
case BLENDOP_SUBTRACT:
|
||||
result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
|
||||
result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
|
||||
result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
|
||||
result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
|
||||
break;
|
||||
|
||||
case BLENDOP_REVSUBTRACT:
|
||||
result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
|
||||
result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
|
||||
result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
|
||||
result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
|
||||
break;
|
||||
|
||||
case BLENDOP_MIN:
|
||||
result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
|
||||
result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
|
||||
result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
|
||||
result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
|
||||
break;
|
||||
|
||||
case BLENDOP_MAX:
|
||||
result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
|
||||
result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
|
||||
result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
|
||||
result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
|
||||
break;
|
||||
|
||||
default:
|
||||
SWR_ASSERT(false, "Unimplemented blend function: %d", blendOp);
|
||||
}
|
||||
|
||||
if (Color)
|
||||
{
|
||||
out.x = result.x;
|
||||
out.y = result.y;
|
||||
out.z = result.z;
|
||||
}
|
||||
if (Alpha)
|
||||
{
|
||||
out.w = result.w;
|
||||
}
|
||||
}
|
||||
|
||||
template<SWR_TYPE type>
|
||||
INLINE void Clamp(simdvector &src)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case SWR_TYPE_FLOAT:
|
||||
break;
|
||||
|
||||
case SWR_TYPE_UNORM:
|
||||
src.x = _simd_max_ps(src.x, _simd_setzero_ps());
|
||||
src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
|
||||
|
||||
src.y = _simd_max_ps(src.y, _simd_setzero_ps());
|
||||
src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
|
||||
|
||||
src.z = _simd_max_ps(src.z, _simd_setzero_ps());
|
||||
src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
|
||||
|
||||
src.w = _simd_max_ps(src.w, _simd_setzero_ps());
|
||||
src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
|
||||
break;
|
||||
|
||||
case SWR_TYPE_SNORM:
|
||||
src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
|
||||
src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
|
||||
|
||||
src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
|
||||
src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
|
||||
|
||||
src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
|
||||
src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
|
||||
|
||||
src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
|
||||
src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
|
||||
break;
|
||||
|
||||
default:
|
||||
SWR_ASSERT(false, "Unimplemented clamp: %d", type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<SWR_TYPE type>
|
||||
void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, BYTE *pDst, simdvector &result)
|
||||
{
|
||||
// load render target
|
||||
simdvector dst;
|
||||
LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
|
||||
|
||||
simdvector constColor;
|
||||
constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
|
||||
constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
|
||||
constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
|
||||
constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
|
||||
|
||||
// clamp src/dst/constant
|
||||
Clamp<type>(src);
|
||||
Clamp<type>(src1);
|
||||
Clamp<type>(dst);
|
||||
Clamp<type>(constColor);
|
||||
|
||||
simdvector srcFactor, dstFactor;
|
||||
if (pBlendState->independentAlphaBlendEnable)
|
||||
{
|
||||
GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
|
||||
GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor);
|
||||
|
||||
GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
|
||||
GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
|
||||
|
||||
BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
|
||||
BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
|
||||
}
|
||||
else
|
||||
{
|
||||
GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
|
||||
GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
|
||||
|
||||
BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file clip.cpp
|
||||
*
|
||||
* @brief Implementation for clipping
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "common/os.h"
|
||||
#include "core/clip.h"
|
||||
|
||||
/// @brief Returns the parametric position t at which an edge crosses a plane.
/// @param boundaryCoord0 - plane distance term of the edge's first vertex.
/// @param boundaryCoord1 - plane distance term of the edge's second vertex.
/// @return boundaryCoord0 / (boundaryCoord0 - boundaryCoord1). No guard is
///         applied for equal inputs.
float ComputeInterpFactor(float boundaryCoord0, float boundaryCoord1)
{
    const float span = boundaryCoord0 - boundaryCoord1;
    return boundaryCoord0 / span;
}
|
||||
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
inline void intersect(
|
||||
int s, // index to first edge vertex v0 in pInPts.
|
||||
int p, // index to second edge vertex v1 in pInPts.
|
||||
const float *pInPts, // array of all the input positions.
|
||||
const float *pInAttribs, // array of all attributes for all vertex. All the attributes for each vertex is contiguous.
|
||||
int numInAttribs, // number of attributes per vertex.
|
||||
int i, // output index.
|
||||
float *pOutPts, // array of output positions. We'll write our new intersection point at i*4.
|
||||
float *pOutAttribs) // array of output attributes. We'll write our new attributes at i*numInAttribs.
|
||||
{
|
||||
float t;
|
||||
|
||||
// Find the parameter of the intersection.
|
||||
// t = (v1.w - v1.x) / ((v2.x - v1.x) - (v2.w - v1.w)) for x = w (RIGHT) plane, etc.
|
||||
const float *v1 = &pInPts[s*4];
|
||||
const float *v2 = &pInPts[p*4];
|
||||
|
||||
switch (ClippingPlane)
|
||||
{
|
||||
case FRUSTUM_LEFT: t = ComputeInterpFactor(v1[3] + v1[0], v2[3] + v2[0]); break;
|
||||
case FRUSTUM_RIGHT: t = ComputeInterpFactor(v1[3] - v1[0], v2[3] - v2[0]); break;
|
||||
case FRUSTUM_TOP: t = ComputeInterpFactor(v1[3] + v1[1], v2[3] + v2[1]); break;
|
||||
case FRUSTUM_BOTTOM: t = ComputeInterpFactor(v1[3] - v1[1], v2[3] - v2[1]); break;
|
||||
case FRUSTUM_NEAR: t = ComputeInterpFactor(v1[2], v2[2]); break;
|
||||
case FRUSTUM_FAR: t = ComputeInterpFactor(v1[3] - v1[2], v2[3] - v2[2]); break;
|
||||
default: SWR_ASSERT(false, "invalid clipping plane: %d", ClippingPlane);
|
||||
};
|
||||
|
||||
|
||||
const float *a1 = &pInAttribs[s*numInAttribs];
|
||||
const float *a2 = &pInAttribs[p*numInAttribs];
|
||||
|
||||
float *pOutP = &pOutPts[i*4];
|
||||
float *pOutA = &pOutAttribs[i*numInAttribs];
|
||||
|
||||
// Interpolate new position.
|
||||
for(int j = 0; j < 4; ++j)
|
||||
{
|
||||
pOutP[j] = v1[j] + (v2[j]-v1[j])*t;
|
||||
}
|
||||
|
||||
// Interpolate Attributes
|
||||
for(int attr = 0; attr < numInAttribs; ++attr)
|
||||
{
|
||||
pOutA[attr] = a1[attr] + (a2[attr]-a1[attr])*t;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Checks whether vertex v lies inside clipping plane
|
||||
// in homogenous coords check -w < {x,y,z} < w;
|
||||
//
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
inline int inside(const float v[4])
|
||||
{
|
||||
switch (ClippingPlane)
|
||||
{
|
||||
case FRUSTUM_LEFT : return (v[0]>=-v[3]);
|
||||
case FRUSTUM_RIGHT : return (v[0]<= v[3]);
|
||||
case FRUSTUM_TOP : return (v[1]>=-v[3]);
|
||||
case FRUSTUM_BOTTOM : return (v[1]<= v[3]);
|
||||
case FRUSTUM_NEAR : return (v[2]>=0.0f);
|
||||
case FRUSTUM_FAR : return (v[2]<= v[3]);
|
||||
default:
|
||||
SWR_ASSERT(false, "invalid clipping plane: %d", ClippingPlane);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Clips a polygon in homogenous coordinates to a particular clipping plane.
|
||||
// Takes in vertices of the polygon (InPts) and the clipping plane
|
||||
// Puts the vertices of the clipped polygon in OutPts
|
||||
// Returns number of points in clipped polygon
|
||||
//
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
int ClipTriToPlane( const float *pInPts, int numInPts,
|
||||
const float *pInAttribs, int numInAttribs,
|
||||
float *pOutPts, float *pOutAttribs)
|
||||
{
|
||||
int i=0; // index number of OutPts, # of vertices in OutPts = i div 4;
|
||||
|
||||
for (int j = 0; j < numInPts; ++j)
|
||||
{
|
||||
int s = j;
|
||||
int p = (j + 1) % numInPts;
|
||||
|
||||
int s_in = inside<ClippingPlane>(&pInPts[s*4]);
|
||||
int p_in = inside<ClippingPlane>(&pInPts[p*4]);
|
||||
|
||||
// test if vertex is to be added to output vertices
|
||||
if (s_in != p_in) // edge crosses clipping plane
|
||||
{
|
||||
// find point of intersection
|
||||
intersect<ClippingPlane>(s, p, pInPts, pInAttribs, numInAttribs, i, pOutPts, pOutAttribs);
|
||||
i++;
|
||||
}
|
||||
if (p_in) // 2nd vertex is inside clipping volume, add it to output
|
||||
{
|
||||
// Copy 2nd vertex position of edge over to output.
|
||||
for(int k = 0; k < 4; ++k)
|
||||
{
|
||||
pOutPts[i*4 + k] = pInPts[p*4 + k];
|
||||
}
|
||||
// Copy 2nd vertex attributes of edge over to output.
|
||||
for(int attr = 0; attr < numInAttribs; ++attr)
|
||||
{
|
||||
pOutAttribs[i*numInAttribs+attr] = pInAttribs[p*numInAttribs+attr];
|
||||
}
|
||||
i++;
|
||||
}
|
||||
// edge does not cross clipping plane and vertex outside clipping volume
|
||||
// => do not add vertex
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *pOutTriangles, int *numVerts, float *pOutAttribs)
|
||||
{
|
||||
// temp storage to hold at least 6 sets of vertices, the max number that can be created during clipping
|
||||
OSALIGN(float, 16) tempPts[6 * 4];
|
||||
OSALIGN(float, 16) tempAttribs[6 * KNOB_NUM_ATTRIBUTES * 4];
|
||||
|
||||
// we opt to clip to viewport frustum to produce smaller triangles for rasterization precision
|
||||
int NumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pTriangle, 3, pAttribs, numAttribs, tempPts, tempAttribs);
|
||||
NumOutPts = ClipTriToPlane<FRUSTUM_FAR>(tempPts, NumOutPts, tempAttribs, numAttribs, pOutTriangles, pOutAttribs);
|
||||
NumOutPts = ClipTriToPlane<FRUSTUM_LEFT>(pOutTriangles, NumOutPts, pOutAttribs, numAttribs, tempPts, tempAttribs);
|
||||
NumOutPts = ClipTriToPlane<FRUSTUM_RIGHT>(tempPts, NumOutPts, tempAttribs, numAttribs, pOutTriangles, pOutAttribs);
|
||||
NumOutPts = ClipTriToPlane<FRUSTUM_BOTTOM>(pOutTriangles, NumOutPts, pOutAttribs, numAttribs, tempPts, tempAttribs);
|
||||
NumOutPts = ClipTriToPlane<FRUSTUM_TOP>(tempPts, NumOutPts, tempAttribs, numAttribs, pOutTriangles, pOutAttribs);
|
||||
|
||||
SWR_ASSERT(NumOutPts <= 6);
|
||||
|
||||
*numVerts = NumOutPts;
|
||||
return;
|
||||
}
|
||||
|
||||
/// @brief Front-end clip entry point for triangles: runs a 3-vertex Clipper
///        stage over the assembled primitives, bracketed by the
///        FEClipTriangles RDTSC bucket.
void ClipTriangles(DRAW_CONTEXT *pDC,
                   PA_STATE& pa,
                   uint32_t workerId,
                   simdvector prims[],
                   uint32_t primMask,
                   simdscalari primId)
{
    RDTSC_START(FEClipTriangles);

    Clipper<3> triClipper(workerId, pDC);
    triClipper.ExecuteStage(pa, prims, primMask, primId);

    RDTSC_STOP(FEClipTriangles, 1, 0);
}
|
||||
|
||||
/// @brief Front-end clip entry point for lines: runs a 2-vertex Clipper
///        stage over the assembled primitives, bracketed by the
///        FEClipLines RDTSC bucket.
void ClipLines(DRAW_CONTEXT *pDC,
               PA_STATE& pa,
               uint32_t workerId,
               simdvector prims[],
               uint32_t primMask,
               simdscalari primId)
{
    RDTSC_START(FEClipLines);

    Clipper<2> lineClipper(workerId, pDC);
    lineClipper.ExecuteStage(pa, prims, primMask, primId);

    RDTSC_STOP(FEClipLines, 1, 0);
}
|
||||
/// @brief Front-end clip entry point for points: runs a 1-vertex Clipper
///        stage over the assembled primitives, bracketed by the
///        FEClipPoints RDTSC bucket.
void ClipPoints(DRAW_CONTEXT *pDC,
                PA_STATE& pa,
                uint32_t workerId,
                simdvector prims[],
                uint32_t primMask,
                simdscalari primId)
{
    RDTSC_START(FEClipPoints);

    Clipper<1> pointClipper(workerId, pDC);
    pointClipper.ExecuteStage(pa, prims, primMask, primId);

    RDTSC_STOP(FEClipPoints, 1, 0);
}
|
||||
|
|
@ -0,0 +1,868 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file clip.h
|
||||
*
|
||||
* @brief Definitions for clipping
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "common/simdintrin.h"
|
||||
#include "core/context.h"
|
||||
#include "core/pa.h"
|
||||
#include "rdtsc_core.h"
|
||||
|
||||
// Clip codes are shifted out of the float mantissa to prevent denormalized
// values when the codes are used in float compares.
#define CLIPCODE_SHIFT 23

enum SWR_CLIPCODES
{
    // view frustum planes: one bit each
    FRUSTUM_LEFT        = (0x01 << CLIPCODE_SHIFT),
    FRUSTUM_TOP         = (0x02 << CLIPCODE_SHIFT),
    FRUSTUM_RIGHT       = (0x04 << CLIPCODE_SHIFT),
    FRUSTUM_BOTTOM      = (0x08 << CLIPCODE_SHIFT),

    FRUSTUM_NEAR        = (0x10 << CLIPCODE_SHIFT),
    FRUSTUM_FAR         = (0x20 << CLIPCODE_SHIFT),

    // vertex has w <= 0
    NEGW                = (0x40 << CLIPCODE_SHIFT),

    // Guardband is able to use a single high-bit with 4 separate LSBs,
    // because it computes a union, rather than intersection, of clipcodes.
    GUARDBAND_LEFT      = (0x80 << CLIPCODE_SHIFT | 0x1),
    GUARDBAND_TOP       = (0x80 << CLIPCODE_SHIFT | 0x2),
    GUARDBAND_RIGHT     = (0x80 << CLIPCODE_SHIFT | 0x4),
    GUARDBAND_BOTTOM    = (0x80 << CLIPCODE_SHIFT | 0x8)
};

// all six view frustum planes
#define FRUSTUM_CLIP_MASK \
    (FRUSTUM_LEFT | FRUSTUM_TOP | FRUSTUM_RIGHT | FRUSTUM_BOTTOM | FRUSTUM_NEAR | FRUSTUM_FAR)

// near/far planes, the four guardband edges and negative w
#define GUARDBAND_CLIP_MASK \
    (FRUSTUM_NEAR | FRUSTUM_FAR | GUARDBAND_LEFT | GUARDBAND_TOP | GUARDBAND_RIGHT | GUARDBAND_BOTTOM | NEGW)
|
||||
|
||||
/// @brief Scalar triangle clipper.
/// @param pTriangle - input triangle positions.
/// @param pAttribs - input vertex attributes.
/// @param numAttribs - number of attribute floats per vertex.
/// @param pOutTriangles - receives the clipped positions.
/// @param numVerts - receives the number of output vertices.
/// @param pOutAttribs - receives the clipped attributes.
void Clip(const float *pTriangle,
          const float *pAttribs,
          int numAttribs,
          float *pOutTriangles,
          int *numVerts,
          float *pOutAttribs);
|
||||
|
||||
/// @brief Computes the SWR_CLIPCODES bits for a SIMD batch of vertices.
/// @param type - driver type; selects the near-plane test (DX: z < 0, otherwise z < -w).
/// @param state - API state supplying depthClipEnable and the guardband scales.
/// @param vertex - homogeneous clip-space positions.
/// @param clipCodes - receives the OR of every clip code each lane violates.
INLINE
void ComputeClipCodes(DRIVER_TYPE type, const API_STATE& state, const simdvector& vertex, simdscalar& clipCodes)
{
    const simdscalar vZero = _simd_setzero_ps();

    // -w
    const simdscalar vNegW = _simd_mul_ps(vertex.w, _simd_set1_ps(-1.0f));

    // ORs 'code' into clipCodes for each lane whose compare mask is set
    auto flagLanes = [&clipCodes](const simdscalar &vCmp, int code)
    {
        clipCodes = _simd_or_ps(clipCodes, _simd_and_ps(vCmp, _simd_castsi_ps(_simd_set1_epi32(code))));
    };

    clipCodes = vZero;

    // view frustum in x/y: x < -w, y < -w, x > w, y > w
    flagLanes(_simd_cmplt_ps(vertex.x, vNegW), FRUSTUM_LEFT);
    flagLanes(_simd_cmplt_ps(vertex.y, vNegW), FRUSTUM_TOP);
    flagLanes(_simd_cmpgt_ps(vertex.x, vertex.w), FRUSTUM_RIGHT);
    flagLanes(_simd_cmpgt_ps(vertex.y, vertex.w), FRUSTUM_BOTTOM);

    if (state.rastState.depthClipEnable)
    {
        // DX clips depth [0..w], GL clips [-w..w]
        const simdscalar vNearBound = (type == DX) ? vZero : vNegW;
        flagLanes(_simd_cmplt_ps(vertex.z, vNearBound), FRUSTUM_NEAR);
        flagLanes(_simd_cmpgt_ps(vertex.z, vertex.w), FRUSTUM_FAR);
    }

    // w <= 0
    flagLanes(_simd_cmple_ps(vertex.w, vZero), NEGW);

    // guardband: same tests as the frustum with w scaled by the guardband extents
    flagLanes(_simd_cmplt_ps(vertex.x, _simd_mul_ps(vNegW, _simd_set1_ps(state.gbState.left))), GUARDBAND_LEFT);
    flagLanes(_simd_cmplt_ps(vertex.y, _simd_mul_ps(vNegW, _simd_set1_ps(state.gbState.top))), GUARDBAND_TOP);
    flagLanes(_simd_cmpgt_ps(vertex.x, _simd_mul_ps(vertex.w, _simd_set1_ps(state.gbState.right))), GUARDBAND_RIGHT);
    flagLanes(_simd_cmpgt_ps(vertex.y, _simd_mul_ps(vertex.w, _simd_set1_ps(state.gbState.bottom))), GUARDBAND_BOTTOM);
}
|
||||
|
||||
template<uint32_t NumVertsPerPrim>
|
||||
class Clipper
|
||||
{
|
||||
public:
|
||||
Clipper(uint32_t in_workerId, DRAW_CONTEXT* in_pDC) :
|
||||
workerId(in_workerId), driverType(in_pDC->pContext->driverType), pDC(in_pDC), state(GetApiState(in_pDC))
|
||||
{
|
||||
static_assert(NumVertsPerPrim >= 1 && NumVertsPerPrim <= 3, "Invalid NumVertsPerPrim");
|
||||
}
|
||||
|
||||
/// @brief Fills this->clipCodes with the clip codes of each vertex of the
///        primitive batch by calling the free ComputeClipCodes per vertex.
/// @param vertex - NumVertsPerPrim vertex position vectors.
void ComputeClipCodes(simdvector vertex[])
{
    uint32_t vert = 0;
    while (vert < NumVertsPerPrim)
    {
        ::ComputeClipCodes(this->driverType, this->state, vertex[vert], this->clipCodes[vert]);
        ++vert;
    }
}
|
||||
|
||||
/// @brief Returns the bitwise AND of the clip codes of all vertices, i.e. the
///        codes set on every vertex of each primitive.
simdscalar ComputeClipCodeIntersection()
{
    simdscalar vCommon = this->clipCodes[0];

    uint32_t vert = 1;
    while (vert < NumVertsPerPrim)
    {
        vCommon = _simd_and_ps(vCommon, this->clipCodes[vert]);
        ++vert;
    }

    return vCommon;
}
|
||||
|
||||
/// @brief Returns the bitwise OR of the clip codes of all vertices, i.e. the
///        codes set on at least one vertex of each primitive.
simdscalar ComputeClipCodeUnion()
{
    simdscalar vAny = this->clipCodes[0];

    uint32_t vert = 1;
    while (vert < NumVertsPerPrim)
    {
        vAny = _simd_or_ps(vAny, this->clipCodes[vert]);
        ++vert;
    }

    return vAny;
}
|
||||
|
||||
int ComputeNegWMask()
|
||||
{
|
||||
simdscalar clipCodeUnion = ComputeClipCodeUnion();
|
||||
clipCodeUnion = _simd_and_ps(clipCodeUnion, _simd_castsi_ps(_simd_set1_epi32(NEGW)));
|
||||
return _simd_movemask_ps(_simd_cmpneq_ps(clipCodeUnion, _simd_setzero_ps()));
|
||||
}
|
||||
|
||||
int ComputeClipMask()
|
||||
{
|
||||
simdscalar clipUnion = ComputeClipCodeUnion();
|
||||
clipUnion = _simd_and_ps(clipUnion, _simd_castsi_ps(_simd_set1_epi32(GUARDBAND_CLIP_MASK)));
|
||||
return _simd_movemask_ps(_simd_cmpneq_ps(clipUnion, _simd_setzero_ps()));
|
||||
}
|
||||
|
||||
// clipper is responsible for culling any prims with NAN coordinates
|
||||
int ComputeNaNMask(simdvector prim[])
|
||||
{
|
||||
simdscalar vNanMask = _simd_setzero_ps();
|
||||
for (uint32_t e = 0; e < NumVertsPerPrim; ++e)
|
||||
{
|
||||
simdscalar vNan01 = _simd_cmp_ps(prim[e].v[0], prim[e].v[1], _CMP_UNORD_Q);
|
||||
vNanMask = _simd_or_ps(vNanMask, vNan01);
|
||||
simdscalar vNan23 = _simd_cmp_ps(prim[e].v[2], prim[e].v[3], _CMP_UNORD_Q);
|
||||
vNanMask = _simd_or_ps(vNanMask, vNan23);
|
||||
}
|
||||
|
||||
return _simd_movemask_ps(vNanMask);
|
||||
}
|
||||
|
||||
/// @brief Computes which primitives must be discarded because of user cull
///        distances or NaN clip distances.
///        A primitive is culled when, for some enabled cull distance, every
///        vertex has a value that is negative or NaN. It is also discarded
///        when any enabled clip distance of any vertex is NaN.
/// @param pa - primitive assembly state used to fetch the clip/cull distance slots.
/// @param prim - primitive positions (not referenced by this function).
/// @return per-lane bitmask of primitives to discard.
int ComputeUserClipCullMask(PA_STATE& pa, simdvector prim[])
{
    uint8_t cullMask = this->state.rastState.cullDistanceMask;
    simdscalar vClipCullMask = _simd_setzero_ps();
    DWORD index;

    simdvector vClipCullDistLo[3];
    simdvector vClipCullDistHi[3];

    pa.Assemble(VERTEX_CLIPCULL_DIST_LO_SLOT, vClipCullDistLo);
    pa.Assemble(VERTEX_CLIPCULL_DIST_HI_SLOT, vClipCullDistHi);

    while (_BitScanForward(&index, cullMask))
    {
        cullMask &= ~(1 << index);

        // distances 0..3 come from the LO slot, 4..7 from the HI slot
        uint32_t slot = index >> 2;
        uint32_t component = index & 0x3;

        // Seed with the sign bit set in every lane. Only the sign bit matters
        // because the final result is read with movemask; ANDing with each
        // vertex's compare mask keeps it set only if all vertices are culled.
        simdscalar vCullMaskElem = _simd_set1_ps(-1.0f);
        for (uint32_t e = 0; e < NumVertsPerPrim; ++e)
        {
            simdscalar vCullComp;
            if (slot == 0)
            {
                vCullComp = vClipCullDistLo[e][component];
            }
            else
            {
                vCullComp = vClipCullDistHi[e][component];
            }

            // cull if cull distance < 0 || NAN
            // (width-agnostic zero, matching the other _simd_* calls here)
            simdscalar vCull = _simd_cmp_ps(_simd_setzero_ps(), vCullComp, _CMP_NLE_UQ);
            vCullMaskElem = _simd_and_ps(vCullMaskElem, vCull);
        }
        vClipCullMask = _simd_or_ps(vClipCullMask, vCullMaskElem);
    }

    // clipper should also discard any primitive with NAN clip distance
    uint8_t clipMask = this->state.rastState.clipDistanceMask;
    while (_BitScanForward(&index, clipMask))
    {
        clipMask &= ~(1 << index);
        uint32_t slot = index >> 2;
        uint32_t component = index & 0x3;

        for (uint32_t e = 0; e < NumVertsPerPrim; ++e)
        {
            simdscalar vClipComp;
            if (slot == 0)
            {
                vClipComp = vClipCullDistLo[e][component];
            }
            else
            {
                vClipComp = vClipCullDistHi[e][component];
            }

            // a value compares unordered with itself only when it is NaN
            simdscalar vClip = _simd_cmp_ps(vClipComp, vClipComp, _CMP_UNORD_Q);
            vClipCullMask = _simd_or_ps(vClipCullMask, vClip);
        }
    }

    return _simd_movemask_ps(vClipCullMask);
}
|
||||
|
||||
// clip a single primitive
|
||||
int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* pOutAttribs)
|
||||
{
|
||||
OSALIGN(float, 16) inVerts[3 * 4];
|
||||
OSALIGN(float, 16) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
|
||||
|
||||
// transpose primitive position
|
||||
__m128 verts[3];
|
||||
pa.AssembleSingle(VERTEX_POSITION_SLOT, primIndex, verts);
|
||||
_mm_store_ps(&inVerts[0], verts[0]);
|
||||
_mm_store_ps(&inVerts[4], verts[1]);
|
||||
_mm_store_ps(&inVerts[8], verts[2]);
|
||||
|
||||
// transpose attribs
|
||||
uint32_t numScalarAttribs = this->state.linkageCount * 4;
|
||||
|
||||
int idx = 0;
|
||||
DWORD slot = 0;
|
||||
uint32_t mapIdx = 0;
|
||||
uint32_t tmpLinkage = uint32_t(this->state.linkageMask);
|
||||
while (_BitScanForward(&slot, tmpLinkage))
|
||||
{
|
||||
tmpLinkage &= ~(1 << slot);
|
||||
// Compute absolute attrib slot in vertex array
|
||||
uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + this->state.linkageMap[mapIdx++];
|
||||
__m128 attrib[3]; // triangle attribs (always 4 wide)
|
||||
pa.AssembleSingle(inputSlot, primIndex, attrib);
|
||||
_mm_store_ps(&inAttribs[idx], attrib[0]);
|
||||
_mm_store_ps(&inAttribs[idx + numScalarAttribs], attrib[1]);
|
||||
_mm_store_ps(&inAttribs[idx + numScalarAttribs * 2], attrib[2]);
|
||||
idx += 4;
|
||||
}
|
||||
|
||||
int numVerts;
|
||||
Clip(inVerts, inAttribs, numScalarAttribs, pOutPos, &numVerts, pOutAttribs);
|
||||
|
||||
return numVerts;
|
||||
}
|
||||
|
||||
// clip SIMD primitives
|
||||
void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId)
|
||||
{
|
||||
// input/output vertex store for clipper
|
||||
simdvertex vertices[7]; // maximum 7 verts generated per triangle
|
||||
|
||||
LONG constantInterpMask = this->state.backendState.constantInterpolationMask;
|
||||
uint32_t provokingVertex = 0;
|
||||
if(pa.binTopology == TOP_TRIANGLE_FAN)
|
||||
{
|
||||
provokingVertex = this->state.frontendState.provokingVertex.triFan;
|
||||
}
|
||||
///@todo: line topology for wireframe?
|
||||
|
||||
// assemble pos
|
||||
simdvector tmpVector[NumVertsPerPrim];
|
||||
pa.Assemble(VERTEX_POSITION_SLOT, tmpVector);
|
||||
for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
|
||||
{
|
||||
vertices[i].attrib[VERTEX_POSITION_SLOT] = tmpVector[i];
|
||||
}
|
||||
|
||||
// assemble attribs
|
||||
DWORD slot = 0;
|
||||
uint32_t mapIdx = 0;
|
||||
uint32_t tmpLinkage = this->state.linkageMask;
|
||||
|
||||
int32_t maxSlot = -1;
|
||||
while (_BitScanForward(&slot, tmpLinkage))
|
||||
{
|
||||
tmpLinkage &= ~(1 << slot);
|
||||
// Compute absolute attrib slot in vertex array
|
||||
uint32_t mapSlot = this->state.linkageMap[mapIdx++];
|
||||
maxSlot = std::max<int32_t>(maxSlot, mapSlot);
|
||||
uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + mapSlot;
|
||||
|
||||
pa.Assemble(inputSlot, tmpVector);
|
||||
|
||||
// if constant interpolation enabled for this attribute, assign the provoking
|
||||
// vertex values to all edges
|
||||
if (_bittest(&constantInterpMask, slot))
|
||||
{
|
||||
for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
|
||||
{
|
||||
vertices[i].attrib[inputSlot] = tmpVector[provokingVertex];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
|
||||
{
|
||||
vertices[i].attrib[inputSlot] = tmpVector[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t numAttribs = maxSlot + 1;
|
||||
|
||||
simdscalari vNumClippedVerts = ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
|
||||
|
||||
// set up new PA for binning clipped primitives
|
||||
PFN_PROCESS_PRIMS pfnBinFunc = nullptr;
|
||||
PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
|
||||
if (NumVertsPerPrim == 3)
|
||||
{
|
||||
pfnBinFunc = BinTriangles;
|
||||
clipTopology = TOP_TRIANGLE_FAN;
|
||||
|
||||
// so that the binner knows to bloat wide points later
|
||||
if (pa.binTopology == TOP_POINT_LIST)
|
||||
clipTopology = TOP_POINT_LIST;
|
||||
}
|
||||
else if (NumVertsPerPrim == 2)
|
||||
{
|
||||
pfnBinFunc = BinLines;
|
||||
clipTopology = TOP_LINE_LIST;
|
||||
}
|
||||
else
|
||||
{
|
||||
SWR_ASSERT(0 && "Unexpected points in clipper.");
|
||||
}
|
||||
|
||||
|
||||
uint32_t* pVertexCount = (uint32_t*)&vNumClippedVerts;
|
||||
uint32_t* pPrimitiveId = (uint32_t*)&vPrimId;
|
||||
|
||||
const simdscalari vOffsets = _mm256_set_epi32(
|
||||
0 * sizeof(simdvertex), // unused lane
|
||||
6 * sizeof(simdvertex),
|
||||
5 * sizeof(simdvertex),
|
||||
4 * sizeof(simdvertex),
|
||||
3 * sizeof(simdvertex),
|
||||
2 * sizeof(simdvertex),
|
||||
1 * sizeof(simdvertex),
|
||||
0 * sizeof(simdvertex));
|
||||
|
||||
// only need to gather 7 verts
|
||||
// @todo dynamic mask based on actual # of verts generated per lane
|
||||
const simdscalar vMask = _mm256_set_ps(0, -1, -1, -1, -1, -1, -1, -1);
|
||||
|
||||
uint32_t numClippedPrims = 0;
|
||||
for (uint32_t inputPrim = 0; inputPrim < pa.NumPrims(); ++inputPrim)
|
||||
{
|
||||
uint32_t numEmittedVerts = pVertexCount[inputPrim];
|
||||
if (numEmittedVerts < NumVertsPerPrim)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
SWR_ASSERT(numEmittedVerts <= 7, "Unexpected vertex count from clipper.");
|
||||
|
||||
uint32_t numEmittedPrims = GetNumPrims(clipTopology, numEmittedVerts);
|
||||
numClippedPrims += numEmittedPrims;
|
||||
|
||||
// tranpose clipper output so that each lane's vertices are in SIMD order
|
||||
// set aside space for 2 vertices, as the PA will try to read up to 16 verts
|
||||
// for triangle fan
|
||||
simdvertex transposedPrims[2];
|
||||
|
||||
// transpose pos
|
||||
uint8_t* pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_POSITION_SLOT]) + sizeof(float) * inputPrim;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1);
|
||||
pBase += sizeof(simdscalar);
|
||||
}
|
||||
|
||||
// transpose attribs
|
||||
pBase = (uint8_t*)(&vertices[0].attrib[VERTEX_ATTRIB_START_SLOT]) + sizeof(float) * inputPrim;
|
||||
for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
transposedPrims[0].attrib[attribSlot][c] = _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, vMask, 1);
|
||||
pBase += sizeof(simdscalar);
|
||||
}
|
||||
}
|
||||
|
||||
PA_STATE_OPT clipPa(this->pDC, numEmittedPrims, (uint8_t*)&transposedPrims[0], numEmittedVerts, true, clipTopology);
|
||||
|
||||
while (clipPa.GetNextStreamOutput())
|
||||
{
|
||||
do
|
||||
{
|
||||
simdvector attrib[NumVertsPerPrim];
|
||||
bool assemble = clipPa.Assemble(VERTEX_POSITION_SLOT, attrib);
|
||||
if (assemble)
|
||||
{
|
||||
static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
|
||||
pfnBinFunc(this->pDC, clipPa, this->workerId, attrib, primMaskMap[numEmittedPrims], _simd_set1_epi32(pPrimitiveId[inputPrim]));
|
||||
}
|
||||
} while (clipPa.NextPrim());
|
||||
}
|
||||
}
|
||||
|
||||
// update global pipeline stat
|
||||
SWR_CONTEXT* pContext = this->pDC->pContext;
|
||||
UPDATE_STAT(CPrimitives, numClippedPrims);
|
||||
}
|
||||
|
||||
// execute the clipper stage
|
||||
void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId)
|
||||
{
|
||||
// set up binner based on PA state
|
||||
PFN_PROCESS_PRIMS pfnBinner;
|
||||
switch (pa.binTopology)
|
||||
{
|
||||
case TOP_POINT_LIST:
|
||||
pfnBinner = BinPoints;
|
||||
break;
|
||||
case TOP_LINE_LIST:
|
||||
case TOP_LINE_STRIP:
|
||||
case TOP_LINE_LOOP:
|
||||
case TOP_LINE_LIST_ADJ:
|
||||
case TOP_LISTSTRIP_ADJ:
|
||||
pfnBinner = BinLines;
|
||||
break;
|
||||
default:
|
||||
pfnBinner = BinTriangles;
|
||||
break;
|
||||
};
|
||||
|
||||
// update clipper invocations pipeline stat
|
||||
SWR_CONTEXT* pContext = this->pDC->pContext;
|
||||
uint32_t numInvoc = _mm_popcnt_u32(primMask);
|
||||
UPDATE_STAT(CInvocations, numInvoc);
|
||||
|
||||
ComputeClipCodes(prim);
|
||||
|
||||
// cull prims with NAN coords
|
||||
primMask &= ~ComputeNaNMask(prim);
|
||||
|
||||
// user cull distance cull
|
||||
if (this->state.rastState.cullDistanceMask)
|
||||
{
|
||||
primMask &= ~ComputeUserClipCullMask(pa, prim);
|
||||
}
|
||||
|
||||
// cull prims outside view frustum
|
||||
simdscalar clipIntersection = ComputeClipCodeIntersection();
|
||||
int validMask = primMask & _simd_movemask_ps(_simd_cmpeq_ps(clipIntersection, _simd_setzero_ps()));
|
||||
|
||||
// skip clipping for points
|
||||
uint32_t clipMask = 0;
|
||||
if (NumVertsPerPrim != 1)
|
||||
{
|
||||
clipMask = primMask & ComputeClipMask();
|
||||
}
|
||||
|
||||
if (clipMask)
|
||||
{
|
||||
RDTSC_START(FEGuardbandClip);
|
||||
// we have to clip tris, execute the clipper, which will also
|
||||
// call the binner
|
||||
ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
|
||||
RDTSC_STOP(FEGuardbandClip, 1, 0);
|
||||
}
|
||||
else if (validMask)
|
||||
{
|
||||
// update CPrimitives pipeline state
|
||||
SWR_CONTEXT* pContext = this->pDC->pContext;
|
||||
UPDATE_STAT(CPrimitives, _mm_popcnt_u32(validMask));
|
||||
|
||||
// forward valid prims directly to binner
|
||||
pfnBinner(this->pDC, pa, this->workerId, prim, validMask, primId);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/// @brief Per-lane interpolation factor b0 / (b0 - b1) for two boundary
///        coordinates of an edge.
/// @param boundaryCoord0 - boundary coordinate of the first edge vertex
/// @param boundaryCoord1 - boundary coordinate of the second edge vertex
inline simdscalar ComputeInterpFactor(simdscalar boundaryCoord0, simdscalar boundaryCoord1)
{
    const simdscalar vDelta = _simd_sub_ps(boundaryCoord0, boundaryCoord1);
    return _simd_div_ps(boundaryCoord0, vDelta);
}
|
||||
|
||||
/// @brief Builds per-lane byte offsets into an array of simdvertex.
///        Lane i addresses: vertex vIndices[i], attribute `attrib`,
///        component `component`, float element i.
/// @param attrib    - attribute slot inside the simdvertex
/// @param vIndices  - per-lane vertex index
/// @param component - component (0..3) of the attribute
/// @return per-lane byte offsets relative to the start of the vertex array
inline simdscalari ComputeOffsets(uint32_t attrib, simdscalari vIndices, uint32_t component)
{
    const uint32_t vertexBytes = sizeof(simdvertex);
    const uint32_t attribBytes = sizeof(simdvector);
    const uint32_t componentBytes = sizeof(simdscalar);

    // byte offset of each lane's own float inside a simdscalar
    const __m256i vLaneBytes = _mm256_set_epi32(7 * sizeof(float), 6 * sizeof(float), 5 * sizeof(float), 4 * sizeof(float),
                                                3 * sizeof(float), 2 * sizeof(float), 1 * sizeof(float), 0 * sizeof(float));

    // vertex base, then attribute + component, then lane
    const simdscalari vVertexBase = _simd_mullo_epi32(vIndices, _simd_set1_epi32(vertexBytes));
    const simdscalari vComponentBase =
        _simd_add_epi32(vVertexBase, _simd_set1_epi32(attribBytes * attrib + componentBytes * component));

    return _simd_add_epi32(vComponentBase, vLaneBytes);
}
|
||||
|
||||
// gathers a single component for a given attribute for each SIMD lane
|
||||
inline simdscalar GatherComponent(const float* pBuffer, uint32_t attrib, simdscalar vMask, simdscalari vIndices, uint32_t component)
|
||||
{
|
||||
simdscalari vOffsets = ComputeOffsets(attrib, vIndices, component);
|
||||
simdscalar vSrc = _mm256_undefined_ps();
|
||||
return _simd_mask_i32gather_ps(vSrc, pBuffer, vOffsets, vMask, 1);
|
||||
}
|
||||
|
||||
inline void ScatterComponent(const float* pBuffer, uint32_t attrib, simdscalar vMask, simdscalari vIndices, uint32_t component, simdscalar vSrc)
|
||||
{
|
||||
simdscalari vOffsets = ComputeOffsets(attrib, vIndices, component);
|
||||
|
||||
uint32_t* pOffsets = (uint32_t*)&vOffsets;
|
||||
float* pSrc = (float*)&vSrc;
|
||||
uint32_t mask = _simd_movemask_ps(vMask);
|
||||
DWORD lane;
|
||||
while (_BitScanForward(&lane, mask))
|
||||
{
|
||||
mask &= ~(1 << lane);
|
||||
uint8_t* pBuf = (uint8_t*)pBuffer + pOffsets[lane];
|
||||
*(float*)pBuf = pSrc[lane];
|
||||
}
|
||||
}
|
||||
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
inline void intersect(
|
||||
const simdscalar& vActiveMask, // active lanes to operate on
|
||||
const simdscalari& s, // index to first edge vertex v0 in pInPts.
|
||||
const simdscalari& p, // index to second edge vertex v1 in pInPts.
|
||||
const simdvector& v1, // vertex 0 position
|
||||
const simdvector& v2, // vertex 1 position
|
||||
simdscalari& outIndex, // output index.
|
||||
const float *pInVerts, // array of all the input positions.
|
||||
uint32_t numInAttribs, // number of attributes per vertex.
|
||||
float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4.
|
||||
{
|
||||
// compute interpolation factor
|
||||
simdscalar t;
|
||||
switch (ClippingPlane)
|
||||
{
|
||||
case FRUSTUM_LEFT: t = ComputeInterpFactor(_simd_add_ps(v1[3], v1[0]), _simd_add_ps(v2[3], v2[0])); break;
|
||||
case FRUSTUM_RIGHT: t = ComputeInterpFactor(_simd_sub_ps(v1[3], v1[0]), _simd_sub_ps(v2[3], v2[0])); break;
|
||||
case FRUSTUM_TOP: t = ComputeInterpFactor(_simd_add_ps(v1[3], v1[1]), _simd_add_ps(v2[3], v2[1])); break;
|
||||
case FRUSTUM_BOTTOM: t = ComputeInterpFactor(_simd_sub_ps(v1[3], v1[1]), _simd_sub_ps(v2[3], v2[1])); break;
|
||||
case FRUSTUM_NEAR:
|
||||
// DX Znear plane is 0, GL is -w
|
||||
if (this->driverType == DX)
|
||||
{
|
||||
t = ComputeInterpFactor(v1[2], v2[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
t = ComputeInterpFactor(_simd_add_ps(v1[3], v1[2]), _simd_add_ps(v2[3], v2[2]));
|
||||
}
|
||||
break;
|
||||
case FRUSTUM_FAR: t = ComputeInterpFactor(_simd_sub_ps(v1[3], v1[2]), _simd_sub_ps(v2[3], v2[2])); break;
|
||||
default: SWR_ASSERT(false, "invalid clipping plane: %d", ClippingPlane);
|
||||
};
|
||||
|
||||
// interpolate position and store
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vOutPos = _simd_fmadd_ps(_simd_sub_ps(v2[c], v1[c]), t, v1[c]);
|
||||
ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, vActiveMask, outIndex, c, vOutPos);
|
||||
}
|
||||
|
||||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
|
||||
simdscalar vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
|
||||
simdscalar vOutAttrib = _simd_fmadd_ps(_simd_sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
|
||||
ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
inline simdscalar inside(const simdvector& v)
|
||||
{
|
||||
switch (ClippingPlane)
|
||||
{
|
||||
case FRUSTUM_LEFT: return _simd_cmpge_ps(v[0], _simd_mul_ps(v[3], _simd_set1_ps(-1.0f)));
|
||||
case FRUSTUM_RIGHT: return _simd_cmple_ps(v[0], v[3]);
|
||||
case FRUSTUM_TOP: return _simd_cmpge_ps(v[1], _simd_mul_ps(v[3], _simd_set1_ps(-1.0f)));
|
||||
case FRUSTUM_BOTTOM: return _simd_cmple_ps(v[1], v[3]);
|
||||
case FRUSTUM_NEAR: return _simd_cmpge_ps(v[2], this->driverType == DX ? _simd_setzero_ps() : _simd_mul_ps(v[3], _simd_set1_ps(-1.0f)));
|
||||
case FRUSTUM_FAR: return _simd_cmple_ps(v[2], v[3]);
|
||||
default:
|
||||
SWR_ASSERT(false, "invalid clipping plane: %d", ClippingPlane);
|
||||
return _simd_setzero_ps();
|
||||
}
|
||||
}
|
||||
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
simdscalari ClipTriToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
|
||||
{
|
||||
simdscalari vCurIndex = _simd_setzero_si();
|
||||
simdscalari vOutIndex = _simd_setzero_si();
|
||||
simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
|
||||
while (!_simd_testz_ps(vActiveMask, vActiveMask)) // loop until activeMask is empty
|
||||
{
|
||||
simdscalari s = vCurIndex;
|
||||
simdscalari p = _simd_add_epi32(s, _simd_set1_epi32(1));
|
||||
simdscalari underFlowMask = _simd_cmpgt_epi32(vNumInPts, p);
|
||||
p = _simd_castps_si(_simd_blendv_ps(_simd_setzero_ps(), _simd_castsi_ps(p), _simd_castsi_ps(underFlowMask)));
|
||||
|
||||
// gather position
|
||||
simdvector vInPos0, vInPos1;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
vInPos0[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, s, c);
|
||||
vInPos1[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, p, c);
|
||||
}
|
||||
|
||||
// compute inside mask
|
||||
simdscalar s_in = inside<ClippingPlane>(vInPos0);
|
||||
simdscalar p_in = inside<ClippingPlane>(vInPos1);
|
||||
|
||||
// compute intersection mask (s_in != p_in)
|
||||
simdscalar intersectMask = _simd_xor_ps(s_in, p_in);
|
||||
intersectMask = _simd_and_ps(intersectMask, vActiveMask);
|
||||
|
||||
// store s if inside
|
||||
s_in = _simd_and_ps(s_in, vActiveMask);
|
||||
if (!_simd_testz_ps(s_in, s_in))
|
||||
{
|
||||
// store position
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
|
||||
}
|
||||
|
||||
// store attribs
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
|
||||
ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
|
||||
}
|
||||
}
|
||||
|
||||
// increment outIndex
|
||||
vOutIndex = _simd_blendv_epi32(vOutIndex, _simd_add_epi32(vOutIndex, _simd_set1_epi32(1)), s_in);
|
||||
}
|
||||
|
||||
// compute and store intersection
|
||||
if (!_simd_testz_ps(intersectMask, intersectMask))
|
||||
{
|
||||
intersect<ClippingPlane>(intersectMask, s, p, vInPos0, vInPos1, vOutIndex, pInVerts, numInAttribs, pOutVerts);
|
||||
|
||||
// increment outIndex for active lanes
|
||||
vOutIndex = _simd_blendv_epi32(vOutIndex, _simd_add_epi32(vOutIndex, _simd_set1_epi32(1)), intersectMask);
|
||||
}
|
||||
|
||||
// increment loop index and update active mask
|
||||
vCurIndex = _simd_add_epi32(vCurIndex, _simd_set1_epi32(1));
|
||||
vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
}
|
||||
|
||||
return vOutIndex;
|
||||
}
|
||||
|
||||
template<SWR_CLIPCODES ClippingPlane>
|
||||
simdscalari ClipLineToPlane(const float* pInVerts, const simdscalari& vNumInPts, uint32_t numInAttribs, float* pOutVerts)
|
||||
{
|
||||
simdscalari vCurIndex = _simd_setzero_si();
|
||||
simdscalari vOutIndex = _simd_setzero_si();
|
||||
simdscalar vActiveMask = _simd_castsi_ps(_simd_cmplt_epi32(vCurIndex, vNumInPts));
|
||||
|
||||
if (!_simd_testz_ps(vActiveMask, vActiveMask))
|
||||
{
|
||||
simdscalari s = vCurIndex;
|
||||
simdscalari p = _simd_add_epi32(s, _simd_set1_epi32(1));
|
||||
|
||||
// gather position
|
||||
simdvector vInPos0, vInPos1;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
vInPos0[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, s, c);
|
||||
vInPos1[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, p, c);
|
||||
}
|
||||
|
||||
// compute inside mask
|
||||
simdscalar s_in = inside<ClippingPlane>(vInPos0);
|
||||
simdscalar p_in = inside<ClippingPlane>(vInPos1);
|
||||
|
||||
// compute intersection mask (s_in != p_in)
|
||||
simdscalar intersectMask = _simd_xor_ps(s_in, p_in);
|
||||
intersectMask = _simd_and_ps(intersectMask, vActiveMask);
|
||||
|
||||
// store s if inside
|
||||
s_in = _simd_and_ps(s_in, vActiveMask);
|
||||
if (!_simd_testz_ps(s_in, s_in))
|
||||
{
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
|
||||
}
|
||||
|
||||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
|
||||
ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
|
||||
}
|
||||
}
|
||||
|
||||
// increment outIndex
|
||||
vOutIndex = _simd_blendv_epi32(vOutIndex, _simd_add_epi32(vOutIndex, _simd_set1_epi32(1)), s_in);
|
||||
}
|
||||
|
||||
// compute and store intersection
|
||||
if (!_simd_testz_ps(intersectMask, intersectMask))
|
||||
{
|
||||
intersect<ClippingPlane>(intersectMask, s, p, vInPos0, vInPos1, vOutIndex, pInVerts, numInAttribs, pOutVerts);
|
||||
|
||||
// increment outIndex for active lanes
|
||||
vOutIndex = _simd_blendv_epi32(vOutIndex, _simd_add_epi32(vOutIndex, _simd_set1_epi32(1)), intersectMask);
|
||||
}
|
||||
|
||||
// store p if inside
|
||||
p_in = _simd_and_ps(p_in, vActiveMask);
|
||||
if (!_simd_testz_ps(p_in, p_in))
|
||||
{
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, p_in, vOutIndex, c, vInPos1[c]);
|
||||
}
|
||||
|
||||
// interpolate attributes and store
|
||||
for (uint32_t a = 0; a < numInAttribs; ++a)
|
||||
{
|
||||
uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + a;
|
||||
for (uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
simdscalar vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c);
|
||||
ScatterComponent(pOutVerts, attribSlot, p_in, vOutIndex, c, vAttrib);
|
||||
}
|
||||
}
|
||||
|
||||
// increment outIndex
|
||||
vOutIndex = _simd_blendv_epi32(vOutIndex, _simd_add_epi32(vOutIndex, _simd_set1_epi32(1)), p_in);
|
||||
}
|
||||
}
|
||||
|
||||
return vOutIndex;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Vertical clipper. Clips SIMD primitives at a time
|
||||
/// @param pVertices - pointer to vertices in SOA form. Clipper will read input and write results to this buffer
|
||||
/// @param vPrimMask - mask of valid input primitives, including non-clipped prims
|
||||
/// @param numAttribs - number of valid input attribs, including position
|
||||
simdscalari ClipPrims(float* pVertices, const simdscalar& vPrimMask, const simdscalar& vClipMask, int numAttribs)
|
||||
{
|
||||
// temp storage
|
||||
simdvertex tempVertices[7];
|
||||
float* pTempVerts = (float*)&tempVertices[0];
|
||||
|
||||
// zero out num input verts for non-active lanes
|
||||
simdscalari vNumInPts = _simd_set1_epi32(NumVertsPerPrim);
|
||||
vNumInPts = _simd_blendv_epi32(_simd_setzero_si(), vNumInPts, vClipMask);
|
||||
|
||||
// clip prims to frustum
|
||||
simdscalari vNumOutPts;
|
||||
if (NumVertsPerPrim == 3)
|
||||
{
|
||||
vNumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
|
||||
vNumOutPts = ClipTriToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
|
||||
vNumOutPts = ClipTriToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
|
||||
vNumOutPts = ClipTriToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
|
||||
vNumOutPts = ClipTriToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
|
||||
vNumOutPts = ClipTriToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
|
||||
}
|
||||
else
|
||||
{
|
||||
SWR_ASSERT(NumVertsPerPrim == 2);
|
||||
vNumOutPts = ClipLineToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
|
||||
vNumOutPts = ClipLineToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
|
||||
vNumOutPts = ClipLineToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
|
||||
vNumOutPts = ClipLineToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
|
||||
vNumOutPts = ClipLineToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
|
||||
vNumOutPts = ClipLineToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
|
||||
}
|
||||
|
||||
// restore num verts for non-clipped, active lanes
|
||||
simdscalar vNonClippedMask = _simd_andnot_ps(vClipMask, vPrimMask);
|
||||
vNumOutPts = _simd_blendv_epi32(vNumOutPts, _simd_set1_epi32(NumVertsPerPrim), vNonClippedMask);
|
||||
|
||||
return vNumOutPts;
|
||||
}
|
||||
|
||||
const uint32_t workerId;
|
||||
const DRIVER_TYPE driverType;
|
||||
DRAW_CONTEXT* pDC;
|
||||
const API_STATE& state;
|
||||
simdscalar clipCodes[NumVertsPerPrim];
|
||||
};
|
||||
|
||||
|
||||
// pipeline stage functions
// Free-function clipper entry points for triangles, lines and points; only
// the declarations appear here. All three take the same parameter list:
// draw context, PA state, worker id, assembled prims, lane mask, primitive IDs.
|
||||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId);
|
|
@ -0,0 +1,495 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file context.h
|
||||
*
|
||||
* @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
|
||||
* The SWR_CONTEXT is our global context and contains the DC ring,
|
||||
* thread state, etc.
|
||||
*
|
||||
* The DRAW_CONTEXT contains all state associated with a draw operation.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <algorithm>
|
||||
|
||||
#include "core/api.h"
|
||||
#include "core/utils.h"
|
||||
#include "core/arena.h"
|
||||
#include "core/fifo.hpp"
|
||||
#include "core/knobs.h"
|
||||
#include "common/simdintrin.h"
|
||||
#include "core/threads.h"
|
||||
|
||||
// x.8 fixed point precision values
|
||||
#define FIXED_POINT_SHIFT 8
|
||||
#define FIXED_POINT_SCALE 256
|
||||
|
||||
// x.16 fixed point precision values
|
||||
#define FIXED_POINT16_SHIFT 16
|
||||
#define FIXED_POINT16_SCALE 65536
|
||||
|
||||
struct SWR_CONTEXT;
|
||||
struct DRAW_CONTEXT;
|
||||
|
||||
// Per-primitive flags and small values; embedded in SWR_TRIANGLE_DESC and
// TRIANGLE_WORK_DESC. The four bit-fields add up to exactly 32 bits.
struct TRI_FLAGS
|
||||
{
|
||||
uint32_t frontFacing : 1;
|
||||
uint32_t yMajor : 1;
|
||||
// one bit per pixel of a SIMD tile
|
||||
uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
|
||||
// pads the bit-fields above to a full 32 bits
|
||||
uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
|
||||
float pointSize;
|
||||
uint32_t primID;
|
||||
uint32_t renderTargetArrayIndex;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_TRIANGLE_DESC
/// Triangle description passed by reference to the backend function
/// (see PFN_BACKEND_FUNC).
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
struct SWR_TRIANGLE_DESC
|
||||
{
|
||||
// three values each for I, J, Z and 1/W
// NOTE(review): presumably the A/B/C plane-equation coefficients also held
// in BarycentricCoeffs - confirm against the code that fills this in
float I[3];
|
||||
float J[3];
|
||||
float Z[3];
|
||||
float OneOverW[3];
|
||||
float recipDet;
|
||||
|
||||
// buffers referenced by the descriptor; ownership and layout are not
// visible in this header
float *pRecipW;
|
||||
float *pAttribs;
|
||||
float *pPerspAttribs;
|
||||
float *pSamplePos;
|
||||
float *pUserClipBuffer;
|
||||
|
||||
// one 64-bit coverage mask per sample
uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
|
||||
|
||||
TRI_FLAGS triFlags;
|
||||
};
|
||||
|
||||
// Triangle payload of a backend work item (BE_WORK::desc.tri).
struct TRIANGLE_WORK_DESC
|
||||
{
|
||||
float *pTriBuffer;
|
||||
float *pAttribs;
|
||||
float *pUserClipBuffer;
|
||||
uint32_t numAttribs;
|
||||
TRI_FLAGS triFlags;
|
||||
};
|
||||
|
||||
// Clear flags, accessible either as a 3-bit mask or as the raw 32-bit word.
union CLEAR_FLAGS
|
||||
{
|
||||
struct
|
||||
{
|
||||
// NOTE(review): meaning of the three bits is not defined in this header
uint32_t mask : 3;
|
||||
};
|
||||
uint32_t bits;
|
||||
};
|
||||
|
||||
// Clear payload of a work item (BE_WORK / FE_WORK ::desc.clear): what to
// clear and the values to clear to.
struct CLEAR_DESC
|
||||
{
|
||||
CLEAR_FLAGS flags;
|
||||
float clearRTColor[4]; // RGBA_32F
|
||||
float clearDepth; // [0..1]
|
||||
BYTE clearStencil;
|
||||
};
|
||||
|
||||
// Invalidate-tiles payload of a work item (::desc.invalidateTiles).
struct INVALIDATE_TILES_DESC
|
||||
{
|
||||
// NOTE(review): presumably one bit per render target attachment - confirm
uint32_t attachmentMask;
|
||||
};
|
||||
|
||||
// Sync payload of a work item (::desc.sync): a callback plus three 64-bit
// user values.
// NOTE(review): presumably the user values are passed to the callback - confirm
struct SYNC_DESC
|
||||
{
|
||||
PFN_CALLBACK_FUNC pfnCallbackFunc;
|
||||
uint64_t userData;
|
||||
uint64_t userData2;
|
||||
uint64_t userData3;
|
||||
};
|
||||
|
||||
// Query-stats payload of a work item (::desc.queryStats).
struct QUERY_DESC
|
||||
{
|
||||
// stats structure associated with the query; who fills it is not visible here
SWR_STATS* pStats;
|
||||
};
|
||||
|
||||
// Store-tiles payload of a work item (::desc.storeTiles): the attachment to
// store and the tile state to apply afterwards.
struct STORE_TILES_DESC
|
||||
{
|
||||
SWR_RENDERTARGET_ATTACHMENT attachment;
|
||||
SWR_TILE_STATE postStoreTileState;
|
||||
};
|
||||
|
||||
// Compute dispatch dimensions: thread group count along X, Y and Z.
struct COMPUTE_DESC
|
||||
{
|
||||
uint32_t threadGroupCountX;
|
||||
uint32_t threadGroupCountY;
|
||||
uint32_t threadGroupCountZ;
|
||||
};
|
||||
|
||||
typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
|
||||
|
||||
// Kind of a work item; selects which member of the FE_WORK / BE_WORK
// descriptor union is meaningful.
enum WORK_TYPE
{
    SYNC            = 0,
    DRAW            = 1,
    CLEAR           = 2,
    INVALIDATETILES = 3,
    STORETILES      = 4,
    QUERYSTATS      = 5,
};
|
||||
|
||||
// Backend work item: a type tag, the function that processes it, and a
// descriptor union with one member per kind of backend work.
struct BE_WORK
|
||||
{
|
||||
WORK_TYPE type;
|
||||
PFN_WORK_FUNC pfnWork;
|
||||
// payload; which member is valid depends on the kind of work
union
|
||||
{
|
||||
SYNC_DESC sync;
|
||||
TRIANGLE_WORK_DESC tri;
|
||||
CLEAR_DESC clear;
|
||||
INVALIDATE_TILES_DESC invalidateTiles;
|
||||
STORE_TILES_DESC storeTiles;
|
||||
QUERY_DESC queryStats;
|
||||
} desc;
|
||||
};
|
||||
|
||||
// Draw payload of a frontend work item (FE_WORK::desc.draw). The two
// anonymous unions overlay the indexed and non-indexed variants of the same
// slot, so only the member matching the kind of draw is meaningful.
struct DRAW_WORK
|
||||
{
|
||||
DRAW_CONTEXT* pDC;
|
||||
union
|
||||
{
|
||||
uint32_t numIndices; // DrawIndexed: Number of indices for draw.
|
||||
uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
|
||||
};
|
||||
union
|
||||
{
|
||||
const int32_t* pIB; // DrawIndexed: App supplied indices
|
||||
uint32_t startVertex; // Draw: Starting vertex in VB to render from.
|
||||
};
|
||||
int32_t baseVertex;
|
||||
uint32_t numInstances; // Number of instances
|
||||
uint32_t startInstance; // Instance offset
|
||||
uint32_t startPrimID; // starting primitiveID for this draw batch
|
||||
uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
|
||||
SWR_FORMAT type; // index buffer type
|
||||
};
|
||||
|
||||
typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
|
||||
// Frontend work item: a type tag, the function that processes it, and a
// descriptor union with one member per kind of frontend work.
struct FE_WORK
|
||||
{
|
||||
WORK_TYPE type;
|
||||
PFN_FE_WORK_FUNC pfnWork;
|
||||
// payload; which member is valid depends on the kind of work
union
|
||||
{
|
||||
SYNC_DESC sync;
|
||||
DRAW_WORK draw;
|
||||
CLEAR_DESC clear;
|
||||
INVALIDATE_TILES_DESC invalidateTiles;
|
||||
STORE_TILES_DESC storeTiles;
|
||||
QUERY_DESC queryStats;
|
||||
} desc;
|
||||
};
|
||||
|
||||
// Guardband extents, one float per side.
struct GUARDBAND
{
    float left;
    float right;
    float top;
    float bottom;
};
|
||||
|
||||
struct PA_STATE;
|
||||
|
||||
// function signature for pipeline stages that execute after primitive assembly
|
||||
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
|
||||
uint32_t primMask, simdscalari primID);
|
||||
|
||||
// Complete pipeline state for a draw, grouped by pipeline stage. An
// instance is embedded in DRAW_STATE.
OSALIGNLINE(struct) API_STATE
|
||||
{
|
||||
// Vertex Buffers
|
||||
SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
|
||||
|
||||
// Index Buffer
|
||||
SWR_INDEX_BUFFER_STATE indexBuffer;
|
||||
|
||||
// FS - Fetch Shader State
|
||||
PFN_FETCH_FUNC pfnFetchFunc;
|
||||
|
||||
// VS - Vertex Shader State
|
||||
PFN_VERTEX_FUNC pfnVertexFunc;
|
||||
|
||||
// GS - Geometry Shader State
|
||||
PFN_GS_FUNC pfnGsFunc;
|
||||
SWR_GS_STATE gsState;
|
||||
|
||||
// CS - Compute Shader
|
||||
PFN_CS_FUNC pfnCsFunc;
|
||||
uint32_t totalThreadsInGroup;
|
||||
|
||||
// FE - Frontend State
|
||||
SWR_FRONTEND_STATE frontendState;
|
||||
|
||||
// SOS - Streamout Shader State
|
||||
PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
|
||||
|
||||
// Streamout state
|
||||
SWR_STREAMOUT_STATE soState;
|
||||
// mutable: can be modified even through a const API_STATE reference
mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
|
||||
|
||||
// Tessellation State
|
||||
PFN_HS_FUNC pfnHsFunc;
|
||||
PFN_DS_FUNC pfnDsFunc;
|
||||
SWR_TS_STATE tsState;
|
||||
|
||||
// Specifies which VS outputs are sent to PS.
|
||||
// Does not include position
|
||||
uint32_t linkageMask;
|
||||
uint32_t linkageCount;
|
||||
// the n-th set bit of linkageMask uses linkageMap[n] as its attribute slot
uint8_t linkageMap[MAX_ATTRIBUTES];
|
||||
|
||||
// attrib mask, specifies the total set of attributes used
|
||||
// by the frontend (vs, so, gs)
|
||||
uint32_t feAttribMask;
|
||||
|
||||
PRIMITIVE_TOPOLOGY topology;
|
||||
bool forceFront;
|
||||
|
||||
// RS - Rasterizer State
|
||||
SWR_RASTSTATE rastState;
|
||||
// floating point multisample offsets
|
||||
float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
|
||||
|
||||
GUARDBAND gbState;
|
||||
|
||||
SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
|
||||
SWR_VIEWPORT_MATRIX vpMatrix[KNOB_NUM_VIEWPORTS_SCISSORS];
|
||||
|
||||
BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
|
||||
BBOX scissorInFixedPoint;
|
||||
|
||||
// Backend state
|
||||
SWR_BACKEND_STATE backendState;
|
||||
|
||||
// PS - Pixel shader state
|
||||
SWR_PS_STATE psState;
|
||||
|
||||
SWR_DEPTH_STENCIL_STATE depthStencilState;
|
||||
|
||||
// OM - Output Merger State
|
||||
SWR_BLEND_STATE blendState;
|
||||
PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
|
||||
|
||||
// Stats are incremented when this is true.
|
||||
bool enableStats;
|
||||
|
||||
// hot tile enables: 8 bits for color, 1 for depth, 1 for stencil
// NOTE(review): presumably one color bit per render target - confirm
struct
|
||||
{
|
||||
uint32_t colorHottileEnable : 8;
|
||||
uint32_t depthHottileEnable: 1;
|
||||
uint32_t stencilHottileEnable : 1;
|
||||
};
|
||||
};
|
||||
|
||||
class MacroTileMgr;
|
||||
class DispatchQueue;
|
||||
|
||||
// Output buffer pointers handed to the backend function (see
// PFN_BACKEND_FUNC): one color pointer per render target plus depth and
// stencil.
struct RenderOutputBuffers
|
||||
{
|
||||
uint8_t* pColor[SWR_NUM_RENDERTARGETS];
|
||||
uint8_t* pDepth;
|
||||
uint8_t* pStencil;
|
||||
};
|
||||
|
||||
// Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
// All members are SIMD values; the a/b/c suffix names the coefficient.
|
||||
struct BarycentricCoeffs
|
||||
{
|
||||
// I plane equation
simdscalar vIa;
|
||||
simdscalar vIb;
|
||||
simdscalar vIc;
|
||||
|
||||
// J plane equation
simdscalar vJa;
|
||||
simdscalar vJb;
|
||||
simdscalar vJc;
|
||||
|
||||
// Z plane equation
simdscalar vZa;
|
||||
simdscalar vZb;
|
||||
simdscalar vZc;
|
||||
|
||||
simdscalar vRecipDet;
|
||||
|
||||
// 1/W terms
simdscalar vAOneOverW;
|
||||
simdscalar vBOneOverW;
|
||||
simdscalar vCOneOverW;
|
||||
};
|
||||
|
||||
// pipeline function pointer types
|
||||
typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
|
||||
typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
|
||||
const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar);
|
||||
typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
|
||||
typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
|
||||
typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
|
||||
const simdscalar, const simdscalar);
|
||||
|
||||
struct BACKEND_FUNCS
|
||||
{
|
||||
PFN_BACKEND_FUNC pfnBackend;
|
||||
PFN_CALC_PIXEL_BARYCENTRICS pfnCalcPixelBarycentrics;
|
||||
PFN_CALC_SAMPLE_BARYCENTRICS pfnCalcSampleBarycentrics;
|
||||
PFN_CALC_CENTROID_BARYCENTRICS pfnCalcCentroidBarycentrics;
|
||||
PFN_OUTPUT_MERGER pfnOutputMerger;
|
||||
};
|
||||
|
||||
// Draw State
|
||||
struct DRAW_STATE
|
||||
{
|
||||
API_STATE state;
|
||||
|
||||
void* pPrivateState; // Its required the driver sets this up for each draw.
|
||||
|
||||
// pipeline function pointers, filled in by API thread when setting up the draw
|
||||
BACKEND_FUNCS backendFuncs;
|
||||
PFN_PROCESS_PRIMS pfnProcessPrims;
|
||||
|
||||
Arena* pArena; // This should only be used by API thread.
|
||||
};
|
||||
|
||||
// Draw Context
|
||||
// The api thread sets up a draw context that exists for the life of the draw.
|
||||
// This draw context maintains all of the state needed for the draw operation.
|
||||
struct DRAW_CONTEXT
|
||||
{
|
||||
SWR_CONTEXT *pContext;
|
||||
|
||||
uint64_t drawId;
|
||||
|
||||
bool isCompute; // Is this DC a compute context?
|
||||
|
||||
FE_WORK FeWork;
|
||||
volatile OSALIGNLINE(uint32_t) FeLock;
|
||||
volatile OSALIGNLINE(bool) inUse;
|
||||
volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
|
||||
|
||||
// Have all worker threads moved past draw in DC ring?
|
||||
volatile OSALIGNLINE(uint32_t) threadsDoneFE;
|
||||
volatile OSALIGNLINE(uint32_t) threadsDoneBE;
|
||||
|
||||
uint64_t dependency;
|
||||
|
||||
MacroTileMgr* pTileMgr;
|
||||
|
||||
// The following fields are valid if isCompute is true.
|
||||
volatile OSALIGNLINE(bool) doneCompute; // Is this dispatch done? (isCompute)
|
||||
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
|
||||
|
||||
DRAW_STATE* pState;
|
||||
Arena* pArena;
|
||||
|
||||
uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
|
||||
};
|
||||
|
||||
/// @brief Returns the API state attached to a draw context.
/// @param pDC - draw context; both it and its pState pointer must be non-null
///              (checked with SWR_ASSERT).
/// @return reference to pDC->pState->state
INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
{
    SWR_ASSERT(pDC != nullptr);
    SWR_ASSERT(pDC->pState != nullptr);

    const DRAW_STATE* pDrawState = pDC->pState;
    return pDrawState->state;
}
|
||||
|
||||
/// @brief Returns the driver private state pointer attached to a draw context.
/// @param pDC - draw context; both it and its pState pointer must be non-null
///              (checked with SWR_ASSERT).
/// @return pDC->pState->pPrivateState
INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC)
{
    SWR_ASSERT(pDC != nullptr);
    SWR_ASSERT(pDC->pState != nullptr);

    const DRAW_STATE* pDrawState = pDC->pState;
    return pDrawState->pPrivateState;
}
|
||||
|
||||
// Forward declaration; SWR_CONTEXT only stores a pointer to the hot tile manager (pHotTileMgr).
class HotTileMgr;
|
||||
|
||||
struct SWR_CONTEXT
|
||||
{
|
||||
// Draw Context Ring
|
||||
// Each draw needs its own state in order to support mulitple draws in flight across multiple threads.
|
||||
// We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
|
||||
// of draws that can be in flight at any given time.
|
||||
//
|
||||
// Description:
|
||||
// 1. State - When an application first sets state we'll request a new draw context to use.
|
||||
// a. If there are no available draw contexts then we'll have to wait until one becomes free.
|
||||
// b. If one is available then set pCurDrawContext to point to it and mark it in use.
|
||||
// c. All state calls set state on pCurDrawContext.
|
||||
// 2. Draw - Creates submits a work item that is associated with current draw context.
|
||||
// a. Set pPrevDrawContext = pCurDrawContext
|
||||
// b. Set pCurDrawContext to NULL.
|
||||
// 3. State - When an applications sets state after draw
|
||||
// a. Same as step 1.
|
||||
// b. State is copied from prev draw context to current.
|
||||
DRAW_CONTEXT* dcRing;
|
||||
|
||||
DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
|
||||
DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
|
||||
|
||||
// Draw State Ring
|
||||
// When draw are very large (lots of primitives) then the API thread will break these up.
|
||||
// These split draws all have identical state. So instead of storing the state directly
|
||||
// in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
|
||||
// to reference a single entry in the DS ring.
|
||||
DRAW_STATE* dsRing;
|
||||
|
||||
uint32_t curStateId; // Current index to the next available entry in the DS ring.
|
||||
|
||||
DRAW_STATE* subCtxSave; // Save area for inactive contexts.
|
||||
uint32_t curSubCtxId; // Current index for active state subcontext.
|
||||
uint32_t numSubContexts; // Number of available subcontexts
|
||||
|
||||
uint32_t NumWorkerThreads;
|
||||
|
||||
THREAD_POOL threadPool; // Thread pool associated with this context
|
||||
|
||||
std::condition_variable FifosNotEmpty;
|
||||
std::mutex WaitLock;
|
||||
|
||||
// Draw Contexts will get a unique drawId generated from this
|
||||
uint64_t nextDrawId;
|
||||
|
||||
// most recent draw id enqueued by the API thread
|
||||
// written by api thread, read by multiple workers
|
||||
OSALIGNLINE(volatile uint64_t) DrawEnqueued;
|
||||
|
||||
DRIVER_TYPE driverType;
|
||||
|
||||
uint32_t privateStateSize;
|
||||
|
||||
HotTileMgr *pHotTileMgr;
|
||||
|
||||
// tile load/store functions, passed in at create context time
|
||||
PFN_LOAD_TILE pfnLoadTile;
|
||||
PFN_STORE_TILE pfnStoreTile;
|
||||
PFN_CLEAR_TILE pfnClearTile;
|
||||
|
||||
// Global Stats
|
||||
SWR_STATS stats[KNOB_MAX_NUM_THREADS];
|
||||
|
||||
// Scratch space for workers.
|
||||
uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
|
||||
};
|
||||
|
||||
// Declaration only; the definition is not in this header.
// NOTE(review): presumably blocks until the draws that drawId depends on have completed -- confirm against the implementation.
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
|
||||
// Declaration only; the definition is not in this header.
// NOTE(review): presumably notifies the worker threads waiting on pContext->FifosNotEmpty -- confirm against the implementation.
void WakeAllThreads(SWR_CONTEXT *pContext);
|
||||
|
||||
// Adds `count` to stat field `name` in pContext->stats[workerId], but only when
// the draw's API state has enableStats set.
// The macro expands to a bare `if` statement and relies on `pDC`, `pContext`
// and `workerId` being in scope at the point of use.
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += count; }
|
||||
// Overwrites stat field `name` in pContext->stats[workerId] with `count`, but
// only when the draw's API state has enableStats set.
// The macro expands to a bare `if` statement and relies on `pDC`, `pContext`
// and `workerId` being in scope at the point of use.
#define SET_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name = count; }
|
|
@ -0,0 +1,245 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file depthstencil.h
|
||||
*
|
||||
* @brief Implements depth/stencil functionality
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
#include "common/os.h"
|
||||
#include "format_conversion.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Applies a stencil operation to a SIMD of stencil values.
///        Lanes selected by mask receive the result of the operation;
///        all other lanes keep their current value.
/// @param op - stencil operation to apply
/// @param mask - per-lane select mask handed to _simd_blendv_ps
/// @param stencilRefps - stencil reference value (used by STENCILOP_REPLACE)
/// @param stencilps - in/out stencil values
INLINE
void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simdscalar &stencilps)
{
    const simdscalari vStencilInt = _simd_castps_si(stencilps);

    // Pick the value that selected lanes should take; ops that leave the
    // stencil untouched return before the blend.
    simdscalar vReplacement;
    switch (op)
    {
    case STENCILOP_ZERO:
        vReplacement = _simd_setzero_ps();
        break;
    case STENCILOP_REPLACE:
        vReplacement = stencilRefps;
        break;
    case STENCILOP_INCRSAT:
        // saturating 8-bit add of 1
        vReplacement = _simd_castsi_ps(_simd_adds_epu8(vStencilInt, _simd_set1_epi32(1)));
        break;
    case STENCILOP_DECRSAT:
        // saturating 8-bit subtract of 1
        vReplacement = _simd_castsi_ps(_simd_subs_epu8(vStencilInt, _simd_set1_epi32(1)));
        break;
    case STENCILOP_INCR:
        // wrapping 8-bit add of 1
        vReplacement = _simd_castsi_ps(_simd_add_epi8(vStencilInt, _simd_set1_epi32(1)));
        break;
    case STENCILOP_DECR:
        // wrapping 8-bit decrement: adding 0xff to a byte is the same as subtracting 1
        vReplacement = _simd_castsi_ps(_simd_add_epi8(vStencilInt, _simd_set1_epi32(0xff)));
        break;
    case STENCILOP_INVERT:
        // (~stencil) & all-ones; cmpeq(0, 0) produces the all-ones pattern
        vReplacement = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
        break;
    case STENCILOP_KEEP:
    default:
        return;
    }

    stencilps = _simd_blendv_ps(stencilps, vReplacement, mask);
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Runs the depth and stencil tests for a SIMD of pixels.
/// @param pViewport - viewport supplying the minZ/maxZ clamp range
/// @param pDSState - depth/stencil state
/// @param frontFacing - selects front or backface stencil state when
///                      double-sided stencil is enabled
/// @param interpZ - interpolated Z, clamped to [minZ..maxZ] before testing
/// @param pDepthBase - depth values to test against (loaded as floats)
/// @param coverageMask - coverage mask
/// @param pStencilBase - stencil values to test against (R8_UINT)
/// @param pStencilMask - out: result of the stencil test alone
/// @return depth test result AND stencil test result AND coverage
INLINE
simdscalar DepthStencilTest(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
                 bool frontFacing, simdscalar interpZ, BYTE* pDepthBase, simdscalar coverageMask, BYTE *pStencilBase,
                 simdscalar* pStencilMask)
{
    static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
    static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");

    // clamp Z to viewport [minZ..maxZ]
    const simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
    const simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
    interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ));

    // Depth test. The result defaults to "pass" for every lane.
    simdscalar depthResult = _simd_set1_ps(-1.0f);
    if (pDSState->depthTestEnable)
    {
        const uint32_t depthTestFunc = pDSState->depthTestFunc;

        if (depthTestFunc == ZFUNC_NEVER)
        {
            depthResult = _simd_setzero_ps();
        }
        else if (depthTestFunc != ZFUNC_ALWAYS)
        {
            // only functions that actually compare need the depth buffer contents
            const simdscalar zbuf = _simd_load_ps((const float*)pDepthBase);

            switch (depthTestFunc)
            {
            case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
            case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
            case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
            case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
            case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
            case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
            default: break;
            }
        }
    }

    // Stencil test. The result defaults to "pass" for every lane.
    simdscalar stencilMask = _simd_set1_ps(-1.0f);
    if (pDSState->stencilTestEnable)
    {
        // backface state only applies to back-facing prims with double-sided stencil enabled
        const bool useFrontState = frontFacing || !pDSState->doubleSidedStencilTestEnable;

        const uint8_t  stencilRefValue = useFrontState ? pDSState->stencilRefValue : pDSState->backfaceStencilRefValue;
        const uint32_t stencilTestFunc = useFrontState ? pDSState->stencilTestFunc : pDSState->backfaceStencilTestFunc;
        const uint8_t  stencilTestMask = useFrontState ? pDSState->stencilTestMask : pDSState->backfaceStencilTestMask;

        if (stencilTestFunc == ZFUNC_NEVER)
        {
            stencilMask = _simd_setzero_ps();
        }
        else if (stencilTestFunc != ZFUNC_ALWAYS)
        {
            simdvector sbuf;
            LoadSOA<R8_UINT>(pStencilBase, sbuf);

            // apply stencil read mask
            simdscalar stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));

            // do stencil compare in float to avoid simd integer emulation in AVX1
            stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));

            const simdscalar stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));

            // the reference value is the left-hand operand of each comparison
            switch (stencilTestFunc)
            {
            case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
            case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
            default: break;
            }
        }
    }

    *pStencilMask = stencilMask;

    // a lane may write depth only if it passed both tests and is covered
    return _simd_and_ps(_simd_and_ps(depthResult, stencilMask), coverageMask);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Writes depth and updates stencil for a SIMD of pixels, according
///        to the masks produced by the depth/stencil test.
/// @param pViewport - viewport supplying the minZ/maxZ clamp range
/// @param pDSState - depth/stencil state
/// @param frontFacing - selects front or backface stencil state when
///                      double-sided stencil is enabled
/// @param interpZ - interpolated Z, clamped to [minZ..maxZ] before the store
/// @param pDepthBase - destination depth values
/// @param depthMask - lanes that passed the depth/stencil test
/// @param coverageMask - coverage mask
/// @param pStencilBase - stencil values to update (R8_UINT)
/// @param stencilMask - lanes that passed the stencil test
INLINE
void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
        bool frontFacing, simdscalar interpZ, BYTE* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
        BYTE *pStencilBase, const simdscalar& stencilMask)
{
    if (pDSState->depthWriteEnable)
    {
        // clamp Z to viewport [minZ..maxZ]
        const simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
        const simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
        interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ));

        // store only the lanes that passed and are covered
        const simdscalar vWriteLanes = _simd_and_ps(depthMask, coverageMask);
        _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vWriteLanes), interpZ);
    }

    if (!pDSState->stencilWriteEnable)
    {
        return;
    }

    simdvector sbuf;
    LoadSOA<R8_UINT>(pStencilBase, sbuf);
    const simdscalar origStencil = sbuf.v[0];

    // backface state only applies to back-facing prims with double-sided stencil enabled
    const bool useFrontState = frontFacing || !pDSState->doubleSidedStencilTestEnable;

    const uint8_t  stencilRefValue        = useFrontState ? pDSState->stencilRefValue        : pDSState->backfaceStencilRefValue;
    const uint32_t stencilFailOp          = useFrontState ? pDSState->stencilFailOp          : pDSState->backfaceStencilFailOp;
    const uint32_t stencilPassDepthPassOp = useFrontState ? pDSState->stencilPassDepthPassOp : pDSState->backfaceStencilPassDepthPassOp;
    const uint32_t stencilPassDepthFailOp = useFrontState ? pDSState->stencilPassDepthFailOp : pDSState->backfaceStencilPassDepthFailOp;
    const uint8_t  stencilWriteMask       = useFrontState ? pDSState->stencilWriteMask       : pDSState->backfaceStencilWriteMask;

    simdscalar stencilps = origStencil;
    const simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));

    // lane masks for the three possible test outcomes
    const simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
    const simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
    const simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));

    StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
    StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
    StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);

    // apply stencil write mask: new bits where the mask is set, original bits elsewhere
    const simdscalar vWriteMask = _simd_castsi_ps(_simd_set1_epi32(stencilWriteMask));
    stencilps = _simd_and_ps(stencilps, vWriteMask);
    stencilps = _simd_or_ps(_simd_andnot_ps(vWriteMask, origStencil), stencilps);

    // uncovered lanes keep their original stencil value
    simdvector stencilResult;
    stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
    StoreSOA<R8_UINT>(stencilResult, pStencilBase);
}
|
|
@ -0,0 +1,136 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file fifo.hpp
|
||||
*
|
||||
* @brief Definitions for our fifos used for thread communication.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "common/os.h"
|
||||
#include "arena.h"
|
||||
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
template<class T>
|
||||
struct QUEUE
|
||||
{
|
||||
OSALIGNLINE(volatile uint32_t) mLock{ 0 };
|
||||
OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
|
||||
std::vector<T*> mBlocks;
|
||||
T* mCurBlock{ nullptr };
|
||||
uint32_t mHead{ 0 };
|
||||
uint32_t mTail{ 0 };
|
||||
uint32_t mCurBlockIdx{ 0 };
|
||||
|
||||
// power of 2
|
||||
static const uint32_t mBlockSizeShift = 6;
|
||||
static const uint32_t mBlockSize = 1 << mBlockSizeShift;
|
||||
|
||||
void clear(Arena& arena)
|
||||
{
|
||||
mHead = 0;
|
||||
mTail = 0;
|
||||
mBlocks.clear();
|
||||
T* pNewBlock = (T*)arena.Alloc(sizeof(T)*mBlockSize);
|
||||
mBlocks.push_back(pNewBlock);
|
||||
mCurBlock = pNewBlock;
|
||||
mCurBlockIdx = 0;
|
||||
|
||||
mNumEntries = 0;
|
||||
_ReadWriteBarrier();
|
||||
mLock = 0;
|
||||
}
|
||||
|
||||
uint32_t getNumQueued()
|
||||
{
|
||||
return mNumEntries;
|
||||
}
|
||||
|
||||
bool tryLock()
|
||||
{
|
||||
if (mLock)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// try to lock the FIFO
|
||||
LONG initial = InterlockedCompareExchange(&mLock, 1, 0);
|
||||
return (initial == 0);
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
mLock = 0;
|
||||
}
|
||||
|
||||
T* peek()
|
||||
{
|
||||
if (mNumEntries == 0)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
uint32_t block = mHead >> mBlockSizeShift;
|
||||
return &mBlocks[block][mHead & (mBlockSize-1)];
|
||||
}
|
||||
|
||||
void dequeue_noinc()
|
||||
{
|
||||
mHead ++;
|
||||
mNumEntries --;
|
||||
}
|
||||
|
||||
bool enqueue_try_nosync(Arena& arena, const T* entry)
|
||||
{
|
||||
memcpy(&mCurBlock[mTail], entry, sizeof(T));
|
||||
|
||||
mTail ++;
|
||||
if (mTail == mBlockSize)
|
||||
{
|
||||
if (++mCurBlockIdx < mBlocks.size())
|
||||
{
|
||||
mCurBlock = mBlocks[mCurBlockIdx];
|
||||
}
|
||||
else
|
||||
{
|
||||
T* newBlock = (T*)arena.Alloc(sizeof(T)*mBlockSize);
|
||||
SWR_ASSERT(newBlock);
|
||||
|
||||
mBlocks.push_back(newBlock);
|
||||
mCurBlock = newBlock;
|
||||
}
|
||||
|
||||
mTail = 0;
|
||||
}
|
||||
|
||||
mNumEntries ++;
|
||||
return true;
|
||||
}
|
||||
|
||||
void destroy()
|
||||
{
|
||||
}
|
||||
|
||||
};
|
|
@ -0,0 +1,196 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file format_conversion.h
|
||||
*
|
||||
* @brief Helpers for loading, clamping, normalizing and storing SIMD pixel data in SOA form.
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "format_types.h"
|
||||
#include "format_traits.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Load SIMD packed pixels in SOA format and converts to
|
||||
/// SOA RGBA32_FLOAT format.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param dst - output data in SOA form
|
||||
template<SWR_FORMAT SrcFormat>
|
||||
INLINE void LoadSOA(const BYTE *pSrc, simdvector &dst)
|
||||
{
|
||||
// fast path for float32
|
||||
if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
|
||||
{
|
||||
auto lambda = [&](int comp)
|
||||
{
|
||||
simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar)));
|
||||
|
||||
dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
|
||||
};
|
||||
|
||||
UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
|
||||
return;
|
||||
}
|
||||
|
||||
auto lambda = [&](int comp)
|
||||
{
|
||||
// load SIMD components
|
||||
simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc);
|
||||
|
||||
// unpack
|
||||
vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
|
||||
|
||||
// convert
|
||||
if (FormatTraits<SrcFormat>::isNormalized(comp))
|
||||
{
|
||||
vComp = _simd_cvtepi32_ps(_simd_castps_si(vComp));
|
||||
vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
|
||||
}
|
||||
|
||||
dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
|
||||
|
||||
pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8;
|
||||
};
|
||||
|
||||
UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Clamps the given component based on the requirements on the
|
||||
/// Format template arg
|
||||
/// @param vComp - SIMD vector of floats
|
||||
/// @param Component - component
|
||||
template<SWR_FORMAT Format>
|
||||
INLINE simdscalar Clamp(simdscalar vComp, uint32_t Component)
|
||||
{
|
||||
if (FormatTraits<Format>::isNormalized(Component))
|
||||
{
|
||||
if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
|
||||
{
|
||||
vComp = _simd_max_ps(vComp, _simd_setzero_ps());
|
||||
}
|
||||
|
||||
if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
|
||||
{
|
||||
vComp = _simd_max_ps(vComp, _simd_set1_ps(-1.0f));
|
||||
}
|
||||
vComp = _simd_min_ps(vComp, _simd_set1_ps(1.0f));
|
||||
}
|
||||
else if (FormatTraits<Format>::GetBPC(Component) < 32)
|
||||
{
|
||||
if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
|
||||
{
|
||||
int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
|
||||
int iMin = 0;
|
||||
simdscalari vCompi = _simd_castps_si(vComp);
|
||||
vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
|
||||
vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
|
||||
vComp = _simd_castsi_ps(vCompi);
|
||||
}
|
||||
else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
|
||||
{
|
||||
int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
|
||||
int iMin = -1 - iMax;
|
||||
simdscalari vCompi = _simd_castps_si(vComp);
|
||||
vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
|
||||
vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
|
||||
vComp = _simd_castsi_ps(vCompi);
|
||||
}
|
||||
}
|
||||
|
||||
return vComp;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Normalize the given component based on the requirements on the
|
||||
/// Format template arg
|
||||
/// @param vComp - SIMD vector of floats
|
||||
/// @param Component - component
|
||||
template<SWR_FORMAT Format>
|
||||
INLINE simdscalar Normalize(simdscalar vComp, uint32_t Component)
|
||||
{
|
||||
if (FormatTraits<Format>::isNormalized(Component))
|
||||
{
|
||||
vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<Format>::fromFloat(Component)));
|
||||
vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
|
||||
}
|
||||
return vComp;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Convert and store simdvector of pixels in SOA
|
||||
/// RGBA32_FLOAT to SOA format
|
||||
/// @param src - source data in SOA form
|
||||
/// @param dst - output data in SOA form
|
||||
template<SWR_FORMAT DstFormat>
|
||||
INLINE void StoreSOA(const simdvector &src, BYTE *pDst)
|
||||
{
|
||||
// fast path for float32
|
||||
if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
|
||||
{
|
||||
for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
|
||||
{
|
||||
simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
|
||||
|
||||
// Gamma-correct
|
||||
if (FormatTraits<DstFormat>::isSRGB)
|
||||
{
|
||||
if (comp < 3) // Input format is always RGBA32_FLOAT.
|
||||
{
|
||||
vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
|
||||
}
|
||||
}
|
||||
|
||||
_simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto lambda = [&](int comp)
|
||||
{
|
||||
simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
|
||||
|
||||
// Gamma-correct
|
||||
if (FormatTraits<DstFormat>::isSRGB)
|
||||
{
|
||||
if (comp < 3) // Input format is always RGBA32_FLOAT.
|
||||
{
|
||||
vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
|
||||
}
|
||||
}
|
||||
|
||||
// clamp
|
||||
vComp = Clamp<DstFormat>(vComp, comp);
|
||||
|
||||
// normalize
|
||||
vComp = Normalize<DstFormat>(vComp, comp);
|
||||
|
||||
// pack
|
||||
vComp = FormatTraits<DstFormat>::pack(comp, vComp);
|
||||
|
||||
// store
|
||||
FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
|
||||
|
||||
pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8;
|
||||
};
|
||||
|
||||
UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,327 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file frontend.h
|
||||
*
|
||||
* @brief Definitions for Frontend which handles vertex processing,
|
||||
* primitive assembly, clipping, binning, etc.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
#include "context.h"
|
||||
|
||||
/// @brief Converts four floats to fixed point: scales by FIXED_POINT_SCALE
///        and converts to 32-bit integers.
/// @param vIn - four float values
/// @return four 32-bit fixed point values
INLINE
__m128i fpToFixedPoint(const __m128 vIn)
{
    return _mm_cvtps_epi32(_mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE)));
}
|
||||
|
||||
/// @brief SIMD-width variant of fpToFixedPoint: scales by FIXED_POINT_SCALE
///        and converts to 32-bit integers.
/// @param vIn - SIMD of float values
/// @return SIMD of 32-bit fixed point values
INLINE
simdscalari fpToFixedPointVertical(const simdscalar vIn)
{
    return _simd_cvtps_epi32(_simd_mul_ps(vIn, _simd_set1_ps(FIXED_POINT_SCALE)));
}
|
||||
|
||||
|
||||
// Calculates the A and B coefficients for the 3 edges of the triangle
|
||||
//
|
||||
// maths for edge equations:
|
||||
// standard form of a line in 2d
|
||||
// Ax + By + C = 0
|
||||
// A = y0 - y1
|
||||
// B = x1 - x0
|
||||
// C = x0y1 - x1y0
|
||||
INLINE
|
||||
void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 & vA, __m128 & vB)
|
||||
{
|
||||
// vYsub = y1 y2 y0 dc
|
||||
__m128 vYsub = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
// vY = y0 y1 y2 dc
|
||||
vA = _mm_sub_ps(vY, vYsub);
|
||||
|
||||
// Result:
|
||||
// A[0] = y0 - y1
|
||||
// A[1] = y1 - y2
|
||||
// A[2] = y2 - y0
|
||||
|
||||
// vXsub = x1 x2 x0 dc
|
||||
__m128 vXsub = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
// vX = x0 x1 x2 dc
|
||||
vB = _mm_sub_ps(vXsub, vX);
|
||||
|
||||
// Result:
|
||||
// B[0] = x1 - x0
|
||||
// B[1] = x2 - x1
|
||||
// B[2] = x0 - x2
|
||||
}
|
||||
|
||||
/// @brief Calculates the edge equation A and B coefficients for a SIMD of
///        triangles, with one array entry per vertex / edge.
///          A[i] = y[i] - y[(i + 1) % 3]
///          B[i] = x[(i + 1) % 3] - x[i]
/// @param vX - x coordinates of vertices 0..2
/// @param vY - y coordinates of vertices 0..2
/// @param vA - out: A coefficients of edges 0..2
/// @param vB - out: B coefficients of edges 0..2
INLINE
void triangleSetupABVertical(const simdscalar vX[3], const simdscalar vY[3], simdscalar (&vA)[3], simdscalar (&vB)[3])
{
    // A = y0 - y1 (all three A's are written before any B)
    for (int edge = 0; edge < 3; ++edge)
    {
        vA[edge] = _simd_sub_ps(vY[edge], vY[(edge + 1) % 3]);
    }

    // B = x1 - x0
    for (int edge = 0; edge < 3; ++edge)
    {
        vB[edge] = _simd_sub_ps(vX[(edge + 1) % 3], vX[edge]);
    }
}
|
||||
|
||||
/// @brief Integer variant of triangleSetupAB. With vX / vY holding
///        (v0, v1, v2, don't-care) in lanes 0..3:
///          vA = (y0 - y1, y1 - y2, y2 - y0, dc)
///          vB = (x1 - x0, x2 - x1, x0 - x2, dc)
/// @param vX - x coordinates
/// @param vY - y coordinates
/// @param vA - out: A coefficients
/// @param vB - out: B coefficients
INLINE
void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i & vA, __m128i & vB)
{
    // rotate lanes so that lane i holds vertex (i + 1) % 3: (v1, v2, v0, dc)
    const __m128i vYNext = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
    vA = _mm_sub_epi32(vY, vYNext);

    const __m128i vXNext = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
    vB = _mm_sub_epi32(vXNext, vX);
}
|
||||
|
||||
// Vertical (one triangle per SIMD lane) integer form of the A/B edge setup.
//   vA[i] = y[i]   - y[i+1]
//   vB[i] = x[i+1] - x[i]        (indices wrap: vertex 2 pairs with vertex 0)
INLINE
void triangleSetupABIntVertical(const simdscalari vX[3], const simdscalari vY[3], simdscalari (&vA)[3], simdscalari (&vB)[3])
{
    // All A coefficients are written before any B coefficient, in index order.
    for (uint32_t edge = 0; edge < 3; ++edge)
    {
        const uint32_t next = (edge + 1) % 3;
        vA[edge] = _simd_sub_epi32(vY[edge], vY[next]);
    }

    for (uint32_t edge = 0; edge < 3; ++edge)
    {
        const uint32_t next = (edge + 1) % 3;
        vB[edge] = _simd_sub_epi32(vX[next], vX[edge]);
    }
}
|
||||
// Calculate the determinant of the triangle
|
||||
// 2 vectors between the 3 points: P, Q
|
||||
// Px = x0-x2, Py = y0-y2
|
||||
// Qx = x1-x2, Qy = y1-y2
|
||||
// |Px Qx|
|
||||
// det = | | = PxQy - PyQx
|
||||
// |Py Qy|
|
||||
// simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
|
||||
// try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
|
||||
// : B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
|
||||
// : B[2]*A[1] - (-1)(-1)(y2-y0)*(x2-x1)
|
||||
// : B[2]*A[1] - A[2]*B[1]
|
||||
// Computes the triangle determinant  det = A[1]*B[2] - A[2]*B[1]  from the
// integer edge coefficients and returns it as a float.
//
// vA / vB hold [A0 A1 A2 dc] / [B0 B1 B2 dc] as packed 32-bit integers. The
// products are formed at 64-bit precision, and the final value is divided by
// FIXED_POINT16_SCALE before being narrowed to float.
// NOTE(review): presumably A and B are fixed-point values whose squared scale
// is FIXED_POINT16_SCALE - confirm against the fixed-point definitions.
INLINE
float calcDeterminantInt(const __m128i vA, const __m128i vB)
{
    // vAShuf = [A1, A0, A2, A0]
    __m128i vAShuf = _mm_shuffle_epi32(vA, _MM_SHUFFLE(0, 2, 0, 1));
    // vBShuf = [B2, B0, B1, B0]
    __m128i vBShuf = _mm_shuffle_epi32(vB, _MM_SHUFFLE(0, 1, 0, 2));

    // _mm_mul_epi32 multiplies 32-bit elements 0 and 2 of each operand and
    // yields two signed 64-bit products: vMul = [A1*B2, A2*B1]
    __m128i vMul = _mm_mul_epi32(vAShuf, vBShuf);

    // Move the upper 64-bit product into the lower half: vMul2 = [A2*B1, A2*B1]
    __m128i vMul2 = _mm_shuffle_epi32(vMul, _MM_SHUFFLE(3, 2, 3, 2));

    // Lower 64 bits now hold A1*B2 - A2*B1
    vMul = _mm_sub_epi64(vMul, vMul2);

    // Store only the low 64 bits. The previous _mm_store1_pd wrote 16 bytes
    // (the low element replicated twice) into this 8-byte variable, which
    // overran it. _mm_storel_epi64 writes exactly 8 bytes and has no alignment
    // requirement.
    int64_t result;
    _mm_storel_epi64(reinterpret_cast<__m128i*>(&result), vMul);

    // Remove the fixed-point scale in double precision before narrowing.
    double fResult = (double)result;
    fResult = fResult * (1.0 / FIXED_POINT16_SCALE);

    return (float)fResult;
}
|
||||
|
||||
// Vertical (one triangle per SIMD lane) determinant: det = A1*B2 - A2*B1 with
// 64-bit products; see calcDeterminantInt for the derivation.
//
// Results are written to pvDet[0] and pvDet[1] as 64-bit values:
// pvDet[0] receives lanes 0 1 2 3 and pvDet[1] receives lanes 4 5 6 7.
INLINE
void calcDeterminantIntVertical(const simdscalari vA[3], const simdscalari vB[3], simdscalari *pvDet)
{
    // Unpacking a register against itself duplicates each 32-bit element:
    //   unpacklo -> 0 0 1 1 4 4 5 5      unpackhi -> 2 2 3 3 6 6 7 7
    // so the 64-bit multiply below yields products for lanes 0 1 4 5 (Lo)
    // and lanes 2 3 6 7 (Hi).

    // A1*B2
    const simdscalari vProdA1B2Lo = _simd_mul_epi32(_simd_unpacklo_epi32(vA[1], vA[1]),
                                                    _simd_unpacklo_epi32(vB[2], vB[2]));
    const simdscalari vProdA1B2Hi = _simd_mul_epi32(_simd_unpackhi_epi32(vA[1], vA[1]),
                                                    _simd_unpackhi_epi32(vB[2], vB[2]));

    // A2*B1
    const simdscalari vProdA2B1Lo = _simd_mul_epi32(_simd_unpacklo_epi32(vA[2], vA[2]),
                                                    _simd_unpacklo_epi32(vB[1], vB[1]));
    const simdscalari vProdA2B1Hi = _simd_mul_epi32(_simd_unpackhi_epi32(vA[2], vA[2]),
                                                    _simd_unpackhi_epi32(vB[1], vB[1]));

    // A1*B2 - A2*B1
    const simdscalari vDetLo = _simd_sub_epi64(vProdA1B2Lo, vProdA2B1Lo);   // lanes 0 1 4 5
    const simdscalari vDetHi = _simd_sub_epi64(vProdA1B2Hi, vProdA2B1Hi);   // lanes 2 3 6 7

    // Regroup the 128-bit halves into lane order:
    //   0x20 -> low half of Lo, low half of Hi   = lanes 0 1 2 3
    //   0x31 -> high half of Lo, high half of Hi = lanes 4 5 6 7
    pvDet[0] = _mm256_permute2f128_si256(vDetLo, vDetHi, 0x20);
    pvDet[1] = _mm256_permute2f128_si256(vDetLo, vDetHi, 0x31);
}
|
||||
|
||||
// Computes the C edge-equation coefficient for each lane: C = -A*x - B*y.
// vX / vY are the vertex coordinates, vA / vB the coefficients produced by
// triangleSetupAB; the result is written to vC.
INLINE
void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
{
    // A*x goes into the output first, then B*y is formed.
    vC = _mm_mul_ps(vA, vX);
    const __m128 vBy = _mm_mul_ps(vB, vY);

    // (-1 * A*x) - B*y
    const __m128 vMinusOne = _mm_set1_ps(-1.0f);
    vC = _mm_sub_ps(_mm_mul_ps(vC, vMinusOne), vBy);
}
|
||||
|
||||
// Applies the viewport scale and offset to packed x, y and z coordinates in
// place:
//   x = x * m00 + m30
//   y = y * m11 + m31
//   z = z * m22 + m32
// The multiply and the add are issued as two separate operations.
INLINE
void viewportTransform(__m128 &vX, __m128 &vY, __m128 &vZ, const SWR_VIEWPORT_MATRIX &vpMatrix)
{
    vX = _mm_add_ps(_mm_mul_ps(vX, _mm_set1_ps(vpMatrix.m00)), _mm_set1_ps(vpMatrix.m30));
    vY = _mm_add_ps(_mm_mul_ps(vY, _mm_set1_ps(vpMatrix.m11)), _mm_set1_ps(vpMatrix.m31));
    vZ = _mm_add_ps(_mm_mul_ps(vZ, _mm_set1_ps(vpMatrix.m22)), _mm_set1_ps(vpMatrix.m32));
}
|
||||
|
||||
template<uint32_t NumVerts>
|
||||
INLINE
|
||||
void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRIX & vpMatrix)
|
||||
{
|
||||
simdscalar m00 = _simd_load1_ps(&vpMatrix.m00);
|
||||
simdscalar m30 = _simd_load1_ps(&vpMatrix.m30);
|
||||
simdscalar m11 = _simd_load1_ps(&vpMatrix.m11);
|
||||
simdscalar m31 = _simd_load1_ps(&vpMatrix.m31);
|
||||
simdscalar m22 = _simd_load1_ps(&vpMatrix.m22);
|
||||
simdscalar m32 = _simd_load1_ps(&vpMatrix.m32);
|
||||
|
||||
for (uint32_t i = 0; i < NumVerts; ++i)
|
||||
{
|
||||
v[i].x = _simd_fmadd_ps(v[i].x, m00, m30);
|
||||
v[i].y = _simd_fmadd_ps(v[i].y, m11, m31);
|
||||
v[i].z = _simd_fmadd_ps(v[i].z, m22, m32);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the integer bounding box of one triangle whose vertices are stored
// horizontally: vX = [x0 x1 x2 dc], vY = [y0 y1 y2 dc].
// left/right receive the min/max x and top/bottom the min/max y.
INLINE
void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
{
    // Horizontal min/max over the first three lanes: build two permutations
    // that bring the other two vertices into lane 0, then reduce.
    //   (3, 2, 0, 1) -> [v1 v0 v2 v3]
    //   (3, 0, 1, 2) -> [v2 v1 v0 v3]
    const __m128i vXSwap01 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
    const __m128i vXSwap02 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 1, 2));
    const __m128i vYSwap01 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 2, 0, 1));
    const __m128i vYSwap02 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 1, 2));

    const __m128i vLeft   = _mm_min_epi32(_mm_min_epi32(vX, vXSwap01), vXSwap02);
    const __m128i vRight  = _mm_max_epi32(_mm_max_epi32(vX, vXSwap01), vXSwap02);
    const __m128i vTop    = _mm_min_epi32(_mm_min_epi32(vY, vYSwap01), vYSwap02);
    const __m128i vBottom = _mm_max_epi32(_mm_max_epi32(vY, vYSwap01), vYSwap02);

    // Lane 0 of each reduction holds the result over v0, v1 and v2.
    bbox.left   = _mm_extract_epi32(vLeft, 0);
    bbox.right  = _mm_extract_epi32(vRight, 0);
    bbox.top    = _mm_extract_epi32(vTop, 0);
    bbox.bottom = _mm_extract_epi32(vBottom, 0);

#if 0
    Jacob:  A = _mm_shuffle_ps(X, Y, 0 0 0 0)
B = _mm_shuffle_ps(Z, W, 0 0 0 0)
A = _mm_shuffle_epi32(A, 3 0 3 0)
A = _mm_shuffle_ps(A, B, 1 0 1 0)
#endif

}
|
||||
|
||||
// Vertical (one triangle per SIMD lane) bounding box: for every lane, takes
// the min/max of the three vertex x values and of the three vertex y values.
INLINE
void calcBoundingBoxIntVertical(const simdscalari (&vX)[3], const simdscalari (&vY)[3], simdBBox &bbox)
{
    const simdscalari vLeft   = _simd_min_epi32(_simd_min_epi32(vX[0], vX[1]), vX[2]);
    const simdscalari vRight  = _simd_max_epi32(_simd_max_epi32(vX[0], vX[1]), vX[2]);
    const simdscalari vTop    = _simd_min_epi32(_simd_min_epi32(vY[0], vY[1]), vY[2]);
    const simdscalari vBottom = _simd_max_epi32(_simd_max_epi32(vY[0], vY[1]), vY[2]);

    bbox.left   = vLeft;
    bbox.right  = vRight;
    bbox.top    = vTop;
    bbox.bottom = vBottom;
}
|
||||
|
||||
// Returns true only when the draw's rasterizer state describes the simplest
// kind of point: single-sampled, point size exactly 1.0, and neither
// pointParam nor pointSpriteEnable set.
INLINE
bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
{
    const API_STATE& state = GetApiState(pDC);

    if (state.rastState.sampleCount != SWR_MULTISAMPLE_1X)
    {
        return false;
    }

    if (!(state.rastState.pointSize == 1.0f))
    {
        return false;
    }

    if (state.rastState.pointParam || state.rastState.pointSpriteEnable)
    {
        return false;
    }

    return true;
}
|
||||
|
||||
// Front-end entry points and helpers. Only declarations appear here; the
// definitions live in other translation units.
// NOTE(review): judging by the names, GetNumPrims maps an element count to a
// primitive count for the given topology and NumVertsPerPrim gives the vertex
// count per primitive - confirm against the definitions.
uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
|
||||
uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
|
||||
|
||||
// Templated Draw front-end function. All combinations of template parameter values are available
|
||||
template <bool IsIndexedT, bool HasTessellationT, bool HasGeometryShaderT, bool HasStreamOutT, bool HasRastT>
|
||||
void ProcessDraw(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
|
||||
// Non-templated front-end functions. Each takes the same parameter list as
// ProcessDraw: context, draw context, worker id and an opaque user-data pointer.
void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
void ProcessInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
void ProcessQueryStats(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
|
||||
|
||||
// NOTE(review): this forward-declares PA_STATE_BASE, but the binner prototypes
// below take PA_STATE& - PA_STATE must already be declared by an earlier
// include for these to compile; confirm which name is intended.
struct PA_STATE_BASE; // forward decl
|
||||
// Binner entry points, one per primitive class. Each receives the draw
// context, primitive-assembly state, worker id, an array of three simdvector
// values, a primitive mask and a SIMD register of primitive IDs.
void BinTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector tri[3], uint32_t primMask, simdscalari primID);
|
||||
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
|
||||
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID);
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file knobs.h
|
||||
*
|
||||
* @brief Static (Compile-Time) Knobs for Core.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <gen_knobs.h>
|
||||
|
||||
#define KNOB_ARCH_AVX 0
|
||||
#define KNOB_ARCH_AVX2 1
|
||||
#define KNOB_ARCH_AVX512 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Architecture validation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#if !defined(KNOB_ARCH)
|
||||
#define KNOB_ARCH KNOB_ARCH_AVX
|
||||
#endif
|
||||
|
||||
#if (KNOB_ARCH == KNOB_ARCH_AVX)
|
||||
#define KNOB_ARCH_ISA AVX
|
||||
#define KNOB_ARCH_STR "AVX"
|
||||
#define KNOB_SIMD_WIDTH 8
|
||||
#elif (KNOB_ARCH == KNOB_ARCH_AVX2)
|
||||
#define KNOB_ARCH_ISA AVX2
|
||||
#define KNOB_ARCH_STR "AVX2"
|
||||
#define KNOB_SIMD_WIDTH 8
|
||||
#elif (KNOB_ARCH == KNOB_ARCH_AVX512)
|
||||
#define KNOB_ARCH_ISA AVX512F
|
||||
#define KNOB_ARCH_STR "AVX512"
|
||||
#define KNOB_SIMD_WIDTH 16
|
||||
#error "AVX512 not yet supported"
|
||||
#else
|
||||
#error "Unknown architecture"
|
||||
#endif
|
||||
|
||||
#define MAX_KNOB_ARCH_STR_LEN sizeof("AVX512_PLUS_PADDING")
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Configuration knobs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#define KNOB_MAX_NUM_THREADS 256 // Supports up to dual-HSW-Xeon.
|
||||
|
||||
// Maximum supported number of active vertex buffer streams
|
||||
#define KNOB_NUM_STREAMS 32
|
||||
|
||||
// Maximum supported number of attributes per vertex
|
||||
#define KNOB_NUM_ATTRIBUTES 38
|
||||
|
||||
// Maximum supported active viewports and scissors
|
||||
#define KNOB_NUM_VIEWPORTS_SCISSORS 16
|
||||
|
||||
// Guardband range used by the clipper
|
||||
#define KNOB_GUARDBAND_WIDTH 32768.0f
|
||||
#define KNOB_GUARDBAND_HEIGHT 32768.0f
|
||||
|
||||
///////////////////////////////
|
||||
// Macro tile configuration
|
||||
///////////////////////////////
|
||||
|
||||
// raster tile dimensions
// Each *_DIM_SHIFT is log2 of the matching *_DIM (8 == 1 << 3); the two must
// be changed together.
|
||||
#define KNOB_TILE_X_DIM 8
|
||||
#define KNOB_TILE_X_DIM_SHIFT 3
|
||||
#define KNOB_TILE_Y_DIM 8
|
||||
#define KNOB_TILE_Y_DIM_SHIFT 3
|
||||
|
||||
// fixed macrotile pixel dimension for now, eventually will be
|
||||
// dynamically set based on tile format and pixel size
|
||||
#define KNOB_MACROTILE_X_DIM 64
|
||||
#define KNOB_MACROTILE_Y_DIM 64
|
||||
// *_DIM_FIXED is the macrotile dimension shifted left by 8 bits
// (64 << 8 == 1 << 14), and *_DIM_FIXED_SHIFT is log2 of that value, so the
// hard-coded 14 must be updated whenever the macrotile dimension changes.
#define KNOB_MACROTILE_X_DIM_FIXED (KNOB_MACROTILE_X_DIM << 8)
|
||||
#define KNOB_MACROTILE_Y_DIM_FIXED (KNOB_MACROTILE_Y_DIM << 8)
|
||||
#define KNOB_MACROTILE_X_DIM_FIXED_SHIFT 14
|
||||
#define KNOB_MACROTILE_Y_DIM_FIXED_SHIFT 14
|
||||
// Macrotile dimension expressed in raster tiles (64 >> 3 == 8 per axis).
#define KNOB_MACROTILE_X_DIM_IN_TILES (KNOB_MACROTILE_X_DIM >> KNOB_TILE_X_DIM_SHIFT)
|
||||
#define KNOB_MACROTILE_Y_DIM_IN_TILES (KNOB_MACROTILE_Y_DIM >> KNOB_TILE_Y_DIM_SHIFT)
|
||||
|
||||
// total # of hot tiles available. This should be enough to
|
||||
// fully render a 16kx16k 128bpp render target
|
||||
#define KNOB_NUM_HOT_TILES_X 256
|
||||
#define KNOB_NUM_HOT_TILES_Y 256
|
||||
#define KNOB_COLOR_HOT_TILE_FORMAT R32G32B32A32_FLOAT
|
||||
#define KNOB_DEPTH_HOT_TILE_FORMAT R32_FLOAT
|
||||
#define KNOB_STENCIL_HOT_TILE_FORMAT R8_UINT
|
||||
|
||||
// Max scissor rectangle: number of hot tiles times the macrotile dimension
// along each axis. The products are parenthesized so the macros expand as a
// single value inside larger expressions (e.g. `a / KNOB_MAX_SCISSOR_X`).
#define KNOB_MAX_SCISSOR_X (KNOB_NUM_HOT_TILES_X * KNOB_MACROTILE_X_DIM)
#define KNOB_MAX_SCISSOR_Y (KNOB_NUM_HOT_TILES_Y * KNOB_MACROTILE_Y_DIM)
|
||||
|
||||
#if KNOB_SIMD_WIDTH==8 && KNOB_TILE_X_DIM < 4
|
||||
#error "incompatible width/tile dimensions"
|
||||
#endif
|
||||
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
#define SIMD_TILE_X_DIM 4
|
||||
#define SIMD_TILE_Y_DIM 2
|
||||
#else
|
||||
#error "Invalid simd width"
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Optimization knobs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#define KNOB_USE_FAST_SRGB TRUE
|
||||
|
||||
// enables cut-aware primitive assembler
|
||||
#define KNOB_ENABLE_CUT_AWARE_PA TRUE
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Debug knobs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//#define KNOB_ENABLE_RDTSC
|
||||
|
||||
// Set to 1 to use the dynamic KNOB_TOSS_XXXX knobs.
|
||||
#if !defined(KNOB_ENABLE_TOSS_POINTS)
|
||||
#define KNOB_ENABLE_TOSS_POINTS 0
|
||||
#endif
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file knobs_init.h
|
||||
*
|
||||
* @brief Dynamic Knobs Initialization for Core.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <core/knobs.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Generic knob conversion: parses pOverride as an unsigned decimal integer
// and, on success, stores it in knobValue converted to T.
// Assume the type is compatible with a 32-bit integer.
//
// knobValue is left untouched when pOverride does not start with a number,
// including when it is empty or contains only whitespace.
template <typename T>
static inline void ConvertEnvToKnob(const char* pOverride, T& knobValue)
{
    uint32_t value = 0;

    // sscanf returns EOF (a non-zero value) when the input runs out before any
    // conversion, so the result is compared against the number of fields
    // expected rather than tested for "non-zero".
    if (sscanf(pOverride, "%u", &value) == 1)
    {
        knobValue = static_cast<T>(value);
    }
}
|
||||
|
||||
// Boolean knob conversion.
//
// Accepted forms:
//   - a single character, case-insensitive: 'y', 't', '1' -> true
//                                           'n', 'f', '0' -> false
//   - otherwise, an unsigned decimal integer: non-zero -> true, zero -> false
// Any other text (including an empty or whitespace-only string) leaves
// knobValue unchanged.
static inline void ConvertEnvToKnob(const char* pOverride, bool& knobValue)
{
    if (strlen(pOverride) == 1)
    {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF), so convert before calling it.
        switch (tolower(static_cast<unsigned char>(pOverride[0])))
        {
        case 'y':
        case 't':
        case '1':
            knobValue = true;
            return;
        case 'n':
        case 'f':
        case '0':
            knobValue = false;
            return;
        default:
            break;
        }
    }

    // Try converting to a number and casting to bool.
    // sscanf returns EOF (non-zero) on empty input, so require exactly one
    // converted field instead of testing the result for truthiness.
    uint32_t value = 0;
    if (sscanf(pOverride, "%u", &value) == 1)
    {
        knobValue = value != 0;
    }
}
|
||||
|
||||
// Float knob conversion: parses pOverride with the "%f" conversion and stores
// the result in knobValue. When nothing can be parsed, knobValue keeps its
// current value.
static inline void ConvertEnvToKnob(const char* pOverride, float& knobValue)
{
    float parsed = knobValue;
    const int numConverted = sscanf(pOverride, "%f", &parsed);

    // One converted field means `parsed` holds the new value. In every other
    // case `parsed` still equals the incoming knobValue, so there is nothing
    // to write back.
    if (numConverted == 1)
    {
        knobValue = parsed;
    }
}
|
||||
|
||||
// Initializes one knob from its override sources.
// The environment variable named knob.Name() is looked up; when present, its
// text is converted with ConvertEnvToKnob starting from the knob's current
// value, and the result is written back through knob.Value(...).
template <typename T>
static inline void InitKnob(T& knob)
{
    // TODO, read registry first

    // Second, read environment variables
    const char* const pEnvText = getenv(knob.Name());
    if (pEnvText == nullptr)
    {
        // No override set: leave the knob as it is.
        return;
    }

    auto currentValue = knob.Value();
    ConvertEnvToKnob(pEnvText, currentValue);
    knob.Value(currentValue);
}
|
|
@ -0,0 +1,51 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file multisample.cpp
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "multisample.h"
|
||||
|
||||
// Out-of-class definitions of the sample-position tables declared as static
// members of the MultisampleTraits specializations.
//
// samplePosXi / samplePosYi: integer sample positions, one entry per sample.
// Every float entry further below equals the matching integer entry divided
// by 256 (e.g. 0xC0 -> 0.75, 0x40 -> 0.25), so the two sets of tables must be
// edited together.
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi[2] {0xC0, 0x40};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi[2] {0xC0, 0x40};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi[4] {0x60, 0xE0, 0x20, 0xA0};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi[4] {0x20, 0x60, 0xA0, 0xE0};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi[8] {0x90, 0x70, 0xD0, 0x50, 0x30, 0x10, 0xB0, 0xF0};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi[8] {0x50, 0xB0, 0x90, 0x30, 0xD0, 0x70, 0xF0, 0x10};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi[16]
|
||||
{0x90, 0x70, 0x50, 0xC0, 0x30, 0xA0, 0xD0, 0xB0, 0x60, 0x80, 0x40, 0x20, 0x00, 0xF0, 0xE0, 0x10};
|
||||
const uint32_t MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi[16]
|
||||
{0x90, 0x50, 0xA0, 0x70, 0x60, 0xD0, 0xB0, 0x30, 0xE0, 0x10, 0x20, 0xC0, 0x80, 0x40, 0xF0, 0x00};
|
||||
|
||||
// samplePosX / samplePosY: the same sample positions as floats. The 1X
// specialization has single scalar values rather than arrays.
const float MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosX{0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosY{0.5f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosX[2]{0.75f, 0.25f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosY[2]{0.75f, 0.25f};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosX[4]{0.375f, 0.875, 0.125, 0.625};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosY[4]{0.125, 0.375, 0.625, 0.875};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosX[8]{0.5625, 0.4375, 0.8125, 0.3125, 0.1875, 0.0625, 0.6875, 0.9375};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosY[8]{0.3125, 0.6875, 0.5625, 0.1875, 0.8125, 0.4375, 0.9375, 0.0625};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosX[16]
|
||||
{0.5625, 0.4375, 0.3125, 0.7500, 0.1875, 0.6250, 0.8125, 0.6875, 0.3750, 0.5000, 0.2500, 0.1250, 0.0000, 0.9375, 0.8750, 0.0625};
|
||||
const float MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosY[16]
|
||||
{0.5625, 0.3125, 0.6250, 0.4375, 0.3750, 0.8125, 0.6875, 0.1875, 0.8750, 0.0625, 0.1250, 0.7500, 0.5000, 0.2500, 0.9375, 0.0000};
|
|
@ -0,0 +1,620 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file multisample.h
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "context.h"
|
||||
#include "format_traits.h"
|
||||
|
||||
// Maps a SWR_MULTISAMPLE_COUNT enumerator to the number of samples it stands
// for, using a lookup table indexed by the enumerator's value.
// Asserts that sampleCount is below SWR_MULTISAMPLE_TYPE_MAX.
INLINE
uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount)
{
    static const uint32_t kSamplesPerMode[SWR_MULTISAMPLE_TYPE_MAX] {1, 2, 4, 8, 16};

    assert(sampleCount < SWR_MULTISAMPLE_TYPE_MAX);

    const uint32_t numSamples = kSamplesPerMode[sampleCount];
    return numSamples;
}
|
||||
|
||||
// Inverse of GetNumSamples: maps a sample count of 1, 2, 4, 8 or 16 to the
// matching SWR_MULTISAMPLE_COUNT enumerator. Any other count trips an assert
// and falls back to SWR_MULTISAMPLE_1X.
INLINE
SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
{
    if (numSamples == 1)  { return SWR_MULTISAMPLE_1X; }
    if (numSamples == 2)  { return SWR_MULTISAMPLE_2X; }
    if (numSamples == 4)  { return SWR_MULTISAMPLE_4X; }
    if (numSamples == 8)  { return SWR_MULTISAMPLE_8X; }
    if (numSamples == 16) { return SWR_MULTISAMPLE_16X; }

    // Unsupported sample count.
    assert(0);
    return SWR_MULTISAMPLE_1X;
}
|
||||
|
||||
// hardcoded offsets based on Direct3d standard multisample positions
|
||||
// 16 x 16 sub-pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
|
||||
// coords are 0.8 fixed point offsets from (0, 0)
|
||||
// Primary template for per-sample-count multisample traits.
// Every accessor is declared `= delete` and numSamples is 0, so only the
// explicit specializations (one per supported SWR_MULTISAMPLE_COUNT value)
// can actually be called; calling an accessor on an unspecialized sample
// count is a compile-time error.
template<SWR_MULTISAMPLE_COUNT sampleCount>
|
||||
struct MultisampleTraits
|
||||
{
|
||||
// Sample position, as a __m128i (integer) or simdscalar (float).
INLINE static __m128i vXi(uint32_t sampleNum) = delete;
|
||||
INLINE static __m128i vYi(uint32_t sampleNum) = delete;
|
||||
INLINE static simdscalar vX(uint32_t sampleNum) = delete;
|
||||
INLINE static simdscalar vY(uint32_t sampleNum) = delete;
|
||||
// Scalar float sample position.
INLINE static float X(uint32_t sampleNum) = delete;
|
||||
INLINE static float Y(uint32_t sampleNum) = delete;
|
||||
// Per-corner offsets built from the sample bounding-box edges.
INLINE static __m128i TileSampleOffsetsX() = delete;
|
||||
INLINE static __m128i TileSampleOffsetsY() = delete;
|
||||
// Offsets of a sample's color / depth / stencil data within a raster tile.
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum) = delete;
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum) = delete;
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum) = delete;
|
||||
// Mask with one bit set per sample, broadcast to every lane.
INLINE static simdscalari FullSampleMask() = delete;
|
||||
|
||||
static const uint32_t numSamples = 0;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_1X>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X = _mm_set1_epi32(samplePosXi);
|
||||
return X;
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y = _mm_set1_epi32(samplePosYi);
|
||||
return Y;
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X = _simd_set1_ps(0.5f);
|
||||
return X;
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y = _simd_set1_ps(0.5f);
|
||||
return Y;
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) {return samplePosX;};
|
||||
INLINE static float Y(uint32_t sampleNum) {return samplePosY;};
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x80;
|
||||
static const uint32_t bboxRightEdge = 0x80;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x80;
|
||||
static const uint32_t bboxBottomEdge = 0x80;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
|
||||
|
||||
static const uint32_t samplePosXi {0x80};
|
||||
static const uint32_t samplePosYi {0x80};
|
||||
static const float samplePosX;
|
||||
static const float samplePosY;
|
||||
static const uint32_t numSamples = 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_2X>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
static const __m128i X[numSamples] {_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1])};
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
static const __m128i Y[numSamples] {_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1])};
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples] {_simd_set1_ps(0.75f), _simd_set1_ps(0.25f)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x40;
|
||||
static const uint32_t bboxRightEdge = 0xC0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x40;
|
||||
static const uint32_t bboxBottomEdge = 0xC0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask =_simd_set1_epi32(0x3);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static const uint32_t samplePosXi[2];
|
||||
static const uint32_t samplePosYi[2];
|
||||
static const float samplePosX[2];
|
||||
static const float samplePosY[2];
|
||||
static const uint32_t numSamples = 2;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_4X>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X[numSamples]
|
||||
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y[numSamples]
|
||||
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples]
|
||||
{_simd_set1_ps(0.375f), _simd_set1_ps(0.875), _simd_set1_ps(0.125), _simd_set1_ps(0.625)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples]
|
||||
{_simd_set1_ps(0.125), _simd_set1_ps(0.375f), _simd_set1_ps(0.625), _simd_set1_ps(0.875)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x20;
|
||||
static const uint32_t bboxRightEdge = 0xE0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x20;
|
||||
static const uint32_t bboxBottomEdge = 0xE0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xF);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static const uint32_t samplePosXi[4];
|
||||
static const uint32_t samplePosYi[4];
|
||||
static const float samplePosX[4];
|
||||
static const float samplePosY[4];
|
||||
static const uint32_t numSamples = 4;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_8X>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X[numSamples]
|
||||
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
|
||||
_mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y[numSamples]
|
||||
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
|
||||
_mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples]
|
||||
{_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.8125), _simd_set1_ps(0.3125),
|
||||
_simd_set1_ps(0.1875), _simd_set1_ps(0.0625), _simd_set1_ps(0.6875), _simd_set1_ps(0.9375)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples]
|
||||
{_simd_set1_ps(0.3125), _simd_set1_ps(0.6875), _simd_set1_ps(0.5625), _simd_set1_ps(0.1875),
|
||||
_simd_set1_ps(0.8125), _simd_set1_ps(0.4375), _simd_set1_ps(0.9375), _simd_set1_ps(0.0625)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x10;
|
||||
static const uint32_t bboxRightEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x10;
|
||||
static const uint32_t bboxBottomEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFF);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static const uint32_t samplePosXi[8];
|
||||
static const uint32_t samplePosYi[8];
|
||||
static const float samplePosX[8];
|
||||
static const float samplePosY[8];
|
||||
static const uint32_t numSamples = 8;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MultisampleTraits<SWR_MULTISAMPLE_16X>
|
||||
{
|
||||
INLINE static __m128i vXi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i X[numSamples]
|
||||
{_mm_set1_epi32(samplePosXi[0]), _mm_set1_epi32(samplePosXi[1]), _mm_set1_epi32(samplePosXi[2]), _mm_set1_epi32(samplePosXi[3]),
|
||||
_mm_set1_epi32(samplePosXi[4]), _mm_set1_epi32(samplePosXi[5]), _mm_set1_epi32(samplePosXi[6]), _mm_set1_epi32(samplePosXi[7]),
|
||||
_mm_set1_epi32(samplePosXi[8]), _mm_set1_epi32(samplePosXi[9]), _mm_set1_epi32(samplePosXi[10]), _mm_set1_epi32(samplePosXi[11]),
|
||||
_mm_set1_epi32(samplePosXi[12]), _mm_set1_epi32(samplePosXi[13]), _mm_set1_epi32(samplePosXi[14]), _mm_set1_epi32(samplePosXi[15])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static __m128i vYi(uint32_t sampleNum)
|
||||
{
|
||||
static const __m128i Y[numSamples]
|
||||
{_mm_set1_epi32(samplePosYi[0]), _mm_set1_epi32(samplePosYi[1]), _mm_set1_epi32(samplePosYi[2]), _mm_set1_epi32(samplePosYi[3]),
|
||||
_mm_set1_epi32(samplePosYi[4]), _mm_set1_epi32(samplePosYi[5]), _mm_set1_epi32(samplePosYi[6]), _mm_set1_epi32(samplePosYi[7]),
|
||||
_mm_set1_epi32(samplePosYi[8]), _mm_set1_epi32(samplePosYi[9]), _mm_set1_epi32(samplePosYi[10]), _mm_set1_epi32(samplePosYi[11]),
|
||||
_mm_set1_epi32(samplePosYi[12]), _mm_set1_epi32(samplePosYi[13]), _mm_set1_epi32(samplePosYi[14]), _mm_set1_epi32(samplePosYi[15])};
|
||||
SWR_ASSERT(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vX(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar X[numSamples]
|
||||
{_simd_set1_ps(0.5625), _simd_set1_ps(0.4375), _simd_set1_ps(0.3125), _simd_set1_ps(0.7500),
|
||||
_simd_set1_ps(0.1875), _simd_set1_ps(0.6250), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875),
|
||||
_simd_set1_ps(0.3750), _simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.1250),
|
||||
_simd_set1_ps(0.0000), _simd_set1_ps(0.9375), _simd_set1_ps(0.8750), _simd_set1_ps(0.0625)};
|
||||
assert(sampleNum < numSamples);
|
||||
return X[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalar vY(uint32_t sampleNum)
|
||||
{
|
||||
static const simdscalar Y[numSamples]
|
||||
{_simd_set1_ps(0.5625), _simd_set1_ps(0.3125), _simd_set1_ps(0.6250), _simd_set1_ps(0.4375),
|
||||
_simd_set1_ps(0.3750), _simd_set1_ps(0.8125), _simd_set1_ps(0.6875), _simd_set1_ps(0.1875),
|
||||
_simd_set1_ps(0.8750), _simd_set1_ps(0.0625), _simd_set1_ps(0.1250), _simd_set1_ps(0.7500),
|
||||
_simd_set1_ps(0.5000), _simd_set1_ps(0.2500), _simd_set1_ps(0.9375), _simd_set1_ps(0.0000)};
|
||||
assert(sampleNum < numSamples);
|
||||
return Y[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
|
||||
INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsX()
|
||||
{
|
||||
static const uint32_t bboxLeftEdge = 0x00;
|
||||
static const uint32_t bboxRightEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetX = _mm_set_epi32(bboxRightEdge, bboxLeftEdge, bboxRightEdge, bboxLeftEdge);
|
||||
return tileSampleOffsetX;
|
||||
}
|
||||
|
||||
INLINE static __m128i TileSampleOffsetsY()
|
||||
{
|
||||
static const uint32_t bboxTopEdge = 0x00;
|
||||
static const uint32_t bboxBottomEdge = 0xF0;
|
||||
// BR, BL, UR, UL
|
||||
static const __m128i tileSampleOffsetY = _mm_set_epi32(bboxBottomEdge, bboxBottomEdge, bboxTopEdge, bboxTopEdge);
|
||||
return tileSampleOffsetY;
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileColorOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 10,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 11,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 12,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 13,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 14,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 15,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileColorOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileDepthOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 10,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 11,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 12,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 13,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 14,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 15,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileDepthOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
|
||||
{
|
||||
static const uint32_t RasterTileStencilOffsets[numSamples]
|
||||
{ 0,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 8,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 9,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 10,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 11,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 12,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 13,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 14,
|
||||
(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 15,
|
||||
};
|
||||
assert(sampleNum < numSamples);
|
||||
return RasterTileStencilOffsets[sampleNum];
|
||||
}
|
||||
|
||||
INLINE static simdscalari FullSampleMask()
|
||||
{
|
||||
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static const uint32_t samplePosXi[16];
|
||||
static const uint32_t samplePosYi[16];
|
||||
static const float samplePosX[16];
|
||||
static const float samplePosY[16];
|
||||
static const uint32_t numSamples = 16;
|
||||
};
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,35 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file rasterizer.h
|
||||
*
|
||||
* @brief Definitions for the rasterizer.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "context.h"
|
||||
|
||||
// Table of rasterizer work functions with dimensions [2][SWR_MULTISAMPLE_TYPE_MAX].
// NOTE(review): the second index is presumably the multisample type; the meaning
// of the first index is not visible in this header — confirm at the definition.
extern PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX];
|
||||
// Rasterizer entry points for lines and points. All three take the same
// (pDC, workerId, macroTile, pData) parameter list.
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
|
@ -0,0 +1,91 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#include "rdtsc_core.h"
|
||||
#include "common/rdtsc_buckets.h"
|
||||
|
||||
// must match CORE_BUCKETS enum order
|
||||
BUCKET_DESC gCoreBuckets[] = {
|
||||
{ "APIClearRenderTarget", "", true, 0xff0b8bea },
|
||||
{ "APIDraw", "", true, 0xff000066 },
|
||||
{ "APIDrawWakeAllThreads", "", false, 0xffffffff },
|
||||
{ "APIDrawIndexed", "", true, 0xff000066 },
|
||||
{ "APIDispatch", "", true, 0xff660000 },
|
||||
{ "APIStoreTiles", "", true, 0xff00ffff },
|
||||
{ "APIGetDrawContext", "", false, 0xffffffff },
|
||||
{ "APISync", "", true, 0xff6666ff },
|
||||
{ "APIWaitForIdle", "", true, 0xff0000ff },
|
||||
{ "FEProcessDraw", "", true, 0xff009900 },
|
||||
{ "FEProcessDrawIndexed", "", true, 0xff009900 },
|
||||
{ "FEFetchShader", "", false, 0xffffffff },
|
||||
{ "FEVertexShader", "", false, 0xffffffff },
|
||||
{ "FEHullShader", "", false, 0xffffffff },
|
||||
{ "FETessellation", "", false, 0xffffffff },
|
||||
{ "FEDomainShader", "", false, 0xffffffff },
|
||||
{ "FEGeometryShader", "", false, 0xffffffff },
|
||||
{ "FEStreamout", "", false, 0xffffffff },
|
||||
{ "FEPAAssemble", "", false, 0xffffffff },
|
||||
{ "FEBinPoints", "", false, 0xff29b854 },
|
||||
{ "FEBinLines", "", false, 0xff29b854 },
|
||||
{ "FEBinTriangles", "", false, 0xff29b854 },
|
||||
{ "FETriangleSetup", "", false, 0xffffffff },
|
||||
{ "FEViewportCull", "", false, 0xffffffff },
|
||||
{ "FEGuardbandClip", "", false, 0xffffffff },
|
||||
{ "FEClipPoints", "", false, 0xffffffff },
|
||||
{ "FEClipLines", "", false, 0xffffffff },
|
||||
{ "FEClipTriangles", "", false, 0xffffffff },
|
||||
{ "FECullZeroAreaAndBackface", "", false, 0xffffffff },
|
||||
{ "FECullBetweenCenters", "", false, 0xffffffff },
|
||||
{ "FEProcessStoreTiles", "", true, 0xff39c864 },
|
||||
{ "FEProcessInvalidateTiles", "", true, 0xffffffff },
|
||||
{ "WorkerWorkOnFifoBE", "", false, 0xff40261c },
|
||||
{ "WorkerFoundWork", "", false, 0xff573326 },
|
||||
{ "BELoadTiles", "", true, 0xffb0e2ff },
|
||||
{ "BEDispatch", "", true, 0xff00a2ff },
|
||||
{ "BEClear", "", true, 0xff00ccbb },
|
||||
{ "BERasterizeLine", "", true, 0xffb26a4e },
|
||||
{ "BERasterizeTriangle", "", true, 0xffb26a4e },
|
||||
{ "BETriangleSetup", "", false, 0xffffffff },
|
||||
{ "BEStepSetup", "", false, 0xffffffff },
|
||||
{ "BECullZeroArea", "", false, 0xffffffff },
|
||||
{ "BEEmptyTriangle", "", false, 0xffffffff },
|
||||
{ "BETrivialAccept", "", false, 0xffffffff },
|
||||
{ "BETrivialReject", "", false, 0xffffffff },
|
||||
{ "BERasterizePartial", "", false, 0xffffffff },
|
||||
{ "BEPixelBackend", "", false, 0xffffffff },
|
||||
{ "BESetup", "", false, 0xffffffff },
|
||||
{ "BEBarycentric", "", false, 0xffffffff },
|
||||
{ "BEEarlyDepthTest", "", false, 0xffffffff },
|
||||
{ "BEPixelShader", "", false, 0xffffffff },
|
||||
{ "BELateDepthTest", "", false, 0xffffffff },
|
||||
{ "BEOutputMerger", "", false, 0xffffffff },
|
||||
{ "BEStoreTiles", "", true, 0xff00cccc },
|
||||
{ "BEEndTile", "", false, 0xffffffff },
|
||||
{ "WorkerWaitForThreadEvent", "", false, 0xffffffff },
|
||||
};
|
||||
|
||||
/// @todo The bucket manager and the bucket mapping should probably be part of the SWR context.
|
||||
// Global table of bucket ids; starts empty.
// NOTE(review): presumably indexed by CORE_BUCKETS value and filled during rdtsc initialization — confirm in rdtsc_core.h.
std::vector<uint32_t> gBucketMap;
|
||||
// Global bucket manager, constructed with the KNOB_BUCKETS_ENABLE_THREADVIZ setting.
BucketManager gBucketMgr(KNOB_BUCKETS_ENABLE_THREADVIZ);
|
||||
|
||||
// Global frame counter, initialized to 0.
uint32_t gCurrentFrame = 0;
|
|
@ -0,0 +1,177 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#pragma once
|
||||
#include "knobs.h"
|
||||
|
||||
#include "common/os.h"
|
||||
#include "common/rdtsc_buckets.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
/// Identifiers of the core profiling buckets. Values are implicit and start
/// at 0, so each enumerator doubles as an index into per-bucket tables.
/// NumBuckets is last and therefore equals the number of buckets.
enum CORE_BUCKETS
{
    // API
    APIClearRenderTarget,
    APIDraw,
    APIDrawWakeAllThreads,
    APIDrawIndexed,
    APIDispatch,
    APIStoreTiles,
    APIGetDrawContext,
    APISync,
    APIWaitForIdle,

    // Front end
    FEProcessDraw,
    FEProcessDrawIndexed,
    FEFetchShader,
    FEVertexShader,
    FEHullShader,
    FETessellation,
    FEDomainShader,
    FEGeometryShader,
    FEStreamout,
    FEPAAssemble,
    FEBinPoints,
    FEBinLines,
    FEBinTriangles,
    FETriangleSetup,
    FEViewportCull,
    FEGuardbandClip,
    FEClipPoints,
    FEClipLines,
    FEClipTriangles,
    FECullZeroAreaAndBackface,
    FECullBetweenCenters,
    FEProcessStoreTiles,
    FEProcessInvalidateTiles,

    // Worker
    WorkerWorkOnFifoBE,
    WorkerFoundWork,

    // Back end
    BELoadTiles,
    BEDispatch,
    BEClear,
    BERasterizeLine,
    BERasterizeTriangle,
    BETriangleSetup,
    BEStepSetup,
    BECullZeroArea,
    BEEmptyTriangle,
    BETrivialAccept,
    BETrivialReject,
    BERasterizePartial,
    BEPixelBackend,
    BESetup,
    BEBarycentric,
    BEEarlyDepthTest,
    BEPixelShader,
    BELateDepthTest,
    BEOutputMerger,
    BEStoreTiles,
    BEEndTile,

    // Worker
    WorkerWaitForThreadEvent,

    // Bucket count; must stay last.
    NumBuckets
};
|
||||
|
||||
// Prototypes for the rdtsc bucket helpers; the inline definitions follow later
// in this header. Call sites normally go through the RDTSC_* macros.
void rdtscReset();
|
||||
void rdtscInit(int threadId);
|
||||
void rdtscStart(uint32_t bucketId);
|
||||
void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId);
|
||||
void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2);
|
||||
void rdtscEndFrame();
|
||||
|
||||
// Instrumentation macros. With KNOB_ENABLE_RDTSC defined they forward to the
// rdtsc* functions; otherwise they expand to nothing, so their arguments are
// not evaluated. #ifdef only tests whether the knob is defined: defining it
// as 0 still enables instrumentation.
#ifdef KNOB_ENABLE_RDTSC
|
||||
#define RDTSC_RESET() rdtscReset()
|
||||
#define RDTSC_INIT(threadId) rdtscInit(threadId)
|
||||
#define RDTSC_START(bucket) rdtscStart(bucket)
|
||||
#define RDTSC_STOP(bucket, count, draw) rdtscStop(bucket, count, draw)
|
||||
#define RDTSC_EVENT(bucket, count1, count2) rdtscEvent(bucket, count1, count2)
|
||||
#define RDTSC_ENDFRAME() rdtscEndFrame()
|
||||
#else
|
||||
// Instrumentation disabled: every macro is empty.
#define RDTSC_RESET()
|
||||
#define RDTSC_INIT(threadId)
|
||||
#define RDTSC_START(bucket)
|
||||
#define RDTSC_STOP(bucket, count, draw)
|
||||
#define RDTSC_EVENT(bucket, count1, count2)
|
||||
#define RDTSC_ENDFRAME()
|
||||
#endif
|
||||
|
||||
// Shared profiling state used by the inline helpers below; defined outside this header.
// gBucketMap[i] holds the id returned by gBucketMgr.RegisterBucket for gCoreBuckets[i].
extern std::vector<uint32_t> gBucketMap;
|
||||
extern BucketManager gBucketMgr;
|
||||
extern BUCKET_DESC gCoreBuckets[];
|
||||
// Frame counter: zeroed by rdtscReset() and incremented by rdtscEndFrame().
extern uint32_t gCurrentFrame;
|
||||
|
||||
/// Resets the profiling state: zeroes the frame counter, then clears the
/// threads and the buckets held by the global bucket manager.
INLINE void rdtscReset()
{
    gCurrentFrame = 0;

    // Threads are cleared before buckets; the order is kept as-is.
    gBucketMgr.ClearThreads();
    gBucketMgr.ClearBuckets();
}
|
||||
|
||||
/// Registers the calling thread with the global bucket manager.
/// @param threadId  0 registers the thread as "API" and also registers all
///                  core buckets; any other value registers it as "WORKER".
INLINE void rdtscInit(int threadId)
{
    const bool isFirstThread = (threadId == 0);

    // Only thread 0 registers the buckets, recording for every CORE_BUCKETS
    // value the id handed back by the manager.
    if (isFirstThread)
    {
        gBucketMap.resize(NumBuckets);
        for (uint32_t bucket = 0; bucket < NumBuckets; ++bucket)
        {
            gBucketMap[bucket] = gBucketMgr.RegisterBucket(gCoreBuckets[bucket]);
        }
    }

    std::string threadName = isFirstThread ? "API" : "WORKER";
    gBucketMgr.RegisterThread(threadName);
}
|
||||
|
||||
/// Starts the bucket registered for @p bucketId.
/// @param bucketId  Index into gBucketMap (not range-checked here).
INLINE void rdtscStart(uint32_t bucketId)
{
    // Translate the index into the id assigned by the manager at registration.
    const uint32_t registeredId = gBucketMap[bucketId];
    gBucketMgr.StartBucket(registeredId);
}
|
||||
|
||||
/// Stops the bucket registered for @p bucketId.
/// @param bucketId  Index into gBucketMap (not range-checked here).
/// @param count     Accepted but not used by this function.
/// @param drawId    Accepted but not used by this function.
INLINE void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId)
{
    // Translate the index into the id assigned by the manager at registration.
    const uint32_t registeredId = gBucketMap[bucketId];
    gBucketMgr.StopBucket(registeredId);
}
|
||||
|
||||
/// Adds an event with value @p count1 to the bucket registered for @p bucketId.
/// @param bucketId  Index into gBucketMap (not range-checked here).
/// @param count1    Value forwarded to BucketManager::AddEvent.
/// @param count2    Accepted but not used by this function.
INLINE void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2)
{
    // Translate the index into the id assigned by the manager at registration.
    const uint32_t registeredId = gBucketMap[bucketId];
    gBucketMgr.AddEvent(registeredId, count1);
}
|
||||
|
||||
/// Advances the global frame counter and drives the capture window: capture
/// starts when the counter reaches KNOB_BUCKETS_START_FRAME; it stops, and the
/// report is written to "rdtsc.txt", when it reaches KNOB_BUCKETS_END_FRAME.
INLINE void rdtscEndFrame()
{
    ++gCurrentFrame;

    // The two checks are independent on purpose: if both knobs name the same
    // frame, capture is started and then immediately stopped and reported.
    if (gCurrentFrame == KNOB_BUCKETS_START_FRAME)
    {
        gBucketMgr.StartCapture();
    }

    if (gCurrentFrame == KNOB_BUCKETS_END_FRAME)
    {
        gBucketMgr.StopCapture();
        gBucketMgr.PrintReport("rdtsc.txt");
    }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,88 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file tessellator.h
|
||||
*
|
||||
* @brief Tessellator fixed function unit interface definition
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
/// Allocate and initialize a new tessellation context
|
||||
HANDLE SWR_API TSInitCtx(
|
||||
SWR_TS_DOMAIN tsDomain, ///< [IN] Tessellation domain (isoline, quad, triangle)
|
||||
SWR_TS_PARTITIONING tsPartitioning, ///< [IN] Tessellation partitioning algorithm
|
||||
SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology, ///< [IN] Tessellation output topology
|
||||
void* pContextMem, ///< [IN] Memory to use for the context
|
||||
size_t& memSize); ///< [INOUT] In: Amount of memory in pContextMem. Out: Mem required
|
||||
|
||||
/// Destroy & de-allocate tessellation context
|
||||
void SWR_API TSDestroyCtx(
|
||||
HANDLE tsCtx); ///< [IN] Tessellation context to be destroyed
|
||||
|
||||
/// Output of TSTessellate.
struct SWR_TS_TESSELLATED_DATA
{
    uint32_t NumPrimitives;     ///< primitive count (presumably sizes each ppIndices[] array - confirm)
    uint32_t NumDomainPoints;   ///< domain point count (presumably sizes the U/V arrays - confirm)

    uint32_t *ppIndices[3];     ///< three index arrays
    float    *pDomainPointsU;   ///< U coordinate per domain point
    float    *pDomainPointsV;   ///< V coordinate per domain point
    // For Tri: pDomainPointsW[i] = 1.0f - pDomainPointsU[i] - pDomainPointsV[i]
};
|
||||
|
||||
/// Perform Tessellation
|
||||
void SWR_API TSTessellate(
|
||||
HANDLE tsCtx, ///< [IN] Tessellation Context
|
||||
const SWR_TESSELLATION_FACTORS& tsTessFactors, ///< [IN] Tessellation Factors
|
||||
SWR_TS_TESSELLATED_DATA& tsTessellatedData); ///< [OUT] Tessellated Data
|
||||
|
||||
|
||||
|
||||
/// @TODO - Implement OSS tessellator
|
||||
|
||||
/// Placeholder implementation of TSInitCtx: asserts "Not Implemented" and
/// returns NULL. None of the arguments are read and memSize is left untouched.
INLINE HANDLE SWR_API TSInitCtx(SWR_TS_DOMAIN          tsDomain,
                                SWR_TS_PARTITIONING    tsPartitioning,
                                SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology,
                                void*                  pContextMem,
                                size_t&                memSize)
{
    SWR_ASSERT(0, "%s: Not Implemented", __FUNCTION__);

    return NULL;
}
|
||||
|
||||
|
||||
/// Placeholder implementation of TSDestroyCtx: asserts "Not Implemented";
/// the context handle is not touched.
INLINE void SWR_API TSDestroyCtx(
    HANDLE tsCtx)
{
    SWR_ASSERT(0, "%s: Not Implemented", __FUNCTION__);
}
|
||||
|
||||
|
||||
/// Placeholder implementation of TSTessellate: asserts "Not Implemented";
/// tsTessellatedData is not written.
INLINE void SWR_API TSTessellate(HANDLE                          tsCtx,
                                 const SWR_TESSELLATION_FACTORS& tsTessFactors,
                                 SWR_TS_TESSELLATED_DATA&        tsTessellatedData)
{
    SWR_ASSERT(0, "%s: Not Implemented", __FUNCTION__);
}
|
||||
|
|
@ -0,0 +1,962 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <thread>
|
||||
#include <algorithm>
|
||||
#include <unordered_set>
|
||||
#include <float.h>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "common/os.h"
|
||||
#include "context.h"
|
||||
#include "frontend.h"
|
||||
#include "backend.h"
|
||||
#include "rasterizer.h"
|
||||
#include "rdtsc_core.h"
|
||||
#include "tilemgr.h"
|
||||
#include "core/multisample.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// ThreadId
|
||||
/// One core as seen by CalculateProcessorTopology: the processor group it was
/// reported in and the ids of its hardware threads.
struct Core
{
    uint32_t              procGroup{0};
    std::vector<uint32_t> threadIds;
};
|
||||
|
||||
struct NumaNode
|
||||
{
|
||||
std::vector<Core> cores;
|
||||
};
|
||||
|
||||
typedef std::vector<NumaNode> CPUNumaNodes;
|
||||
|
||||
/// Query the machine's processor topology.
/// @param out_nodes                  - [OUT] cleared, then filled with one entry per NUMA node, each
///                                     holding its cores and their hardware thread ids.
/// @param out_numThreadsPerProcGroup - [OUT] on Windows, the number of hardware threads found in
///                                     processor group 0; on Linux, the total number of "processor"
///                                     entries recorded from /proc/cpuinfo.
void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThreadsPerProcGroup)
{
    out_nodes.clear();
    out_numThreadsPerProcGroup = 0;

#if defined(_WIN32)

    SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer[KNOB_MAX_NUM_THREADS];
    DWORD bufSize = sizeof(buffer);

    BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufSize);
    SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");

    uint32_t count = bufSize / buffer->Size;
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = buffer;

    for (uint32_t i = 0; i < count; ++i)
    {
        SWR_ASSERT(pBuffer->Relationship == RelationProcessorCore);
        for (uint32_t g = 0; g < pBuffer->Processor.GroupCount; ++g)
        {
            auto& gmask = pBuffer->Processor.GroupMask[g];
            uint32_t threadId = 0;
            uint32_t procGroup = gmask.Group;

            // Created lazily on the first thread of this group mask, then reused
            // for the remaining threads of the same mask.
            Core* pCore = nullptr;

            uint32_t numThreads = (uint32_t)_mm_popcount_sizeT(gmask.Mask);

            while (BitScanForwardSizeT((unsigned long*)&threadId, gmask.Mask))
            {
                // clear mask
                gmask.Mask &= ~(KAFFINITY(1) << threadId);

                // Find Numa Node
                PROCESSOR_NUMBER procNum = {};
                procNum.Group = WORD(procGroup);
                procNum.Number = UCHAR(threadId);

                uint32_t numaId = 0;
                ret = GetNumaProcessorNodeEx(&procNum, (PUSHORT)&numaId);
                SWR_ASSERT(ret);

                // Store data
                if (out_nodes.size() <= numaId) out_nodes.resize(numaId + 1);
                auto& numaNode = out_nodes[numaId];

                uint32_t coreId = 0;

                if (nullptr == pCore)
                {
                    numaNode.cores.push_back(Core());
                    pCore = &numaNode.cores.back();
                    pCore->procGroup = procGroup;
#if !defined(_WIN64)
                    coreId = (uint32_t)numaNode.cores.size();
                    if ((coreId * numThreads) >= 32)
                    {
                        // Windows doesn't return threadIds >= 32 for a processor group correctly
                        // when running a 32-bit application.
                        // Just save -1 as the threadId
                        threadId = uint32_t(-1);
                    }
#endif
                }
                pCore->threadIds.push_back(threadId);
                if (procGroup == 0)
                {
                    out_numThreadsPerProcGroup++;
                }
            }
        }
        pBuffer = PtrAdd(pBuffer, pBuffer->Size);
    }

#elif defined(__linux__) || defined (__gnu_linux__)

    // Parse /proc/cpuinfo to get full topology
    std::ifstream input("/proc/cpuinfo");
    std::string line;
    char* c;
    uint32_t threadId = uint32_t(-1);
    uint32_t coreId = uint32_t(-1);
    uint32_t numaId = uint32_t(-1);

    // Stores the most recently parsed (numaId, coreId, threadId) triple, growing
    // the node and core arrays as needed.
    // NOTE(review): if a "processor" stanza is not followed by "physical id" /
    // "core id" lines, numaId / coreId are still uint32_t(-1) here and the
    // indexing below is out of range - confirm whether such cpuinfo layouts
    // need to be supported.
    auto saveCurrentThread = [&]()
    {
        if (out_nodes.size() <= numaId) out_nodes.resize(numaId + 1);
        auto& numaNode = out_nodes[numaId];
        if (numaNode.cores.size() <= coreId) numaNode.cores.resize(coreId + 1);
        auto& core = numaNode.cores[coreId];

        core.procGroup = coreId;
        core.threadIds.push_back(threadId);

        out_numThreadsPerProcGroup++;
    };

    // Parses the decimal value that follows ": " on the current line.
    auto parseValue = [&]() -> uint32_t
    {
        auto data_start = line.find(": ") + 2;
        return std::strtoul(&line.c_str()[data_start], &c, 10);
    };

    while (std::getline(input, line))
    {
        if (line.find("processor") != std::string::npos)
        {
            // A new "processor" line closes the previous stanza, so flush it first.
            if (threadId != uint32_t(-1))
            {
                saveCurrentThread();
            }

            threadId = parseValue();
            continue;
        }
        if (line.find("core id") != std::string::npos)
        {
            coreId = parseValue();
            continue;
        }
        if (line.find("physical id") != std::string::npos)
        {
            numaId = parseValue();
            continue;
        }
    }

    // Flush the last stanza, which has no following "processor" line.
    if (threadId != uint32_t(-1))
    {
        saveCurrentThread();
    }

    // Core ids may be sparse, so resize() above can leave placeholder cores with
    // no threads. Drop them. (erase/remove_if avoids reusing an iterator that
    // vector::erase has invalidated.)
    for (auto& numaNode : out_nodes)
    {
        auto& cores = numaNode.cores;
        cores.erase(std::remove_if(cores.begin(), cores.end(),
                                   [](const Core& core) { return core.threadIds.empty(); }),
                    cores.end());
    }

#else

#error Unsupported platform

#endif
}
|
||||
|
||||
|
||||
void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
|
||||
{
|
||||
// Only bind threads when MAX_WORKER_THREADS isn't set.
|
||||
if (KNOB_MAX_WORKER_THREADS && bindProcGroup == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
{
|
||||
GROUP_AFFINITY affinity = {};
|
||||
affinity.Group = procGroupId;
|
||||
|
||||
#if !defined(_WIN64)
|
||||
if (threadId >= 32)
|
||||
{
|
||||
// In a 32-bit process on Windows it is impossible to bind
|
||||
// to logical processors 32-63 within a processor group.
|
||||
// In this case set the mask to 0 and let the system assign
|
||||
// the processor. Hopefully it will make smart choices.
|
||||
affinity.Mask = 0;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
// If KNOB_MAX_WORKER_THREADS is set, only bind to the proc group,
|
||||
// Not the individual HW thread.
|
||||
if (!KNOB_MAX_WORKER_THREADS)
|
||||
{
|
||||
affinity.Mask = KAFFINITY(1) << threadId;
|
||||
}
|
||||
}
|
||||
|
||||
SetThreadGroupAffinity(GetCurrentThread(), &affinity, nullptr);
|
||||
}
|
||||
#else
|
||||
cpu_set_t cpuset;
|
||||
pthread_t thread = pthread_self();
|
||||
CPU_ZERO(&cpuset);
|
||||
CPU_SET(threadId, &cpuset);
|
||||
|
||||
pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Read the context's DrawEnqueued counter with a plain (non-interlocked) load.
INLINE
uint64_t GetEnqueuedDraw(SWR_CONTEXT *pContext)
{
    // An interlocked read was used here at some point; kept for reference:
    //uint64_t result = _InterlockedCompareExchange64((volatile __int64*)&pContext->DrawEnqueued, 0, 0);
    //return result;
    const uint64_t enqueued = pContext->DrawEnqueued;
    return enqueued;
}
|
||||
|
||||
/// Map a draw id to its slot in the draw-context ring: id N lives in slot
/// (N - 1) modulo KNOB_MAX_DRAWS_IN_FLIGHT.
INLINE
DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint64_t drawId)
{
    const uint64_t slot = (drawId - 1) % KNOB_MAX_DRAWS_IN_FLIGHT;
    return &pContext->dcRing[slot];
}
|
||||
|
||||
// returns true if dependency not met
|
||||
INLINE
|
||||
bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint64_t lastRetiredDraw)
|
||||
{
|
||||
return (pDC->dependency > lastRetiredDraw);
|
||||
}
|
||||
|
||||
void ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
|
||||
{
|
||||
// Load clear color into SIMD register...
|
||||
float *pClearData = (float*)(pHotTile->clearData);
|
||||
simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
|
||||
simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
|
||||
simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
|
||||
simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
|
||||
|
||||
float *pfBuf = (float*)pHotTile->pBuffer;
|
||||
uint32_t numSamples = pHotTile->numSamples;
|
||||
|
||||
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
|
||||
{
|
||||
for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) //SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
|
||||
{
|
||||
_simd_store_ps(pfBuf, valR);
|
||||
pfBuf += KNOB_SIMD_WIDTH;
|
||||
_simd_store_ps(pfBuf, valG);
|
||||
pfBuf += KNOB_SIMD_WIDTH;
|
||||
_simd_store_ps(pfBuf, valB);
|
||||
pfBuf += KNOB_SIMD_WIDTH;
|
||||
_simd_store_ps(pfBuf, valA);
|
||||
pfBuf += KNOB_SIMD_WIDTH;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ClearDepthHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
|
||||
{
|
||||
// Load clear color into SIMD register...
|
||||
float *pClearData = (float*)(pHotTile->clearData);
|
||||
simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
|
||||
|
||||
float *pfBuf = (float*)pHotTile->pBuffer;
|
||||
uint32_t numSamples = pHotTile->numSamples;
|
||||
|
||||
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
|
||||
{
|
||||
for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
|
||||
{
|
||||
_simd_store_ps(pfBuf, valZ);
|
||||
pfBuf += KNOB_SIMD_WIDTH;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ClearStencilHotTile(const HOTTILE* pHotTile)
|
||||
{
|
||||
// convert from F32 to U8.
|
||||
uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
|
||||
//broadcast 32x into __m256i...
|
||||
simdscalari valS = _simd_set1_epi8(clearVal);
|
||||
|
||||
simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
|
||||
uint32_t numSamples = pHotTile->numSamples;
|
||||
|
||||
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
|
||||
{
|
||||
// We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
|
||||
for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
|
||||
{
|
||||
_simd_store_si(pBuf, valS);
|
||||
pBuf += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for draw calls, we initialize the active hot tiles and perform deferred
|
||||
// load on them if tile is in invalid state. we do this in the outer thread loop instead of inside
|
||||
// the draw routine itself mainly for performance, to avoid unnecessary setup
|
||||
// every triangle
|
||||
// @todo support deferred clear
|
||||
INLINE
|
||||
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, const TRIANGLE_WORK_DESC* pWork)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
HotTileMgr *pHotTileMgr = pContext->pHotTileMgr;
|
||||
|
||||
uint32_t x, y;
|
||||
MacroTileMgr::getTileIndices(macroID, x, y);
|
||||
x *= KNOB_MACROTILE_X_DIM;
|
||||
y *= KNOB_MACROTILE_Y_DIM;
|
||||
|
||||
uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
|
||||
|
||||
// check RT if enabled
|
||||
unsigned long rtSlot = 0;
|
||||
uint32_t colorHottileEnableMask = state.colorHottileEnable;
|
||||
while(_BitScanForward(&rtSlot, colorHottileEnableMask))
|
||||
{
|
||||
HOTTILE* pHotTile = pHotTileMgr->GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
|
||||
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
}
|
||||
else if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
// Clear the tile.
|
||||
ClearColorHotTile(pHotTile);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
}
|
||||
colorHottileEnableMask &= ~(1 << rtSlot);
|
||||
}
|
||||
|
||||
// check depth if enabled
|
||||
if (state.depthHottileEnable)
|
||||
{
|
||||
HOTTILE* pHotTile = pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
}
|
||||
else if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
// Clear the tile.
|
||||
ClearDepthHotTile(pHotTile);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// check stencil if enabled
|
||||
if (state.stencilHottileEnable)
|
||||
{
|
||||
HOTTILE* pHotTile = pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
}
|
||||
else if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
// Clear the tile.
|
||||
ClearStencilHotTile(pHotTile);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance curDrawBE past every draw whose back-end work is already complete.
/// For each draw skipped, the draw context's threadsDoneBE counter is
/// incremented. Scanning stops at a non-compute draw whose front end has not
/// finished, or at the first draw that still has work outstanding.
/// @param curDrawBE - [INOUT] this thread's current back-end draw counter.
/// @return true if an incomplete draw remains, false if curDrawBE has caught
///         up with the enqueued draw count.
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
{
    const uint64_t drawEnqueued = GetEnqueuedDraw(pContext);

    while (curDrawBE < drawEnqueued)
    {
        DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];

        // If it's not compute and FE is not done then stop scanning.
        if (!(pDC->doneFE || pDC->isCompute))
        {
            break;
        }

        bool workDone;
        if (pDC->isCompute)
        {
            workDone = pDC->pDispatch->isWorkComplete();
        }
        else
        {
            workDone = pDC->pTileMgr->isWorkComplete();
        }

        if (!workDone)
        {
            break;
        }

        curDrawBE++;
        InterlockedIncrement(&pDC->threadsDoneBE);
    }

    // No incomplete draw is left once we've reached the enqueued count.
    return curDrawBE < drawEnqueued;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief If there is any BE work then go work on it.
|
||||
/// @param pContext - pointer to SWR context.
|
||||
/// @param workerId - The unique worker ID that is assigned to this thread.
|
||||
/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
|
||||
/// has its own curDrawBE counter and this ensures that each worker processes all the
|
||||
/// draws in order.
|
||||
/// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its
|
||||
/// own set and each time it fails to lock a macrotile, because its already locked,
|
||||
/// then it will add that tile to the lockedTiles set. As a worker begins to work
|
||||
/// on future draws the lockedTiles ensure that it doesn't work on tiles that may
|
||||
/// still have work pending in a previous draw. Additionally, the lockedTiles is
|
||||
/// hueristic that can steer a worker back to the same macrotile that it had been
|
||||
/// working on in a previous draw.
|
||||
void WorkOnFifoBE(
|
||||
SWR_CONTEXT *pContext,
|
||||
uint32_t workerId,
|
||||
uint64_t &curDrawBE,
|
||||
std::unordered_set<uint32_t>& lockedTiles)
|
||||
{
|
||||
// Find the first incomplete draw that has pending work. If no such draw is found then
|
||||
// return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
|
||||
if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
|
||||
|
||||
// Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
|
||||
lockedTiles.clear();
|
||||
|
||||
// Try to work on each draw in order of the available draws in flight.
|
||||
// 1. If we're on curDrawBE, we can work on any macrotile that is available.
|
||||
// 2. If we're trying to work on draws after curDrawBE, we are restricted to
|
||||
// working on those macrotiles that are known to be complete in the prior draw to
|
||||
// maintain order. The locked tiles provides the history to ensures this.
|
||||
for (uint64_t i = curDrawBE; i < GetEnqueuedDraw(pContext); ++i)
|
||||
{
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[i % KNOB_MAX_DRAWS_IN_FLIGHT];
|
||||
|
||||
if (pDC->isCompute) return; // We don't look at compute work.
|
||||
|
||||
// First wait for FE to be finished with this draw. This keeps threading model simple
|
||||
// but if there are lots of bubbles between draws then serializing FE and BE may
|
||||
// need to be revisited.
|
||||
if (!pDC->doneFE) return;
|
||||
|
||||
// If this draw is dependent on a previous draw then we need to bail.
|
||||
if (CheckDependency(pContext, pDC, lastRetiredDraw))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it.
|
||||
std::vector<uint32_t> ¯oTiles = pDC->pTileMgr->getDirtyTiles();
|
||||
|
||||
for (uint32_t tileID : macroTiles)
|
||||
{
|
||||
MacroTileQueue &tile = pDC->pTileMgr->getMacroTileQueue(tileID);
|
||||
|
||||
// can only work on this draw if it's not in use by other threads
|
||||
if (lockedTiles.find(tileID) == lockedTiles.end())
|
||||
{
|
||||
if (tile.getNumQueued())
|
||||
{
|
||||
if (tile.tryLock())
|
||||
{
|
||||
BE_WORK *pWork;
|
||||
|
||||
RDTSC_START(WorkerFoundWork);
|
||||
|
||||
uint32_t numWorkItems = tile.getNumQueued();
|
||||
|
||||
if (numWorkItems != 0)
|
||||
{
|
||||
pWork = tile.peek();
|
||||
SWR_ASSERT(pWork);
|
||||
if (pWork->type == DRAW)
|
||||
{
|
||||
InitializeHotTiles(pContext, pDC, tileID, (const TRIANGLE_WORK_DESC*)&pWork->desc);
|
||||
}
|
||||
}
|
||||
|
||||
while ((pWork = tile.peek()) != nullptr)
|
||||
{
|
||||
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
|
||||
tile.dequeue();
|
||||
}
|
||||
RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId);
|
||||
|
||||
_ReadWriteBarrier();
|
||||
|
||||
pDC->pTileMgr->markTileComplete(tileID);
|
||||
|
||||
// Optimization: If the draw is complete and we're the last one to have worked on it then
|
||||
// we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete.
|
||||
if ((curDrawBE == i) && pDC->pTileMgr->isWorkComplete())
|
||||
{
|
||||
// We can increment the current BE and safely move to next draw since we know this draw is complete.
|
||||
curDrawBE++;
|
||||
InterlockedIncrement(&pDC->threadsDoneBE);
|
||||
|
||||
lastRetiredDraw++;
|
||||
|
||||
lockedTiles.clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
|
||||
lockedTiles.insert(tileID);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Front-end pass for one worker.
/// First advances curDrawFE past every draw that is compute, has finished its
/// FE, or whose FE is already locked (incrementing threadsDoneFE for each),
/// then walks the remaining enqueued draws and runs the FE of any draw whose
/// FeLock it manages to acquire.
/// @param pContext  - pointer to SWR context.
/// @param workerId  - The unique worker ID that is assigned to this thread.
/// @param curDrawFE - [INOUT] this thread's front-end draw counter.
/// @param numaNode  - not used by this implementation.
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, UCHAR numaNode)
{
    // Try to grab the next DC from the ring
    const uint64_t drawEnqueued = GetEnqueuedDraw(pContext);

    while (curDrawFE < drawEnqueued)
    {
        uint32_t dcSlot = curDrawFE % KNOB_MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];

        // Stop at the first draw whose FE nobody has claimed yet.
        if (!(pDC->isCompute || pDC->doneFE || pDC->FeLock))
        {
            break;
        }

        curDrawFE++;
        InterlockedIncrement(&pDC->threadsDoneFE);
    }

    for (uint64_t curDraw = curDrawFE; curDraw < drawEnqueued; ++curDraw)
    {
        uint32_t dcSlot = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT;
        DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];

        // Cheap pre-check before attempting the atomic claim.
        if (pDC->isCompute || pDC->FeLock)
        {
            continue;
        }

        uint32_t initial = InterlockedCompareExchange((volatile uint32_t*)&pDC->FeLock, 1, 0);
        if (initial != 0)
        {
            // Another worker claimed this draw's FE first.
            continue;
        }

        // successfully grabbed the DC, now run the FE
        pDC->FeWork.pfnWork(pContext, pDC, workerId, &pDC->FeWork.desc);

        _ReadWriteBarrier();
        pDC->doneFE = true;
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief If there is any compute work then go work on it.
|
||||
/// @param pContext - pointer to SWR context.
|
||||
/// @param workerId - The unique worker ID that is assigned to this thread.
|
||||
/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
|
||||
/// has its own curDrawBE counter and this ensures that each worker processes all the
|
||||
/// draws in order.
|
||||
void WorkOnCompute(
|
||||
SWR_CONTEXT *pContext,
|
||||
uint32_t workerId,
|
||||
uint64_t& curDrawBE)
|
||||
{
|
||||
if (FindFirstIncompleteDraw(pContext, curDrawBE) == false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t lastRetiredDraw = pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT].drawId - 1;
|
||||
|
||||
DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % KNOB_MAX_DRAWS_IN_FLIGHT];
|
||||
if (pDC->isCompute == false) return;
|
||||
|
||||
// check dependencies
|
||||
if (CheckDependency(pContext, pDC, lastRetiredDraw))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
SWR_ASSERT(pDC->pDispatch != nullptr);
|
||||
DispatchQueue& queue = *pDC->pDispatch;
|
||||
|
||||
// Is there any work remaining?
|
||||
if (queue.getNumQueued() > 0)
|
||||
{
|
||||
bool lastToComplete = false;
|
||||
|
||||
uint32_t threadGroupId = 0;
|
||||
while (queue.getWork(threadGroupId))
|
||||
{
|
||||
ProcessComputeBE(pDC, workerId, threadGroupId);
|
||||
|
||||
lastToComplete = queue.finishedWork();
|
||||
}
|
||||
|
||||
_ReadWriteBarrier();
|
||||
|
||||
if (lastToComplete)
|
||||
{
|
||||
SWR_ASSERT(queue.isWorkComplete() == true);
|
||||
pDC->doneCompute = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Main loop of a worker thread.
/// @param pData - pointer to this worker's THREAD_DATA.
/// @return 0 once thread shutdown has been requested.
DWORD workerThreadMain(LPVOID pData)
{
    THREAD_DATA *pThreadData = (THREAD_DATA*)pData;
    SWR_CONTEXT *pContext = pThreadData->pContext;
    uint32_t threadId = pThreadData->threadId;
    uint32_t workerId = pThreadData->workerId;

    bindThread(threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);

    RDTSC_INIT(threadId);

    int numaNode = (int)pThreadData->numaId;

    // flush denormals to 0
    _mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);

    // Track tiles locked by other threads. If we try to lock a macrotile and find it's already
    // locked then we'll add it to this list so that we don't try and lock it again.
    std::unordered_set<uint32_t> lockedTiles;

    // Each worker has the ability to work on any of the queued draws as long as certain
    // conditions are met. The data associated with a draw is guaranteed to be active as long
    // as a worker hasn't signaled that it has moved on to the next draw when it determines
    // there is no more work to do. The api thread will not increment the head of the dc ring
    // until all workers have moved past the current head.
    // The logic to determine what to work on is:
    // 1- Try to work on the FE of any draw that is queued. For now there are no dependencies
    //    on the FE work, so any worker can grab any FE and process in parallel. Eventually
    //    we'll need dependency tracking to force serialization on FEs. The worker will try
    //    to pick an FE by atomically incrementing a counter in the swr context, and keeps
    //    trying until it reaches the tail.
    // 2- BE work must be done in strict order. We accomplish this today by pulling work off
    //    the oldest draw (ie the head) of the dcRing. The worker can determine if there is
    //    any work left by comparing the total # of binned work items and the total # of
    //    completed work items. If they are equal, then there is no more work to do for this
    //    draw, and the worker can safely increment its oldestDraw counter and move on to the
    //    next draw.
    std::unique_lock<std::mutex> lock(pContext->WaitLock, std::defer_lock);

    auto threadHasWork = [&](uint64_t curDraw) { return curDraw != pContext->DrawEnqueued; };

    uint64_t curDrawBE = 1;
    uint64_t curDrawFE = 1;

    while (pContext->threadPool.inThreadShutdown == false)
    {
        // Spin for a bounded number of iterations before falling back to a blocking wait.
        for (uint32_t spin = 0; spin < KNOB_WORKER_SPIN_LOOP_COUNT && !threadHasWork(curDrawBE); ++spin)
        {
            _mm_pause();
        }

        if (!threadHasWork(curDrawBE))
        {
            lock.lock();

            // check for thread idle condition again under lock
            if (threadHasWork(curDrawBE))
            {
                lock.unlock();
                continue;
            }

            if (pContext->threadPool.inThreadShutdown)
            {
                lock.unlock();
                break;
            }

            RDTSC_START(WorkerWaitForThreadEvent);

            pContext->FifosNotEmpty.wait(lock);
            lock.unlock();

            RDTSC_STOP(WorkerWaitForThreadEvent, 0, 0);

            if (pContext->threadPool.inThreadShutdown)
            {
                break;
            }
        }

        // Back-end draw work first, then compute, then front-end.
        RDTSC_START(WorkerWorkOnFifoBE);
        WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles);
        RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);

        WorkOnCompute(pContext, workerId, curDrawBE);

        WorkOnFifoFE(pContext, workerId, curDrawFE, numaNode);
    }

    return 0;
}
|
||||
|
||||
/// Thread entry point: runs workerThreadMain and returns its result.
/// On Windows the call is wrapped in a structured-exception block whose filter
/// is EXCEPTION_CONTINUE_SEARCH; 1 is returned if control falls out of that block.
DWORD workerThreadInit(LPVOID pData)
{
#if defined(_WIN32)
    __try
    {
        return workerThreadMain(pData);
    }
    __except(EXCEPTION_CONTINUE_SEARCH)
    {
    }

    return 1;
#else
    return workerThreadMain(pData);
#endif // _WIN32
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Work out how many worker threads to run from the processor
///        topology and the KNOB_MAX_* limits, then start them.
/// @param pContext - context handed to every worker; NumWorkerThreads is
///                   written here.
/// @param pPool    - pool to fill in (numThreads, pThreadData, threads[]).
///
/// If a single worker is requested and no spare HW thread can be found, or
/// the per-thread data cannot be allocated, no threads are created:
/// numThreads is set to 0 and the SINGLE_THREADED knob is switched on.
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
    bindThread(0);

    CPUNumaNodes nodes;
    uint32_t numThreadsPerProcGroup = 0;
    CalculateProcessorTopology(nodes, numThreadsPerProcGroup);

    // Node 0 / core 0 are taken as representative of the whole machine.
    uint32_t numHWNodes         = (uint32_t)nodes.size();
    uint32_t numHWCoresPerNode  = (uint32_t)nodes[0].cores.size();
    uint32_t numHWHyperThreads  = (uint32_t)nodes[0].cores[0].threadIds.size();

    uint32_t numNodes           = numHWNodes;
    uint32_t numCoresPerNode    = numHWCoresPerNode;
    uint32_t numHyperThreads    = numHWHyperThreads;

    // A knob value of 0 means "no limit" for each of the three dimensions.
    if (KNOB_MAX_NUMA_NODES)
    {
        numNodes = std::min(numNodes, KNOB_MAX_NUMA_NODES);
    }

    if (KNOB_MAX_CORES_PER_NUMA_NODE)
    {
        numCoresPerNode = std::min(numCoresPerNode, KNOB_MAX_CORES_PER_NUMA_NODE);
    }

    if (KNOB_MAX_THREADS_PER_CORE)
    {
        numHyperThreads = std::min(numHyperThreads, KNOB_MAX_THREADS_PER_CORE);
    }

    // Calculate numThreads
    uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;

    if (KNOB_MAX_WORKER_THREADS)
    {
        uint32_t maxHWThreads = numHWNodes * numHWCoresPerNode * numHWHyperThreads;
        numThreads = std::min(KNOB_MAX_WORKER_THREADS, maxHWThreads);
    }

    if (numThreads > KNOB_MAX_NUM_THREADS)
    {
        printf("WARNING: system thread count %u exceeds max %u, "
            "performance will be degraded\n",
            numThreads, KNOB_MAX_NUM_THREADS);
    }

    if (numThreads == 1)
    {
        // If only 1 worker thread, try to move it to an available
        // HW thread.  If that fails, use the API thread.
        if (numCoresPerNode < numHWCoresPerNode)
        {
            numCoresPerNode++;
        }
        else if (numHyperThreads < numHWHyperThreads)
        {
            numHyperThreads++;
        }
        else if (numNodes < numHWNodes)
        {
            numNodes++;
        }
        else
        {
            pPool->numThreads = 0;
            SET_KNOB(SINGLE_THREADED, true);
            return;
        }
    }
    else
    {
        // Save a HW thread for the API thread.
        numThreads--;
    }

    // pPool->threads[] has exactly KNOB_MAX_NUM_THREADS slots. The warning
    // above only reports the excess, so cap the worker count here to keep
    // the launch loops below from writing past the end of that array.
    if (numThreads > KNOB_MAX_NUM_THREADS)
    {
        numThreads = KNOB_MAX_NUM_THREADS;
    }

    // Allocate the per-thread data before publishing any pool state so that
    // an allocation failure can take the same single-threaded fallback as
    // the "no spare HW thread" case above instead of dereferencing NULL.
    THREAD_DATA *pThreadData = (THREAD_DATA *)malloc(numThreads * sizeof(THREAD_DATA));
    if (pThreadData == nullptr)
    {
        pPool->numThreads = 0;
        pPool->pThreadData = nullptr;
        SET_KNOB(SINGLE_THREADED, true);
        return;
    }

    pPool->numThreads = numThreads;
    pContext->NumWorkerThreads = pPool->numThreads;

    pPool->inThreadShutdown = false;
    pPool->pThreadData = pThreadData;

    if (KNOB_MAX_WORKER_THREADS)
    {
        bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
        uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
        // When MAX_WORKER_THREADS is set we don't bother to bind to specific HW threads
        // But Windows will still require binding to specific process groups
        for (uint32_t workerId = 0; workerId < numThreads; ++workerId)
        {
            THREAD_DATA &data = pPool->pThreadData[workerId];
            data.workerId = workerId;
            data.procGroupId = workerId % numProcGroups;
            data.threadId = 0;
            data.numaId = 0;
            data.pContext = pContext;
            data.forceBindProcGroup = bForceBindProcGroup;
            pPool->threads[workerId] = new std::thread(workerThreadInit, &data);
        }
    }
    else
    {
        // One worker per selected HW thread, walking node -> core -> thread.
        // The extra "workerId < numThreads" conditions only matter when the
        // count was capped above; otherwise the walk yields exactly
        // numThreads workers.
        uint32_t workerId = 0;
        for (uint32_t n = 0; n < numNodes && workerId < numThreads; ++n)
        {
            auto& node = nodes[n];

            uint32_t numCores = numCoresPerNode;
            for (uint32_t c = 0; c < numCores && workerId < numThreads; ++c)
            {
                auto& core = node.cores[c];
                for (uint32_t t = 0; t < numHyperThreads && workerId < numThreads; ++t)
                {
                    if (c == 0 && n == 0 && t == 0)
                    {
                        // Skip core 0, thread0 on node 0 to reserve for API thread
                        continue;
                    }

                    THREAD_DATA &data = pPool->pThreadData[workerId];
                    data.workerId = workerId;
                    data.procGroupId = core.procGroup;
                    data.threadId = core.threadIds[t];
                    data.numaId = n;
                    data.pContext = pContext;
                    // The block comes from malloc, so give this field a
                    // defined value before the worker can read it.
                    data.forceBindProcGroup = false;
                    pPool->threads[workerId] = new std::thread(workerThreadInit, &data);

                    ++workerId;
                }
            }
        }
    }
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Signal shutdown to the workers, join and delete every thread in
///        the pool, then free the per-thread data.
/// @param pContext - context owning WaitLock and the FifosNotEmpty condition.
/// @param pPool    - pool whose threads[] and pThreadData are released.
///
/// Does nothing when the SINGLE_THREADED knob is set.
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
    if (KNOB_SINGLE_THREADED)
    {
        return;
    }

    // Inform threads to finish up. The flag is raised and the condition is
    // broadcast while WaitLock is held; the lock is dropped at scope exit,
    // before any join.
    {
        std::unique_lock<std::mutex> guard(pContext->WaitLock);
        pPool->inThreadShutdown = true;
        _mm_mfence();
        pContext->FifosNotEmpty.notify_all();
    }

    // Wait for threads to finish and destroy them
    for (uint32_t i = 0; i < pPool->numThreads; ++i)
    {
        THREAD_PTR pThread = pPool->threads[i];
        pThread->join();
        delete pThread;
    }

    // Clean up data used by threads
    free(pPool->pThreadData);
}
|
|
@ -0,0 +1,63 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file threads.h
|
||||
*
|
||||
* @brief Definitions for SWR threading model.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "knobs.h"
|
||||
|
||||
#include <unordered_set>
|
||||
#include <thread>
|
||||
typedef std::thread* THREAD_PTR;
|
||||
|
||||
struct SWR_CONTEXT;
|
||||
|
||||
struct THREAD_DATA
|
||||
{
|
||||
uint32_t procGroupId; // Will always be 0 for non-Windows OS
|
||||
uint32_t threadId; // within the procGroup for Windows
|
||||
uint32_t numaId; // NUMA node id
|
||||
uint32_t workerId;
|
||||
SWR_CONTEXT *pContext;
|
||||
bool forceBindProcGroup; // Only useful when KNOB_MAX_WORKER_THREADS is set.
|
||||
};
|
||||
|
||||
|
||||
struct THREAD_POOL
|
||||
{
|
||||
THREAD_PTR threads[KNOB_MAX_NUM_THREADS];
|
||||
uint32_t numThreads;
|
||||
volatile bool inThreadShutdown;
|
||||
THREAD_DATA *pThreadData;
|
||||
};
|
||||
|
||||
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
|
||||
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
|
||||
|
||||
// Expose FE and BE worker functions to the API thread if single threaded
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, UCHAR numaNode);
|
||||
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, std::unordered_set<uint32_t> &usedTiles);
|
||||
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
|
|
@ -0,0 +1,105 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file tilemgr.cpp
|
||||
*
|
||||
* @brief Implementation for Macro Tile Manager which provides the facilities
|
||||
* for threads to work on a macro tile.
|
||||
*
|
||||
******************************************************************************/
|
||||
#include <unordered_map>
|
||||
|
||||
#include "fifo.hpp"
|
||||
#include "tilemgr.h"
|
||||
|
||||
// Pack macrotile coordinates into one id: x in the upper 16 bits, y in the
// lower 16. Arguments are parenthesized so expressions such as `a | b`
// expand with the intended precedence.
#define TILE_ID(x,y) (((x) << 16) | (y))
|
||||
|
||||
// override new/delete for alignment
|
||||
void *MacroTileMgr::operator new(size_t size)
|
||||
{
|
||||
return _aligned_malloc(size, 64);
|
||||
}
|
||||
|
||||
void MacroTileMgr::operator delete(void *p)
|
||||
{
|
||||
_aligned_free(p);
|
||||
}
|
||||
|
||||
void* DispatchQueue::operator new(size_t size)
|
||||
{
|
||||
return _aligned_malloc(size, 64);
|
||||
}
|
||||
|
||||
void DispatchQueue::operator delete(void *p)
|
||||
{
|
||||
_aligned_free(p);
|
||||
}
|
||||
|
||||
/// @brief Binds the manager to the arena that is later passed to the tile
///        queues. The work-item counters are not set here; initialize()
///        assigns them.
MacroTileMgr::MacroTileMgr(Arena& arena)
    : mArena(arena)
{
}
|
||||
|
||||
void MacroTileMgr::initialize()
|
||||
{
|
||||
mWorkItemsProduced = 0;
|
||||
mWorkItemsConsumed = 0;
|
||||
|
||||
mDirtyTiles.clear();
|
||||
}
|
||||
|
||||
void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
|
||||
{
|
||||
// Should not enqueue more then what we have backing for in the hot tile manager.
|
||||
SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
|
||||
SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
|
||||
|
||||
uint32_t id = TILE_ID(x, y);
|
||||
|
||||
MacroTileQueue &tile = mTiles[id];
|
||||
tile.mWorkItemsFE++;
|
||||
|
||||
if (tile.mWorkItemsFE == 1)
|
||||
{
|
||||
tile.clear(mArena);
|
||||
mDirtyTiles.push_back(id);
|
||||
}
|
||||
|
||||
mWorkItemsProduced++;
|
||||
tile.enqueue_try_nosync(mArena, pWork);
|
||||
}
|
||||
|
||||
void MacroTileMgr::markTileComplete(uint32_t id)
|
||||
{
|
||||
SWR_ASSERT(mTiles.find(id) != mTiles.end());
|
||||
MacroTileQueue &tile = mTiles[id];
|
||||
uint32_t numTiles = tile.mWorkItemsFE;
|
||||
InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
|
||||
|
||||
_ReadWriteBarrier();
|
||||
tile.mWorkItemsBE += numTiles;
|
||||
SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
|
||||
|
||||
// clear out tile, but defer fifo clear until the next DC first queues to it.
|
||||
// this prevents worker threads from constantly locking a completed macro tile
|
||||
tile.mWorkItemsFE = 0;
|
||||
tile.mWorkItemsBE = 0;
|
||||
}
|
|
@ -0,0 +1,390 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file tilemgr.h
|
||||
*
|
||||
* @brief Definitions for Macro Tile Manager which provides the facilities
|
||||
* for threads to work on a macro tile.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include "common/formats.h"
|
||||
#include "fifo.hpp"
|
||||
#include "context.h"
|
||||
#include "format_traits.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// MacroTile - work queue for a tile.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct MacroTileQueue
|
||||
{
|
||||
MacroTileQueue() { }
|
||||
~MacroTileQueue() { }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Returns number of work items queued for this tile.
|
||||
uint32_t getNumQueued()
|
||||
{
|
||||
return mFifo.getNumQueued();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Attempt to lock the work fifo. If already locked then return false.
|
||||
bool tryLock()
|
||||
{
|
||||
return mFifo.tryLock();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Clear fifo and unlock it.
|
||||
void clear(Arena& arena)
|
||||
{
|
||||
mFifo.clear(arena);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Peek at work sitting at the front of the fifo.
|
||||
BE_WORK* peek()
|
||||
{
|
||||
return mFifo.peek();
|
||||
}
|
||||
|
||||
bool enqueue_try_nosync(Arena& arena, const BE_WORK* entry)
|
||||
{
|
||||
return mFifo.enqueue_try_nosync(arena, entry);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Move to next work item
|
||||
void dequeue()
|
||||
{
|
||||
mFifo.dequeue_noinc();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Destroy fifo
|
||||
void destroy()
|
||||
{
|
||||
mFifo.destroy();
|
||||
}
|
||||
|
||||
///@todo This will all be private.
|
||||
uint32_t mWorkItemsFE = 0;
|
||||
uint32_t mWorkItemsBE = 0;
|
||||
|
||||
private:
|
||||
QUEUE<BE_WORK> mFifo;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// MacroTileMgr - Manages macrotiles for a draw.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
class MacroTileMgr
|
||||
{
|
||||
public:
|
||||
MacroTileMgr(Arena& arena);
|
||||
~MacroTileMgr()
|
||||
{
|
||||
for (auto &tile : mTiles)
|
||||
{
|
||||
tile.second.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
void initialize();
|
||||
INLINE std::vector<uint32_t>& getDirtyTiles() { return mDirtyTiles; }
|
||||
INLINE MacroTileQueue& getMacroTileQueue(uint32_t id) { return mTiles[id]; }
|
||||
void markTileComplete(uint32_t id);
|
||||
|
||||
INLINE bool isWorkComplete()
|
||||
{
|
||||
return mWorkItemsProduced == mWorkItemsConsumed;
|
||||
}
|
||||
|
||||
void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
|
||||
|
||||
static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
|
||||
{
|
||||
y = tileID & 0xffff;
|
||||
x = (tileID >> 16) & 0xffff;
|
||||
}
|
||||
|
||||
void *operator new(size_t size);
|
||||
void operator delete (void *p);
|
||||
|
||||
private:
|
||||
Arena& mArena;
|
||||
SWR_FORMAT mFormat;
|
||||
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
|
||||
|
||||
// Any tile that has work queued to it is a dirty tile.
|
||||
std::vector<uint32_t> mDirtyTiles;
|
||||
|
||||
OSALIGNLINE(LONG) mWorkItemsProduced;
|
||||
OSALIGNLINE(volatile LONG) mWorkItemsConsumed;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// DispatchQueue - work queue for dispatch
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
class DispatchQueue
|
||||
{
|
||||
public:
|
||||
DispatchQueue() {}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Setup the producer consumer counts.
|
||||
void initialize(uint32_t totalTasks, void* pTaskData)
|
||||
{
|
||||
// The available and outstanding counts start with total tasks.
|
||||
// At the start there are N tasks available and outstanding.
|
||||
// When both the available and outstanding counts have reached 0 then all work has completed.
|
||||
// When a worker starts on a threadgroup then it decrements the available count.
|
||||
// When a worker completes a threadgroup then it decrements the outstanding count.
|
||||
|
||||
mTasksAvailable = totalTasks;
|
||||
mTasksOutstanding = totalTasks;
|
||||
|
||||
mpTaskData = pTaskData;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Returns number of tasks available for this dispatch.
|
||||
uint32_t getNumQueued()
|
||||
{
|
||||
return (mTasksAvailable > 0) ? mTasksAvailable : 0;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Atomically decrement the work available count. If the result
|
||||
// is greater than 0 then we can on the associated thread group.
|
||||
// Otherwise, there is no more work to do.
|
||||
bool getWork(uint32_t& groupId)
|
||||
{
|
||||
LONG result = InterlockedDecrement(&mTasksAvailable);
|
||||
|
||||
if (result >= 0)
|
||||
{
|
||||
groupId = result;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Atomically decrement the outstanding count. A worker is notifying
|
||||
/// us that he just finished some work. Also, return true if we're
|
||||
/// the last worker to complete this dispatch.
|
||||
bool finishedWork()
|
||||
{
|
||||
LONG result = InterlockedDecrement(&mTasksOutstanding);
|
||||
SWR_ASSERT(result >= 0, "Should never oversubscribe work");
|
||||
|
||||
return (result == 0) ? true : false;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Work is complete once both the available/outstanding counts have reached 0.
|
||||
bool isWorkComplete()
|
||||
{
|
||||
return ((mTasksAvailable <= 0) &&
|
||||
(mTasksOutstanding <= 0));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Return pointer to task data.
|
||||
const void* GetTasksData()
|
||||
{
|
||||
return mpTaskData;
|
||||
}
|
||||
|
||||
void *operator new(size_t size);
|
||||
void operator delete (void *p);
|
||||
|
||||
void* mpTaskData; // The API thread will set this up and the callback task function will interpet this.
|
||||
|
||||
OSALIGNLINE(volatile LONG) mTasksAvailable{ 0 };
|
||||
OSALIGNLINE(volatile LONG) mTasksOutstanding{ 0 };
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// HOTTILE_STATE - state of the contents of a hot tile buffer.
//////////////////////////////////////////////////////////////////////////
enum HOTTILE_STATE
{
    /// tile is in uninitialized state and should be loaded with surface
    /// contents before rendering
    HOTTILE_INVALID,

    /// tile should be cleared
    HOTTILE_CLEAR,

    /// tile has been rendered to
    HOTTILE_DIRTY,

    /// tile has been stored to memory
    HOTTILE_RESOLVED,
};
|
||||
|
||||
struct HOTTILE
|
||||
{
|
||||
BYTE *pBuffer;
|
||||
HOTTILE_STATE state;
|
||||
DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
|
||||
uint32_t numSamples;
|
||||
uint32_t renderTargetArrayIndex; // current render target array index loaded
|
||||
};
|
||||
|
||||
union HotTileSet
|
||||
{
|
||||
struct
|
||||
{
|
||||
HOTTILE Color[SWR_NUM_RENDERTARGETS];
|
||||
HOTTILE Depth;
|
||||
HOTTILE Stencil;
|
||||
};
|
||||
HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
|
||||
};
|
||||
|
||||
class HotTileMgr
|
||||
{
|
||||
public:
|
||||
HotTileMgr()
|
||||
{
|
||||
memset(&mHotTiles[0][0], 0, sizeof(mHotTiles));
|
||||
|
||||
// cache hottile size
|
||||
for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
|
||||
{
|
||||
mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
|
||||
}
|
||||
mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
|
||||
mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
|
||||
}
|
||||
|
||||
~HotTileMgr()
|
||||
{
|
||||
for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
|
||||
{
|
||||
for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
|
||||
{
|
||||
for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
|
||||
{
|
||||
if (mHotTiles[x][y].Attachment[a].pBuffer != NULL)
|
||||
{
|
||||
_aligned_free(mHotTiles[x][y].Attachment[a].pBuffer);
|
||||
mHotTiles[x][y].Attachment[a].pBuffer = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
|
||||
uint32_t renderTargetArrayIndex = 0)
|
||||
{
|
||||
uint32_t x, y;
|
||||
MacroTileMgr::getTileIndices(macroID, x, y);
|
||||
|
||||
assert(x < KNOB_NUM_HOT_TILES_X);
|
||||
assert(y < KNOB_NUM_HOT_TILES_Y);
|
||||
|
||||
HotTileSet &tile = mHotTiles[x][y];
|
||||
HOTTILE& hotTile = tile.Attachment[attachment];
|
||||
if (hotTile.pBuffer == NULL)
|
||||
{
|
||||
if (create)
|
||||
{
|
||||
uint32_t size = numSamples * mHotTileSize[attachment];
|
||||
hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
|
||||
hotTile.state = HOTTILE_INVALID;
|
||||
hotTile.numSamples = numSamples;
|
||||
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// free the old tile and create a new one with enough space to hold all samples
|
||||
if (numSamples > hotTile.numSamples)
|
||||
{
|
||||
// tile should be either uninitialized or resolved if we're deleting and switching to a
|
||||
// new sample count
|
||||
assert((hotTile.state == HOTTILE_INVALID) ||
|
||||
(hotTile.state == HOTTILE_RESOLVED) ||
|
||||
(hotTile.state == HOTTILE_CLEAR));
|
||||
_aligned_free(hotTile.pBuffer);
|
||||
|
||||
uint32_t size = numSamples * mHotTileSize[attachment];
|
||||
hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
|
||||
hotTile.state = HOTTILE_INVALID;
|
||||
hotTile.numSamples = numSamples;
|
||||
}
|
||||
|
||||
// if requested render target array index isn't currently loaded, need to store out the current hottile
|
||||
// and load the requested array slice
|
||||
if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
|
||||
{
|
||||
SWR_FORMAT format;
|
||||
switch (attachment)
|
||||
{
|
||||
case SWR_ATTACHMENT_COLOR0:
|
||||
case SWR_ATTACHMENT_COLOR1:
|
||||
case SWR_ATTACHMENT_COLOR2:
|
||||
case SWR_ATTACHMENT_COLOR3:
|
||||
case SWR_ATTACHMENT_COLOR4:
|
||||
case SWR_ATTACHMENT_COLOR5:
|
||||
case SWR_ATTACHMENT_COLOR6:
|
||||
case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
|
||||
case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
|
||||
case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
|
||||
default: SWR_ASSERT(false, "Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
|
||||
}
|
||||
|
||||
if (hotTile.state == HOTTILE_DIRTY)
|
||||
{
|
||||
pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
|
||||
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
|
||||
}
|
||||
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
|
||||
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
|
||||
|
||||
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
|
||||
hotTile.state = HOTTILE_DIRTY;
|
||||
}
|
||||
}
|
||||
return &tile.Attachment[attachment];
|
||||
}
|
||||
|
||||
HotTileSet &GetHotTile(uint32_t macroID)
|
||||
{
|
||||
uint32_t x, y;
|
||||
MacroTileMgr::getTileIndices(macroID, x, y);
|
||||
assert(x < KNOB_NUM_HOT_TILES_X);
|
||||
assert(y < KNOB_NUM_HOT_TILES_Y);
|
||||
|
||||
return mHotTiles[x][y];
|
||||
}
|
||||
|
||||
private:
|
||||
HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
|
||||
uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
|
||||
};
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file utils.cpp
|
||||
*
|
||||
* @brief Utilities used by SWR core.
|
||||
*
|
||||
******************************************************************************/
|
||||
#if defined(_WIN32)
|
||||
|
||||
#include<Windows.h>
|
||||
#include <Gdiplus.h>
|
||||
#include <Gdiplusheaders.h>
|
||||
#include <cstdint>
|
||||
|
||||
using namespace Gdiplus;
|
||||
|
||||
int GetEncoderClsid(const WCHAR* format, CLSID* pClsid)
|
||||
{
|
||||
uint32_t num = 0; // number of image encoders
|
||||
uint32_t size = 0; // size of the image encoder array in bytes
|
||||
|
||||
ImageCodecInfo* pImageCodecInfo = nullptr;
|
||||
|
||||
GetImageEncodersSize(&num, &size);
|
||||
if(size == 0)
|
||||
return -1; // Failure
|
||||
|
||||
pImageCodecInfo = (ImageCodecInfo*)(malloc(size));
|
||||
if(pImageCodecInfo == nullptr)
|
||||
return -1; // Failure
|
||||
|
||||
GetImageEncoders(num, size, pImageCodecInfo);
|
||||
|
||||
for(uint32_t j = 0; j < num; ++j)
|
||||
{
|
||||
if( wcscmp(pImageCodecInfo[j].MimeType, format) == 0 )
|
||||
{
|
||||
*pClsid = pImageCodecInfo[j].Clsid;
|
||||
free(pImageCodecInfo);
|
||||
return j; // Success
|
||||
}
|
||||
}
|
||||
|
||||
free(pImageCodecInfo);
|
||||
return -1; // Failure
|
||||
}
|
||||
|
||||
void SaveImageToPNGFile(
|
||||
const WCHAR *pFilename,
|
||||
void *pBuffer,
|
||||
uint32_t width,
|
||||
uint32_t height)
|
||||
{
|
||||
// dump pixels to a png
|
||||
// Initialize GDI+.
|
||||
GdiplusStartupInput gdiplusStartupInput;
|
||||
ULONG_PTR gdiplusToken;
|
||||
GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, nullptr);
|
||||
|
||||
Bitmap *bitmap = new Bitmap(width, height);
|
||||
BYTE *pBytes = (BYTE*)pBuffer;
|
||||
static const uint32_t bytesPerPixel = 4;
|
||||
for (uint32_t y = 0; y < height; ++y)
|
||||
for (uint32_t x = 0; x < width; ++x)
|
||||
{
|
||||
uint32_t pixel = *(uint32_t*)pBytes;
|
||||
if (pixel == 0xcdcdcdcd)
|
||||
{
|
||||
pixel = 0xFFFF00FF;
|
||||
}
|
||||
else if (pixel == 0xdddddddd)
|
||||
{
|
||||
pixel = 0x80FF0000;
|
||||
}
|
||||
else
|
||||
{
|
||||
pixel |= 0xFF000000;
|
||||
}
|
||||
Color color(pixel);
|
||||
bitmap->SetPixel(x, y, color);
|
||||
pBytes += bytesPerPixel;
|
||||
}
|
||||
|
||||
// Save image.
|
||||
CLSID pngClsid;
|
||||
GetEncoderClsid(L"image/png", &pngClsid);
|
||||
bitmap->Save(pFilename, &pngClsid, nullptr);
|
||||
|
||||
delete bitmap;
|
||||
|
||||
GdiplusShutdown(gdiplusToken);
|
||||
}
|
||||
|
||||
void OpenBitmapFromFile(
|
||||
const WCHAR *pFilename,
|
||||
void **pBuffer,
|
||||
uint32_t *width,
|
||||
uint32_t *height)
|
||||
{
|
||||
GdiplusStartupInput gdiplusStartupInput;
|
||||
ULONG_PTR gdiplusToken;
|
||||
GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, nullptr);
|
||||
|
||||
Bitmap *bitmap = new Bitmap(pFilename);
|
||||
|
||||
*width = bitmap->GetWidth();
|
||||
*height = bitmap->GetHeight();
|
||||
*pBuffer = new BYTE[*width * *height * 4]; // width * height * |RGBA|
|
||||
|
||||
// The folder 'stb_image' contains a PNG open/close module which
|
||||
// is far less painful than this is, yo.
|
||||
Gdiplus::Color clr;
|
||||
for (uint32_t y = 0, idx = 0; y < *height; ++y)
|
||||
{
|
||||
for (uint32_t x = 0; x < *width; ++x, idx += 4)
|
||||
{
|
||||
bitmap->GetPixel(x, *height - y - 1, &clr);
|
||||
((BYTE*)*pBuffer)[idx + 0] = clr.GetBlue();
|
||||
((BYTE*)*pBuffer)[idx + 1] = clr.GetGreen();
|
||||
((BYTE*)*pBuffer)[idx + 2] = clr.GetRed();
|
||||
((BYTE*)*pBuffer)[idx + 3] = clr.GetAlpha();
|
||||
}
|
||||
}
|
||||
|
||||
delete bitmap;
|
||||
bitmap = 0;
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,831 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file utils.h
|
||||
*
|
||||
* @brief Utilities used by SWR core.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#include "common/os.h"
|
||||
#include "common/simdintrin.h"
|
||||
#include "common/swr_assert.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
void SaveImageToPNGFile(
|
||||
const WCHAR *pFilename,
|
||||
void *pBuffer,
|
||||
uint32_t width,
|
||||
uint32_t height);
|
||||
|
||||
void OpenBitmapFromFile(
|
||||
const WCHAR *pFilename,
|
||||
void **pBuffer,
|
||||
uint32_t *width,
|
||||
uint32_t *height);
|
||||
#endif
|
||||
|
||||
/// @todo assume linux is always 64 bit
|
||||
#if defined(_WIN64) || defined(__linux__) || defined(__gnu_linux__)
|
||||
#define _MM_INSERT_EPI64 _mm_insert_epi64
|
||||
#define _MM_EXTRACT_EPI64 _mm_extract_epi64
|
||||
#else
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Software stand-in for a 64-bit extract from a 128-bit vector.
/// @param a - source vector
/// @param ndx - 0 selects the low 64 bits; any other value selects the
///              high 64 bits
/// @return the selected 64-bit half
INLINE INT64 _MM_EXTRACT_EPI64(__m128i a, const int32_t ndx)
{
    // Spill the vector into four aligned 32-bit lanes.
    OSALIGNLINE(uint32_t) lanes[4];
    _mm_store_si128((__m128i*)lanes, a);

    // Lanes 0/1 form the low half, lanes 2/3 the high half.
    const uint32_t firstLane = (ndx == 0) ? 0 : 2;

    uint64_t half = lanes[firstLane];
    half |= (uint64_t)lanes[firstLane + 1] << 32;
    return half;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Software stand-in for a 64-bit insert into a 128-bit vector.
/// @param a - source vector
/// @param b - 64-bit value to insert
/// @param ndx - 0 replaces the low 64 bits; any other value replaces the
///              high 64 bits
/// @return copy of a with the selected half replaced by b
INLINE __m128i _MM_INSERT_EPI64(__m128i a, INT64 b, const int32_t ndx)
{
    // Spill the vector into two aligned 64-bit halves, patch one, reload.
    OSALIGNLINE(int64_t) halves[2];
    _mm_store_si128((__m128i*)halves, a);

    halves[(ndx == 0) ? 0 : 1] = b;

    return _mm_load_si128((const __m128i*)halves);
}
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Integer bounding box described by its four edges.
//////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) BBOX
{
    int top, bottom, left, right;

    /// Default construction leaves all four edges uninitialized.
    BBOX() {}

    /// @param t - top edge
    /// @param b - bottom edge
    /// @param l - left edge
    /// @param r - right edge
    BBOX(int t, int b, int l, int r) : top(t), bottom(b), left(l), right(r) {}

    /// @return true when all four edges match rhs.
    /// Const-qualified so that const boxes can be compared as well.
    bool operator==(const BBOX& rhs) const
    {
        return (this->top == rhs.top &&
                this->bottom == rhs.bottom &&
                this->left == rhs.left &&
                this->right == rhs.right);
    }

    /// @return true when any edge differs from rhs.
    bool operator!=(const BBOX& rhs) const
    {
        return !(*this == rhs);
    }
};
|
||||
|
||||
struct simdBBox
|
||||
{
|
||||
simdscalari top, bottom, left, right;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief In-place transpose of a 4x4 matrix of 32-bit elements stored
///        as four float vectors (one row per register).
/// @param row0..row3 - matrix rows on entry, matrix columns on return
INLINE
void vTranspose(__m128 &row0, __m128 &row1, __m128 &row2, __m128 &row3)
{
    // The shuffles are done in the integer domain; the casts only
    // reinterpret the bits.
    const __m128i a = _mm_castps_si128(row0);
    const __m128i b = _mm_castps_si128(row1);
    const __m128i c = _mm_castps_si128(row2);
    const __m128i d = _mm_castps_si128(row3);

    // Interleave 32-bit elements of the row pairs.
    const __m128i cdLo = _mm_unpacklo_epi32(c, d);  // c0 d0 c1 d1
    const __m128i cdHi = _mm_unpackhi_epi32(c, d);  // c2 d2 c3 d3
    const __m128i abLo = _mm_unpacklo_epi32(a, b);  // a0 b0 a1 b1
    const __m128i abHi = _mm_unpackhi_epi32(a, b);  // a2 b2 a3 b3

    // Combine 64-bit halves to form the columns.
    row0 = _mm_castsi128_ps(_mm_unpacklo_epi64(abLo, cdLo));  // a0 b0 c0 d0
    row1 = _mm_castsi128_ps(_mm_unpackhi_epi64(abLo, cdLo));  // a1 b1 c1 d1
    row2 = _mm_castsi128_ps(_mm_unpacklo_epi64(abHi, cdHi));  // a2 b2 c2 d2
    row3 = _mm_castsi128_ps(_mm_unpackhi_epi64(abHi, cdHi));  // a3 b3 c3 d3
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief In-place transpose of a 4x4 matrix of 32-bit elements stored
///        as four integer vectors (one row per register).
/// @param row0..row3 - matrix rows on entry, matrix columns on return
INLINE
void vTranspose(__m128i &row0, __m128i &row1, __m128i &row2, __m128i &row3)
{
    // Interleave 32-bit elements of the row pairs.
    const __m128i cdLo = _mm_unpacklo_epi32(row2, row3);  // c0 d0 c1 d1
    const __m128i cdHi = _mm_unpackhi_epi32(row2, row3);  // c2 d2 c3 d3
    const __m128i abLo = _mm_unpacklo_epi32(row0, row1);  // a0 b0 a1 b1
    const __m128i abHi = _mm_unpackhi_epi32(row0, row1);  // a2 b2 a3 b3

    // Combine 64-bit halves to form the columns.
    row0 = _mm_unpacklo_epi64(abLo, cdLo);  // a0 b0 c0 d0
    row1 = _mm_unpackhi_epi64(abLo, cdLo);  // a1 b1 c1 d1
    row2 = _mm_unpacklo_epi64(abHi, cdHi);  // a2 b2 c2 d2
    row3 = _mm_unpackhi_epi64(abHi, cdHi);  // a3 b3 c3 d3
}
|
||||
|
||||
#define GCC_VERSION (__GNUC__ * 10000 \
|
||||
+ __GNUC_MINOR__ * 100 \
|
||||
+ __GNUC_PATCHLEVEL__)
|
||||
|
||||
#if defined(__GNUC__) && (GCC_VERSION < 40900)
|
||||
#define _mm_undefined_ps _mm_setzero_ps
|
||||
#define _mm_undefined_si128 _mm_setzero_si128
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
#define _mm256_undefined_ps _mm256_setzero_ps
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Interleaves three 8-wide component vectors into eight 4-wide
///        vectors, one per element. The fourth lane of every output comes
///        from _mm256_undefined_ps() and carries no meaningful data.
/// @param vDst - receives the eight interleaved vectors
/// @param vSrc0 - first component (x) for elements 0..7
/// @param vSrc1 - second component (y) for elements 0..7
/// @param vSrc2 - third component (z) for elements 0..7
INLINE
void vTranspose3x8(__m128 (&vDst)[8], __m256 &vSrc0, __m256 &vSrc1, __m256 &vSrc2)
{
    // Elements 0,1 (low lane) and 4,5 (high lane).
    const __m256 xzLo = _mm256_unpacklo_ps(vSrc0, vSrc2);                  //x0z0x1z1 x4z4x5z5
    const __m256 ywLo = _mm256_unpacklo_ps(vSrc1, _mm256_undefined_ps());  //y0w0y1w1 y4w4y5w5
    const __m256 elem04 = _mm256_unpacklo_ps(xzLo, ywLo);                  //x0y0z0w0 x4y4z4w4
    const __m256 elem15 = _mm256_unpackhi_ps(xzLo, ywLo);                  //x1y1z1w1 x5y5z5w5

    // Elements 2,3 (low lane) and 6,7 (high lane).
    const __m256 xzHi = _mm256_unpackhi_ps(vSrc0, vSrc2);                  //x2z2x3z3 x6z6x7z7
    const __m256 ywHi = _mm256_unpackhi_ps(vSrc1, _mm256_undefined_ps());  //y2w2y3w3 y6w6y7w7
    const __m256 elem26 = _mm256_unpacklo_ps(xzHi, ywHi);                  //x2y2z2w2 x6y6z6w6
    const __m256 elem37 = _mm256_unpackhi_ps(xzHi, ywHi);                  //x3y3z3w3 x7y7z7w7

    // Low 128 bits hold elements 0..3.
    vDst[0] = _mm256_castps256_ps128(elem04);
    vDst[1] = _mm256_castps256_ps128(elem15);
    vDst[2] = _mm256_castps256_ps128(elem26);
    vDst[3] = _mm256_castps256_ps128(elem37);

    // High 128 bits hold elements 4..7.
    vDst[4] = _mm256_extractf128_ps(elem04, 1);
    vDst[5] = _mm256_extractf128_ps(elem15, 1);
    vDst[6] = _mm256_extractf128_ps(elem26, 1);
    vDst[7] = _mm256_extractf128_ps(elem37, 1);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Interleaves four 8-wide component vectors into eight 4-wide
///        vectors, one per element.
/// @param vDst - receives the eight interleaved vectors
/// @param vSrc0 - first component (x) for elements 0..7
/// @param vSrc1 - second component (y) for elements 0..7
/// @param vSrc2 - third component (z) for elements 0..7
/// @param vSrc3 - fourth component (w) for elements 0..7
INLINE
void vTranspose4x8(__m128 (&vDst)[8], __m256 &vSrc0, __m256 &vSrc1, __m256 &vSrc2, __m256 &vSrc3)
{
    // Elements 0,1 (low lane) and 4,5 (high lane).
    const __m256 xzLo = _mm256_unpacklo_ps(vSrc0, vSrc2);   //x0z0x1z1 x4z4x5z5
    const __m256 ywLo = _mm256_unpacklo_ps(vSrc1, vSrc3);   //y0w0y1w1 y4w4y5w5
    const __m256 elem04 = _mm256_unpacklo_ps(xzLo, ywLo);   //x0y0z0w0 x4y4z4w4
    const __m256 elem15 = _mm256_unpackhi_ps(xzLo, ywLo);   //x1y1z1w1 x5y5z5w5

    // Elements 2,3 (low lane) and 6,7 (high lane).
    const __m256 xzHi = _mm256_unpackhi_ps(vSrc0, vSrc2);   //x2z2x3z3 x6z6x7z7
    const __m256 ywHi = _mm256_unpackhi_ps(vSrc1, vSrc3);   //y2w2y3w3 y6w6y7w7
    const __m256 elem26 = _mm256_unpacklo_ps(xzHi, ywHi);   //x2y2z2w2 x6y6z6w6
    const __m256 elem37 = _mm256_unpackhi_ps(xzHi, ywHi);   //x3y3z3w3 x7y7z7w7

    // Low 128 bits hold elements 0..3.
    vDst[0] = _mm256_castps256_ps128(elem04);
    vDst[1] = _mm256_castps256_ps128(elem15);
    vDst[2] = _mm256_castps256_ps128(elem26);
    vDst[3] = _mm256_castps256_ps128(elem37);

    // High 128 bits hold elements 4..7.
    vDst[4] = _mm256_extractf128_ps(elem04, 1);
    vDst[5] = _mm256_extractf128_ps(elem15, 1);
    vDst[6] = _mm256_extractf128_ps(elem26, 1);
    vDst[7] = _mm256_extractf128_ps(elem37, 1);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Transposes an 8x8 matrix of 32-bit elements.
/// @param vDst - receives the eight output rows (the input columns)
/// @param vMask0..vMask7 - the eight input rows
INLINE
void vTranspose8x8(__m256 (&vDst)[8], const __m256 &vMask0, const __m256 &vMask1, const __m256 &vMask2, const __m256 &vMask3, const __m256 &vMask4, const __m256 &vMask5, const __m256 &vMask6, const __m256 &vMask7)
{
    // Pass 1: interleave 32-bit elements of adjacent row pairs.
    const __m256 lo01 = _mm256_unpacklo_ps(vMask0, vMask1);
    const __m256 hi01 = _mm256_unpackhi_ps(vMask0, vMask1);
    const __m256 lo23 = _mm256_unpacklo_ps(vMask2, vMask3);
    const __m256 hi23 = _mm256_unpackhi_ps(vMask2, vMask3);
    const __m256 lo45 = _mm256_unpacklo_ps(vMask4, vMask5);
    const __m256 hi45 = _mm256_unpackhi_ps(vMask4, vMask5);
    const __m256 lo67 = _mm256_unpacklo_ps(vMask6, vMask7);
    const __m256 hi67 = _mm256_unpackhi_ps(vMask6, vMask7);

    // Pass 2: gather four rows per 128-bit lane. cXY_rABCD holds column X
    // in its low lane and column Y in its high lane, for rows A..D.
    const __m256 c04_r0123 = _mm256_shuffle_ps(lo01, lo23, _MM_SHUFFLE(1,0,1,0));
    const __m256 c15_r0123 = _mm256_shuffle_ps(lo01, lo23, _MM_SHUFFLE(3,2,3,2));
    const __m256 c26_r0123 = _mm256_shuffle_ps(hi01, hi23, _MM_SHUFFLE(1,0,1,0));
    const __m256 c37_r0123 = _mm256_shuffle_ps(hi01, hi23, _MM_SHUFFLE(3,2,3,2));
    const __m256 c04_r4567 = _mm256_shuffle_ps(lo45, lo67, _MM_SHUFFLE(1,0,1,0));
    const __m256 c15_r4567 = _mm256_shuffle_ps(lo45, lo67, _MM_SHUFFLE(3,2,3,2));
    const __m256 c26_r4567 = _mm256_shuffle_ps(hi45, hi67, _MM_SHUFFLE(1,0,1,0));
    const __m256 c37_r4567 = _mm256_shuffle_ps(hi45, hi67, _MM_SHUFFLE(3,2,3,2));

    // Pass 3: 0x20 pairs the two low lanes (columns 0..3), 0x31 pairs the
    // two high lanes (columns 4..7).
    vDst[0] = _mm256_permute2f128_ps(c04_r0123, c04_r4567, 0x20);
    vDst[1] = _mm256_permute2f128_ps(c15_r0123, c15_r4567, 0x20);
    vDst[2] = _mm256_permute2f128_ps(c26_r0123, c26_r4567, 0x20);
    vDst[3] = _mm256_permute2f128_ps(c37_r0123, c37_r4567, 0x20);
    vDst[4] = _mm256_permute2f128_ps(c04_r0123, c04_r4567, 0x31);
    vDst[5] = _mm256_permute2f128_ps(c15_r0123, c15_r4567, 0x31);
    vDst[6] = _mm256_permute2f128_ps(c26_r0123, c26_r4567, 0x31);
    vDst[7] = _mm256_permute2f128_ps(c37_r0123, c37_r4567, 0x31);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Integer-input overload of the 8x8 transpose: reinterprets each
///        row as floats and forwards to the float overload.
/// @param vDst - receives the eight output rows
/// @param vMask0..vMask7 - the eight input rows
INLINE
void vTranspose8x8(__m256 (&vDst)[8], const __m256i &vMask0, const __m256i &vMask1, const __m256i &vMask2, const __m256i &vMask3, const __m256i &vMask4, const __m256i &vMask5, const __m256i &vMask6, const __m256i &vMask7)
{
    // Bit-preserving reinterpretation only; no value conversion happens.
    const __m256 vRow0 = _mm256_castsi256_ps(vMask0);
    const __m256 vRow1 = _mm256_castsi256_ps(vMask1);
    const __m256 vRow2 = _mm256_castsi256_ps(vMask2);
    const __m256 vRow3 = _mm256_castsi256_ps(vMask3);
    const __m256 vRow4 = _mm256_castsi256_ps(vMask4);
    const __m256 vRow5 = _mm256_castsi256_ps(vMask5);
    const __m256 vRow6 = _mm256_castsi256_ps(vMask6);
    const __m256 vRow7 = _mm256_castsi256_ps(vMask7);

    vTranspose8x8(vDst, vRow0, vRow1, vRow2, vRow3, vRow4, vRow5, vRow6, vRow7);
}
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// TranposeSingleComponent
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<uint32_t bpp>
|
||||
struct TransposeSingleComponent
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Pass-thru for single component.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
memcpy(pDst, pSrc, (bpp * KNOB_SIMD_WIDTH) / 8);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose8_8_8_8
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose8_8_8_8
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 8_8_8_8 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
simdscalari src = _simd_load_si((const simdscalari*)pSrc);
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
#if KNOB_ARCH == KNOB_ARCH_AVX
|
||||
__m128i c0c1 = _mm256_castsi256_si128(src); // rrrrrrrrgggggggg
|
||||
__m128i c2c3 = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(src), 1)); // bbbbbbbbaaaaaaaa
|
||||
__m128i c0c2 = _mm_unpacklo_epi64(c0c1, c2c3); // rrrrrrrrbbbbbbbb
|
||||
__m128i c1c3 = _mm_unpackhi_epi64(c0c1, c2c3); // ggggggggaaaaaaaa
|
||||
__m128i c01 = _mm_unpacklo_epi8(c0c2, c1c3); // rgrgrgrgrgrgrgrg
|
||||
__m128i c23 = _mm_unpackhi_epi8(c0c2, c1c3); // babababababababa
|
||||
__m128i c0123lo = _mm_unpacklo_epi16(c01, c23); // rgbargbargbargba
|
||||
__m128i c0123hi = _mm_unpackhi_epi16(c01, c23); // rgbargbargbargba
|
||||
_mm_store_si128((__m128i*)pDst, c0123lo);
|
||||
_mm_store_si128((__m128i*)(pDst + 16), c0123hi);
|
||||
#elif KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
simdscalari dst01 = _mm256_shuffle_epi8(src,
|
||||
_mm256_set_epi32(0x0f078080, 0x0e068080, 0x0d058080, 0x0c048080, 0x80800b03, 0x80800a02, 0x80800901, 0x80800800));
|
||||
simdscalari dst23 = _mm256_permute2x128_si256(src, src, 0x01);
|
||||
dst23 = _mm256_shuffle_epi8(dst23,
|
||||
_mm256_set_epi32(0x80800f07, 0x80800e06, 0x80800d05, 0x80800c04, 0x0b038080, 0x0a028080, 0x09018080, 0x08008080));
|
||||
simdscalari dst = _mm256_or_si256(dst01, dst23);
|
||||
_simd_store_si((simdscalari*)pDst, dst);
|
||||
#endif
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose8_8_8
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose8_8_8
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 8_8_8 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose8_8
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose8_8
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 8_8 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
simdscalari src = _simd_load_si((const simdscalari*)pSrc);
|
||||
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
__m128i rg = _mm256_castsi256_si128(src); // rrrrrrrr gggggggg
|
||||
__m128i g = _mm_unpackhi_epi64(rg, rg); // gggggggg gggggggg
|
||||
rg = _mm_unpacklo_epi8(rg, g);
|
||||
_mm_store_si128((__m128i*)pDst, rg);
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose32_32_32_32
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose32_32_32_32
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 32_32_32_32 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
simdscalar src0 = _simd_load_ps((const float*)pSrc);
|
||||
simdscalar src1 = _simd_load_ps((const float*)pSrc + 8);
|
||||
simdscalar src2 = _simd_load_ps((const float*)pSrc + 16);
|
||||
simdscalar src3 = _simd_load_ps((const float*)pSrc + 24);
|
||||
|
||||
__m128 vDst[8];
|
||||
vTranspose4x8(vDst, src0, src1, src2, src3);
|
||||
_mm_store_ps((float*)pDst, vDst[0]);
|
||||
_mm_store_ps((float*)pDst+4, vDst[1]);
|
||||
_mm_store_ps((float*)pDst+8, vDst[2]);
|
||||
_mm_store_ps((float*)pDst+12, vDst[3]);
|
||||
_mm_store_ps((float*)pDst+16, vDst[4]);
|
||||
_mm_store_ps((float*)pDst+20, vDst[5]);
|
||||
_mm_store_ps((float*)pDst+24, vDst[6]);
|
||||
_mm_store_ps((float*)pDst+28, vDst[7]);
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose32_32_32
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose32_32_32
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 32_32_32 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
simdscalar src0 = _simd_load_ps((const float*)pSrc);
|
||||
simdscalar src1 = _simd_load_ps((const float*)pSrc + 8);
|
||||
simdscalar src2 = _simd_load_ps((const float*)pSrc + 16);
|
||||
|
||||
__m128 vDst[8];
|
||||
vTranspose3x8(vDst, src0, src1, src2);
|
||||
_mm_store_ps((float*)pDst, vDst[0]);
|
||||
_mm_store_ps((float*)pDst + 4, vDst[1]);
|
||||
_mm_store_ps((float*)pDst + 8, vDst[2]);
|
||||
_mm_store_ps((float*)pDst + 12, vDst[3]);
|
||||
_mm_store_ps((float*)pDst + 16, vDst[4]);
|
||||
_mm_store_ps((float*)pDst + 20, vDst[5]);
|
||||
_mm_store_ps((float*)pDst + 24, vDst[6]);
|
||||
_mm_store_ps((float*)pDst + 28, vDst[7]);
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose32_32
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose32_32
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 32_32 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
const float* pfSrc = (const float*)pSrc;
|
||||
__m128 src_r0 = _mm_load_ps(pfSrc + 0);
|
||||
__m128 src_r1 = _mm_load_ps(pfSrc + 4);
|
||||
__m128 src_g0 = _mm_load_ps(pfSrc + 8);
|
||||
__m128 src_g1 = _mm_load_ps(pfSrc + 12);
|
||||
|
||||
__m128 dst0 = _mm_unpacklo_ps(src_r0, src_g0);
|
||||
__m128 dst1 = _mm_unpackhi_ps(src_r0, src_g0);
|
||||
__m128 dst2 = _mm_unpacklo_ps(src_r1, src_g1);
|
||||
__m128 dst3 = _mm_unpackhi_ps(src_r1, src_g1);
|
||||
|
||||
float* pfDst = (float*)pDst;
|
||||
_mm_store_ps(pfDst + 0, dst0);
|
||||
_mm_store_ps(pfDst + 4, dst1);
|
||||
_mm_store_ps(pfDst + 8, dst2);
|
||||
_mm_store_ps(pfDst + 12, dst3);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose16_16_16_16
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose16_16_16_16
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 16_16_16_16 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
simdscalari src_rg = _simd_load_si((const simdscalari*)pSrc);
|
||||
simdscalari src_ba = _simd_load_si((const simdscalari*)(pSrc + sizeof(simdscalari)));
|
||||
|
||||
__m128i src_r = _mm256_extractf128_si256(src_rg, 0);
|
||||
__m128i src_g = _mm256_extractf128_si256(src_rg, 1);
|
||||
__m128i src_b = _mm256_extractf128_si256(src_ba, 0);
|
||||
__m128i src_a = _mm256_extractf128_si256(src_ba, 1);
|
||||
|
||||
__m128i rg0 = _mm_unpacklo_epi16(src_r, src_g);
|
||||
__m128i rg1 = _mm_unpackhi_epi16(src_r, src_g);
|
||||
__m128i ba0 = _mm_unpacklo_epi16(src_b, src_a);
|
||||
__m128i ba1 = _mm_unpackhi_epi16(src_b, src_a);
|
||||
|
||||
__m128i dst0 = _mm_unpacklo_epi32(rg0, ba0);
|
||||
__m128i dst1 = _mm_unpackhi_epi32(rg0, ba0);
|
||||
__m128i dst2 = _mm_unpacklo_epi32(rg1, ba1);
|
||||
__m128i dst3 = _mm_unpackhi_epi32(rg1, ba1);
|
||||
|
||||
_mm_store_si128(((__m128i*)pDst) + 0, dst0);
|
||||
_mm_store_si128(((__m128i*)pDst) + 1, dst1);
|
||||
_mm_store_si128(((__m128i*)pDst) + 2, dst2);
|
||||
_mm_store_si128(((__m128i*)pDst) + 3, dst3);
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose16_16_16
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose16_16_16
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 16_16_16 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
simdscalari src_rg = _simd_load_si((const simdscalari*)pSrc);
|
||||
|
||||
__m128i src_r = _mm256_extractf128_si256(src_rg, 0);
|
||||
__m128i src_g = _mm256_extractf128_si256(src_rg, 1);
|
||||
__m128i src_b = _mm_load_si128((const __m128i*)(pSrc + sizeof(simdscalari)));
|
||||
__m128i src_a = _mm_undefined_si128();
|
||||
|
||||
__m128i rg0 = _mm_unpacklo_epi16(src_r, src_g);
|
||||
__m128i rg1 = _mm_unpackhi_epi16(src_r, src_g);
|
||||
__m128i ba0 = _mm_unpacklo_epi16(src_b, src_a);
|
||||
__m128i ba1 = _mm_unpackhi_epi16(src_b, src_a);
|
||||
|
||||
__m128i dst0 = _mm_unpacklo_epi32(rg0, ba0);
|
||||
__m128i dst1 = _mm_unpackhi_epi32(rg0, ba0);
|
||||
__m128i dst2 = _mm_unpacklo_epi32(rg1, ba1);
|
||||
__m128i dst3 = _mm_unpackhi_epi32(rg1, ba1);
|
||||
|
||||
_mm_store_si128(((__m128i*)pDst) + 0, dst0);
|
||||
_mm_store_si128(((__m128i*)pDst) + 1, dst1);
|
||||
_mm_store_si128(((__m128i*)pDst) + 2, dst2);
|
||||
_mm_store_si128(((__m128i*)pDst) + 3, dst3);
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose16_16
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose16_16
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 16_16 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
|
||||
{
|
||||
simdscalar src = _simd_load_ps((const float*)pSrc);
|
||||
|
||||
#if KNOB_SIMD_WIDTH == 8
|
||||
__m128 comp0 = _mm256_castps256_ps128(src);
|
||||
__m128 comp1 = _mm256_extractf128_ps(src, 1);
|
||||
|
||||
__m128i comp0i = _mm_castps_si128(comp0);
|
||||
__m128i comp1i = _mm_castps_si128(comp1);
|
||||
|
||||
__m128i resLo = _mm_unpacklo_epi16(comp0i, comp1i);
|
||||
__m128i resHi = _mm_unpackhi_epi16(comp0i, comp1i);
|
||||
|
||||
_mm_store_si128((__m128i*)pDst, resLo);
|
||||
_mm_store_si128((__m128i*)pDst + 1, resHi);
|
||||
#else
|
||||
#error Unsupported vector width
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose24_8
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose24_8
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 24_8 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose32_8_24
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose32_8_24
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 32_8_24 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose4_4_4_4
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose4_4_4_4
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 4_4_4_4 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose5_6_5
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose5_6_5
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 5_6_5 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose9_9_9_5
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose9_9_9_5
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 9_9_9_5 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose5_5_5_1
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose5_5_5_1
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 5_5_5_1 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose10_10_10_2
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose10_10_10_2
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 10_10_10_2 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Transpose11_11_10
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct Transpose11_11_10
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs an SOA to AOS conversion for packed 11_11_10 data.
|
||||
/// @param pSrc - source data in SOA form
|
||||
/// @param pDst - output data in AOS form
|
||||
static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
|
||||
};
|
||||
|
||||
// helper function to unroll loops
|
||||
template<int Begin, int End, int Step = 1>
|
||||
struct UnrollerL {
|
||||
template<typename Lambda>
|
||||
INLINE static void step(Lambda& func) {
|
||||
func(Begin);
|
||||
UnrollerL<Begin + Step, End, Step>::step(func);
|
||||
}
|
||||
};
|
||||
|
||||
template<int End, int Step>
|
||||
struct UnrollerL<End, End, Step> {
|
||||
template<typename Lambda>
|
||||
static void step(Lambda& func) {
|
||||
}
|
||||
};
|
||||
|
||||
// general CRC compute
|
||||
INLINE
|
||||
uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)
|
||||
{
|
||||
#if defined(_WIN64) || defined(__linux__) || defined(__gnu_linux__)
|
||||
uint32_t sizeInQwords = size / sizeof(uint64_t);
|
||||
uint32_t sizeRemainderBytes = size % sizeof(uint64_t);
|
||||
uint64_t* pDataWords = (uint64_t*)pData;
|
||||
for (uint32_t i = 0; i < sizeInQwords; ++i)
|
||||
{
|
||||
crc = (uint32_t)_mm_crc32_u64(crc, *pDataWords++);
|
||||
}
|
||||
#else
|
||||
uint32_t sizeInDwords = size / sizeof(uint32_t);
|
||||
uint32_t sizeRemainderBytes = size % sizeof(uint32_t);
|
||||
uint32_t* pDataWords = (uint32_t*)pData;
|
||||
for (uint32_t i = 0; i < sizeInDwords; ++i)
|
||||
{
|
||||
crc = _mm_crc32_u32(crc, *pDataWords++);
|
||||
}
|
||||
#endif
|
||||
|
||||
BYTE* pRemainderBytes = (BYTE*)pDataWords;
|
||||
for (uint32_t i = 0; i < sizeRemainderBytes; ++i)
|
||||
{
|
||||
crc = _mm_crc32_u8(crc, *pRemainderBytes++);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Add byte offset to any-type pointer
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T>
|
||||
INLINE
|
||||
static T* PtrAdd(T* p, intptr_t offset)
|
||||
{
|
||||
intptr_t intp = reinterpret_cast<intptr_t>(p);
|
||||
return reinterpret_cast<T*>(intp + offset);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Is a power-of-2?
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T>
|
||||
INLINE
|
||||
static bool IsPow2(T value)
|
||||
{
|
||||
return value == (value & (0 - value));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align down to specified alignment
|
||||
/// Note: IsPow2(alignment) MUST be true
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1 AlignDownPow2(T1 value, T2 alignment)
|
||||
{
|
||||
SWR_ASSERT(IsPow2(alignment));
|
||||
return value & ~T1(alignment - 1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align up to specified alignment
|
||||
/// Note: IsPow2(alignment) MUST be true
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1 AlignUpPow2(T1 value, T2 alignment)
|
||||
{
|
||||
return AlignDownPow2(value + T1(alignment - 1), alignment);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align up ptr to specified alignment
|
||||
/// Note: IsPow2(alignment) MUST be true
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1* AlignUpPow2(T1* value, T2 alignment)
|
||||
{
|
||||
return reinterpret_cast<T1*>(
|
||||
AlignDownPow2(reinterpret_cast<uintptr_t>(value) + uintptr_t(alignment - 1), alignment));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align down to specified alignment
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1 AlignDown(T1 value, T2 alignment)
|
||||
{
|
||||
if (IsPow2(alignment)) { return AlignDownPow2(value, alignment); }
|
||||
return value - T1(value % alignment);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align down to specified alignment
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1* AlignDown(T1* value, T2 alignment)
|
||||
{
|
||||
return (T1*)AlignDown(uintptr_t(value), alignment);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align up to specified alignment
|
||||
/// Note: IsPow2(alignment) MUST be true
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1 AlignUp(T1 value, T2 alignment)
|
||||
{
|
||||
return AlignDown(value + T1(alignment - 1), alignment);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Align up to specified alignment
|
||||
/// Note: IsPow2(alignment) MUST be true
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T1, typename T2>
|
||||
INLINE
|
||||
static T1* AlignUp(T1* value, T2 alignment)
|
||||
{
|
||||
return AlignDown(PtrAdd(value, alignment - 1), alignment);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// Helper structure used to access an array of elements that don't
/// correspond to a typical word size.
///
/// Elements are packed back to back into size_t words, lowest bits first.
/// @tparam T               - type returned when an element is read
/// @tparam BitsPerElementT - width of one element in bits; must divide the
///                           number of bits in size_t evenly
/// @tparam ArrayLenT       - number of elements stored
//////////////////////////////////////////////////////////////////////////
template<typename T, size_t BitsPerElementT, size_t ArrayLenT>
class BitsArray
{
private:
    static const size_t BITS_PER_WORD     = sizeof(size_t) * 8;
    static const size_t ELEMENTS_PER_WORD = BITS_PER_WORD / BitsPerElementT;
    static const size_t NUM_WORDS         = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;

    // Built by shifting an all-ones word right instead of shifting 1 left:
    // "(size_t(1) << BitsPerElementT) - 1" shifts by the full word width when
    // an element occupies a whole word, which is undefined behaviour.
    static const size_t ELEMENT_MASK      = ~size_t(0) >> (BITS_PER_WORD - BitsPerElementT);

    static_assert(ELEMENTS_PER_WORD * BitsPerElementT == BITS_PER_WORD,
        "Element size must an integral fraction of pointer size");

    size_t              m_words[NUM_WORDS] = {};

public:

    /// @brief Read one packed element.
    /// @param elementIndex - index of the element; not range checked
    /// @return the element's bits converted to T
    T operator[] (size_t elementIndex) const
    {
        // Select the word holding the element, then move the element down to bit 0.
        size_t word = m_words[elementIndex / ELEMENTS_PER_WORD];
        word >>= ((elementIndex % ELEMENTS_PER_WORD) * BitsPerElementT);
        return T(word & ELEMENT_MASK);
    }
};
|
|
@ -0,0 +1,313 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file JitManager.cpp
|
||||
*
|
||||
* @brief Implementation of the Jit Manager.
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#if defined(_WIN32)
|
||||
#pragma warning(disable: 4800 4146 4244 4267 4355 4996)
|
||||
#endif
|
||||
|
||||
#include "jit_api.h"
|
||||
#include "JitManager.h"
|
||||
#include "fetch_jit.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#endif
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
|
||||
#include "llvm/Analysis/CFGPrinter.h"
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
|
||||
#include "core/state.h"
|
||||
#include "common/containers.hpp"
|
||||
|
||||
#include "state_llvm.h"
|
||||
|
||||
#include <sstream>
|
||||
#if defined(_WIN32)
|
||||
#include <psapi.h>
|
||||
#include <cstring>
|
||||
|
||||
#define INTEL_OUTPUT_DIR "c:\\Intel"
|
||||
#define SWR_OUTPUT_DIR INTEL_OUTPUT_DIR "\\SWR"
|
||||
#define JITTER_OUTPUT_DIR SWR_OUTPUT_DIR "\\Jitter"
|
||||
#endif
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Contructor for JitManager.
|
||||
/// @param simdWidth - SIMD width to be used in generated program.
|
||||
JitManager::JitManager(uint32_t simdWidth, const char *arch)
|
||||
: mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch)
|
||||
{
|
||||
InitializeNativeTarget();
|
||||
InitializeNativeTargetAsmPrinter();
|
||||
InitializeNativeTargetDisassembler();
|
||||
|
||||
TargetOptions tOpts;
|
||||
tOpts.AllowFPOpFusion = FPOpFusion::Fast;
|
||||
tOpts.NoInfsFPMath = false;
|
||||
tOpts.NoNaNsFPMath = false;
|
||||
tOpts.UnsafeFPMath = true;
|
||||
#if defined(_DEBUG)
|
||||
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7
|
||||
tOpts.NoFramePointerElim = true;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//tOpts.PrintMachineCode = true;
|
||||
|
||||
std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
|
||||
fnName << mJitNumber++;
|
||||
std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
|
||||
mpCurrentModule = newModule.get();
|
||||
|
||||
auto &&EB = EngineBuilder(std::move(newModule));
|
||||
EB.setTargetOptions(tOpts);
|
||||
EB.setOptLevel(CodeGenOpt::Aggressive);
|
||||
|
||||
StringRef hostCPUName;
|
||||
|
||||
// force JIT to use the same CPU arch as the rest of swr
|
||||
if(mArch.AVX512F())
|
||||
{
|
||||
assert(0 && "Implement AVX512 jitter");
|
||||
hostCPUName = sys::getHostCPUName();
|
||||
if (mVWidth == 0)
|
||||
{
|
||||
mVWidth = 16;
|
||||
}
|
||||
}
|
||||
else if(mArch.AVX2())
|
||||
{
|
||||
hostCPUName = StringRef("core-avx2");
|
||||
if (mVWidth == 0)
|
||||
{
|
||||
mVWidth = 8;
|
||||
}
|
||||
}
|
||||
else if(mArch.AVX())
|
||||
{
|
||||
if (mArch.F16C())
|
||||
{
|
||||
hostCPUName = StringRef("core-avx-i");
|
||||
}
|
||||
else
|
||||
{
|
||||
hostCPUName = StringRef("corei7-avx");
|
||||
}
|
||||
if (mVWidth == 0)
|
||||
{
|
||||
mVWidth = 8;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
hostCPUName = sys::getHostCPUName();
|
||||
if (mVWidth == 0)
|
||||
{
|
||||
mVWidth = 8; // 4?
|
||||
}
|
||||
}
|
||||
|
||||
EB.setMCPU(hostCPUName);
|
||||
|
||||
#if defined(_WIN32)
|
||||
// Needed for MCJIT on windows
|
||||
Triple hostTriple(sys::getProcessTriple());
|
||||
hostTriple.setObjectFormat(Triple::ELF);
|
||||
mpCurrentModule->setTargetTriple(hostTriple.getTriple());
|
||||
#endif // _WIN32
|
||||
|
||||
mpExec = EB.create();
|
||||
|
||||
#if LLVM_USE_INTEL_JITEVENTS
|
||||
JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
|
||||
mpExec->RegisterJITEventListener(vTune);
|
||||
#endif
|
||||
|
||||
mFP32Ty = Type::getFloatTy(mContext); // float type
|
||||
mInt8Ty = Type::getInt8Ty(mContext);
|
||||
mInt32Ty = Type::getInt32Ty(mContext); // int type
|
||||
mInt64Ty = Type::getInt64Ty(mContext); // int type
|
||||
mV4FP32Ty = StructType::get(mContext, std::vector<Type*>(4, mFP32Ty), false); // vector4 float type (represented as structure)
|
||||
mV4Int32Ty = StructType::get(mContext, std::vector<Type*>(4, mInt32Ty), false); // vector4 int type
|
||||
|
||||
// fetch function signature
|
||||
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
||||
std::vector<Type*> fsArgs;
|
||||
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
|
||||
fsArgs.push_back(PointerType::get(Gen_simdvertex(this), 0));
|
||||
|
||||
mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, false);
|
||||
|
||||
mSimtFP32Ty = VectorType::get(mFP32Ty, mVWidth);
|
||||
mSimtInt32Ty = VectorType::get(mInt32Ty, mVWidth);
|
||||
|
||||
mSimdVectorTy = StructType::get(mContext, std::vector<Type*>(4, mSimtFP32Ty), false);
|
||||
mSimdVectorInt32Ty = StructType::get(mContext, std::vector<Type*>(4, mSimtInt32Ty), false);
|
||||
|
||||
#if defined(_WIN32)
|
||||
// explicitly instantiate used symbols from potentially staticly linked libs
|
||||
sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);
|
||||
sys::DynamicLibrary::AddSymbol("log2f", &log2f);
|
||||
sys::DynamicLibrary::AddSymbol("sinf", &sinf);
|
||||
sys::DynamicLibrary::AddSymbol("cosf", &cosf);
|
||||
sys::DynamicLibrary::AddSymbol("powf", &powf);
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
if (KNOB_DUMP_SHADER_IR)
|
||||
{
|
||||
CreateDirectory(INTEL_OUTPUT_DIR, NULL);
|
||||
CreateDirectory(SWR_OUTPUT_DIR, NULL);
|
||||
CreateDirectory(JITTER_OUTPUT_DIR, NULL);
|
||||
}
|
||||
|
||||
///@todo Figure out a better solution for this.
|
||||
// Redirect stdin, stdout, and stderr to attached console.
|
||||
freopen("CONIN$", "r", stdin);
|
||||
freopen("CONOUT$", "w", stdout);
|
||||
freopen("CONOUT$", "w", stderr);
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Create new LLVM module.
|
||||
void JitManager::SetupNewModule()
|
||||
{
|
||||
SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
|
||||
|
||||
std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
|
||||
fnName << mJitNumber++;
|
||||
std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
|
||||
mpCurrentModule = newModule.get();
|
||||
#if defined(_WIN32)
|
||||
// Needed for MCJIT on windows
|
||||
Triple hostTriple(sys::getProcessTriple());
|
||||
hostTriple.setObjectFormat(Triple::ELF);
|
||||
newModule->setTargetTriple(hostTriple.getTriple());
|
||||
#endif // _WIN32
|
||||
|
||||
mpExec->addModule(std::move(newModule));
|
||||
mIsModuleFinalized = false;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Create new LLVM module from IR.
|
||||
bool JitManager::SetupModuleFromIR(const uint8_t *pIR)
|
||||
{
|
||||
std::unique_ptr<MemoryBuffer> pMem = MemoryBuffer::getMemBuffer(StringRef((const char*)pIR), "");
|
||||
|
||||
SMDiagnostic Err;
|
||||
std::unique_ptr<Module> newModule = parseIR(pMem.get()->getMemBufferRef(), Err, mContext);
|
||||
|
||||
if (newModule == nullptr)
|
||||
{
|
||||
SWR_ASSERT(0, "Parse failed! Check Err for details.");
|
||||
return false;
|
||||
}
|
||||
|
||||
mpCurrentModule = newModule.get();
|
||||
#if defined(_WIN32)
|
||||
// Needed for MCJIT on windows
|
||||
Triple hostTriple(sys::getProcessTriple());
|
||||
hostTriple.setObjectFormat(Triple::ELF);
|
||||
newModule->setTargetTriple(hostTriple.getTriple());
|
||||
#endif // _WIN32
|
||||
|
||||
mpExec->addModule(std::move(newModule));
|
||||
mIsModuleFinalized = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Dump function to file.
|
||||
void JitManager::DumpToFile(Function *f, const char *fileName)
|
||||
{
|
||||
if (KNOB_DUMP_SHADER_IR)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
DWORD pid = GetCurrentProcessId();
|
||||
TCHAR procname[MAX_PATH];
|
||||
GetModuleFileName(NULL, procname, MAX_PATH);
|
||||
const char* pBaseName = strrchr(procname, '\\');
|
||||
std::stringstream outDir;
|
||||
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
|
||||
CreateDirectory(outDir.str().c_str(), NULL);
|
||||
#endif
|
||||
|
||||
std::error_code EC;
|
||||
const char *funcName = f->getName().data();
|
||||
char fName[256];
|
||||
#if defined(_WIN32)
|
||||
sprintf(fName, "%s\\%s.%s.ll", outDir.str().c_str(), funcName, fileName);
|
||||
#else
|
||||
sprintf(fName, "%s.%s.ll", funcName, fileName);
|
||||
#endif
|
||||
raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None);
|
||||
Module* pModule = f->getParent();
|
||||
pModule->print(fd, nullptr);
|
||||
|
||||
#if defined(_WIN32)
|
||||
sprintf(fName, "%s\\cfg.%s.%s.dot", outDir.str().c_str(), funcName, fileName);
|
||||
#else
|
||||
sprintf(fName, "cfg.%s.%s.dot", funcName, fileName);
|
||||
#endif
|
||||
fd.flush();
|
||||
|
||||
raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
|
||||
WriteGraph(fd_cfg, (const Function*)f);
|
||||
|
||||
fd_cfg.flush();
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Create JIT context.
|
||||
/// @param simdWidth - SIMD width to be used in generated program.
|
||||
HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch)
|
||||
{
|
||||
return new JitManager(targetSimdWidth, arch);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Destroy JIT context.
|
||||
void JITCALL JitDestroyContext(HANDLE hJitContext)
|
||||
{
|
||||
delete reinterpret_cast<JitManager*>(hJitContext);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,186 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file JitManager.h
|
||||
*
|
||||
* @brief JitManager contains the LLVM data structures used for JIT generation
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "common/os.h"
|
||||
#include "common/isa.hpp"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#pragma warning(disable : 4146 4244 4267 4800 4996)
|
||||
#endif
|
||||
|
||||
// llvm 3.7+ reuses "DEBUG" as an enum value
|
||||
#pragma push_macro("DEBUG")
|
||||
#undef DEBUG
|
||||
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
#ifndef LLVM_VERSION_MAJOR
|
||||
#include "llvm/Config/config.h"
|
||||
#endif
|
||||
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/ExecutionEngine/MCJIT.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#define LLVM_F_NONE sys::fs::F_None
|
||||
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
|
||||
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
|
||||
#include "llvm/PassManager.h"
|
||||
#else
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
using namespace llvm::legacy;
|
||||
#endif
|
||||
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/ExecutionEngine/ExecutionEngine.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Support/Host.h"
|
||||
|
||||
|
||||
#pragma pop_macro("DEBUG")
|
||||
|
||||
using namespace llvm;
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// JitInstructionSet
|
||||
/// @brief Subclass of InstructionSet that allows users to override
|
||||
/// the reporting of support for certain ISA features. This allows capping
|
||||
/// the jitted code to a certain feature level, e.g. jit AVX level code on
|
||||
/// a platform that supports AVX2.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
class JitInstructionSet : public InstructionSet
|
||||
{
|
||||
public:
|
||||
JitInstructionSet(const char* requestedIsa) : isaRequest(requestedIsa)
|
||||
{
|
||||
std::transform(isaRequest.begin(), isaRequest.end(), isaRequest.begin(), ::tolower);
|
||||
|
||||
if(isaRequest == "avx")
|
||||
{
|
||||
bForceAVX = true;
|
||||
bForceAVX2 = false;
|
||||
bForceAVX512 = false;
|
||||
}
|
||||
else if(isaRequest == "avx2")
|
||||
{
|
||||
bForceAVX = false;
|
||||
bForceAVX2 = true;
|
||||
bForceAVX512 = false;
|
||||
}
|
||||
#if 0
|
||||
else if(isaRequest == "avx512")
|
||||
{
|
||||
bForceAVX = false;
|
||||
bForceAVX2 = false;
|
||||
bForceAVX512 = true;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
bool AVX2(void) { return bForceAVX ? 0 : InstructionSet::AVX2(); }
|
||||
bool AVX512F(void) { return (bForceAVX | bForceAVX2) ? 0 : InstructionSet::AVX512F(); }
|
||||
bool BMI2(void) { return bForceAVX ? 0 : InstructionSet::BMI2(); }
|
||||
|
||||
private:
|
||||
bool bForceAVX = false;
|
||||
bool bForceAVX2 = false;
|
||||
bool bForceAVX512 = false;
|
||||
std::string isaRequest;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct JitLLVMContext : LLVMContext
|
||||
{
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// JitManager
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct JitManager
|
||||
{
|
||||
JitManager(uint32_t w, const char *arch);
|
||||
~JitManager(){};
|
||||
|
||||
JitLLVMContext mContext; ///< LLVM compiler
|
||||
IRBuilder<> mBuilder; ///< LLVM IR Builder
|
||||
ExecutionEngine* mpExec;
|
||||
|
||||
// Need to be rebuilt after a JIT and before building new IR
|
||||
Module* mpCurrentModule;
|
||||
bool mIsModuleFinalized;
|
||||
uint32_t mJitNumber;
|
||||
|
||||
uint32_t mVWidth;
|
||||
|
||||
// Built in types.
|
||||
Type* mInt8Ty;
|
||||
Type* mInt32Ty;
|
||||
Type* mInt64Ty;
|
||||
Type* mFP32Ty;
|
||||
StructType* mV4FP32Ty;
|
||||
StructType* mV4Int32Ty;
|
||||
|
||||
// helper scalar function types
|
||||
FunctionType* mUnaryFPTy;
|
||||
FunctionType* mBinaryFPTy;
|
||||
FunctionType* mTrinaryFPTy;
|
||||
FunctionType* mUnaryIntTy;
|
||||
FunctionType* mBinaryIntTy;
|
||||
FunctionType* mTrinaryIntTy;
|
||||
|
||||
Type* mSimtFP32Ty;
|
||||
Type* mSimtInt32Ty;
|
||||
|
||||
Type* mSimdVectorInt32Ty;
|
||||
Type* mSimdVectorTy;
|
||||
|
||||
// fetch shader types
|
||||
FunctionType* mFetchShaderTy;
|
||||
|
||||
JitInstructionSet mArch;
|
||||
|
||||
void SetupNewModule();
|
||||
bool SetupModuleFromIR(const uint8_t *pIR);
|
||||
|
||||
static void DumpToFile(Function *f, const char *fileName);
|
||||
};
|
|
@ -0,0 +1,772 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file blend_jit.cpp
|
||||
*
|
||||
* @brief Implementation of the blend jitter
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "jit_api.h"
|
||||
#include "blend_jit.h"
|
||||
#include "builder.h"
|
||||
#include "state_llvm.h"
|
||||
#include "common/containers.hpp"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
// components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized
|
||||
#define QUANTIZE_THRESHOLD 2
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Interface to Jitting a blend shader
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct BlendJit : public Builder
|
||||
{
|
||||
/// Forwards the JIT manager to the Builder base class.
BlendJit(JitManager* pJitMgr) : Builder(pJitMgr) {}
|
||||
|
||||
template<bool Color, bool Alpha>
|
||||
void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4])
|
||||
{
|
||||
Value* out[4];
|
||||
|
||||
switch (factor)
|
||||
{
|
||||
case BLENDFACTOR_ONE:
|
||||
out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f);
|
||||
break;
|
||||
case BLENDFACTOR_SRC_COLOR:
|
||||
out[0] = src[0];
|
||||
out[1] = src[1];
|
||||
out[2] = src[2];
|
||||
out[3] = src[3];
|
||||
break;
|
||||
case BLENDFACTOR_SRC_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = src[3];
|
||||
break;
|
||||
case BLENDFACTOR_DST_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = dst[3];
|
||||
break;
|
||||
case BLENDFACTOR_DST_COLOR:
|
||||
out[0] = dst[0];
|
||||
out[1] = dst[1];
|
||||
out[2] = dst[2];
|
||||
out[3] = dst[3];
|
||||
break;
|
||||
case BLENDFACTOR_SRC_ALPHA_SATURATE:
|
||||
out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
|
||||
out[3] = VIMMED1(1.0f);
|
||||
break;
|
||||
case BLENDFACTOR_CONST_COLOR:
|
||||
out[0] = constColor[0];
|
||||
out[1] = constColor[1];
|
||||
out[2] = constColor[2];
|
||||
out[3] = constColor[3];
|
||||
break;
|
||||
case BLENDFACTOR_CONST_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = constColor[3];
|
||||
break;
|
||||
case BLENDFACTOR_SRC1_COLOR:
|
||||
out[0] = src1[0];
|
||||
out[1] = src1[1];
|
||||
out[2] = src1[2];
|
||||
out[3] = src1[3];
|
||||
break;
|
||||
case BLENDFACTOR_SRC1_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = src1[3];
|
||||
break;
|
||||
case BLENDFACTOR_ZERO:
|
||||
out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
|
||||
break;
|
||||
case BLENDFACTOR_INV_SRC_COLOR:
|
||||
out[0] = FSUB(VIMMED1(1.0f), src[0]);
|
||||
out[1] = FSUB(VIMMED1(1.0f), src[1]);
|
||||
out[2] = FSUB(VIMMED1(1.0f), src[2]);
|
||||
out[3] = FSUB(VIMMED1(1.0f), src[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_SRC_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_DST_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_DST_COLOR:
|
||||
out[0] = FSUB(VIMMED1(1.0f), dst[0]);
|
||||
out[1] = FSUB(VIMMED1(1.0f), dst[1]);
|
||||
out[2] = FSUB(VIMMED1(1.0f), dst[2]);
|
||||
out[3] = FSUB(VIMMED1(1.0f), dst[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_CONST_COLOR:
|
||||
out[0] = FSUB(VIMMED1(1.0f), constColor[0]);
|
||||
out[1] = FSUB(VIMMED1(1.0f), constColor[1]);
|
||||
out[2] = FSUB(VIMMED1(1.0f), constColor[2]);
|
||||
out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_CONST_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_SRC1_COLOR:
|
||||
out[0] = FSUB(VIMMED1(1.0f), src1[0]);
|
||||
out[1] = FSUB(VIMMED1(1.0f), src1[1]);
|
||||
out[2] = FSUB(VIMMED1(1.0f), src1[2]);
|
||||
out[3] = FSUB(VIMMED1(1.0f), src1[3]);
|
||||
break;
|
||||
case BLENDFACTOR_INV_SRC1_ALPHA:
|
||||
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]);
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(false, "Unsupported blend factor: %d", factor);
|
||||
out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Color)
|
||||
{
|
||||
result[0] = out[0];
|
||||
result[1] = out[1];
|
||||
result[2] = out[2];
|
||||
}
|
||||
|
||||
if (Alpha)
|
||||
{
|
||||
result[3] = out[3];
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Clamp all four channels to the range implied by the format's type.
///
/// The type of component 0 decides for every channel: FLOAT is left as is,
/// UNORM is clamped to [0, 1], SNORM to [-1, 1]. Any other type asserts.
/// @param format - render target format
/// @param src - RGBA values, clamped in place
void Clamp(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
    const SWR_TYPE type = info.type[0];

    // Only the lower bound differs between the clamped types.
    float lowerBound;
    switch (type)
    {
    case SWR_TYPE_FLOAT:
        return;

    case SWR_TYPE_UNORM:
        lowerBound = 0.0f;
        break;

    case SWR_TYPE_SNORM:
        lowerBound = -1.0f;
        break;

    default:
        SWR_ASSERT(false, "Unsupport format type: %d", type);
        return;
    }

    for (uint32_t c = 0; c < 4; ++c)
    {
        src[c] = VMINPS(VMAXPS(src[c], VIMMED1(lowerBound)), VIMMED1(1.0f));
    }
}
|
||||
|
||||
/// @brief Fill in the channels that the format does not provide.
///
/// A channel counts as provided when one of the format's components is
/// swizzled onto it. Every other channel receives the format's default
/// value, bit-cast (not converted) to the SIMD float type.
/// @param format - render target format
/// @param src - RGBA values, updated in place
void ApplyDefaults(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);

    // Mark the destination channels covered by the format's components.
    bool present[4] = { false, false, false, false };
    for (uint32_t comp = 0; comp < info.numComps; ++comp)
    {
        present[info.swizzle[comp]] = true;
    }

    for (uint32_t chan = 0; chan < 4; ++chan)
    {
        if (present[chan])
        {
            continue;
        }

        src[chan] = BITCAST(VIMMED1((int)info.defaults[chan]), mSimdFP32Ty);
    }
}
|
||||
|
||||
/// @brief Overwrite channels backed by an unused format component.
///
/// For each component whose type is SWR_TYPE_UNUSED, the channel it is
/// swizzled onto receives the format's default for that channel, bit-cast
/// to the SIMD float type.
/// @param format - render target format
/// @param src - RGBA values, updated in place
void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);

    for (uint32_t comp = 0; comp < info.numComps; ++comp)
    {
        if (info.type[comp] != SWR_TYPE_UNUSED)
        {
            continue;
        }

        src[info.swizzle[comp]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[comp]]), mSimdFP32Ty);
    }
}
|
||||
|
||||
/// @brief Snap low-precision channels to values the format can represent.
///
/// Only components with at most QUANTIZE_THRESHOLD bits are touched. UNORM
/// components are scaled to the integer range, biased by 0.5, rounded
/// toward zero and scaled back. Any other component type asserts.
/// @param format - render target format
/// @param src - RGBA values, updated in place
void Quantize(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
    for (uint32_t comp = 0; comp < info.numComps; ++comp)
    {
        if (info.bpc[comp] > QUANTIZE_THRESHOLD)
        {
            continue;
        }

        const uint32_t chan = info.swizzle[comp];
        // Largest integer representable in this component.
        const float maxValue = (float)((1 << info.bpc[comp]) - 1);
        switch (info.type[comp])
        {
        case SWR_TYPE_UNORM:
        {
            Value* scaled = FMUL(src[chan], VIMMED1(maxValue));
            Value* biased = FADD(scaled, VIMMED1(0.5f));
            Value* truncated = VROUND(biased, C(_MM_FROUND_TO_ZERO));
            src[chan] = FMUL(truncated, VIMMED1(1.0f /maxValue));
            break;
        }
        default: SWR_ASSERT(false, "Unsupported format type: %d", info.type[comp]);
        }
    }
}
|
||||
|
||||
template<bool Color, bool Alpha>
|
||||
void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4])
|
||||
{
|
||||
Value* out[4];
|
||||
Value* srcBlend[4];
|
||||
Value* dstBlend[4];
|
||||
for (uint32_t i = 0; i < 4; ++i)
|
||||
{
|
||||
srcBlend[i] = FMUL(src[i], srcFactor[i]);
|
||||
dstBlend[i] = FMUL(dst[i], dstFactor[i]);
|
||||
}
|
||||
|
||||
switch (blendOp)
|
||||
{
|
||||
case BLENDOP_ADD:
|
||||
out[0] = FADD(srcBlend[0], dstBlend[0]);
|
||||
out[1] = FADD(srcBlend[1], dstBlend[1]);
|
||||
out[2] = FADD(srcBlend[2], dstBlend[2]);
|
||||
out[3] = FADD(srcBlend[3], dstBlend[3]);
|
||||
break;
|
||||
|
||||
case BLENDOP_SUBTRACT:
|
||||
out[0] = FSUB(srcBlend[0], dstBlend[0]);
|
||||
out[1] = FSUB(srcBlend[1], dstBlend[1]);
|
||||
out[2] = FSUB(srcBlend[2], dstBlend[2]);
|
||||
out[3] = FSUB(srcBlend[3], dstBlend[3]);
|
||||
break;
|
||||
|
||||
case BLENDOP_REVSUBTRACT:
|
||||
out[0] = FSUB(dstBlend[0], srcBlend[0]);
|
||||
out[1] = FSUB(dstBlend[1], srcBlend[1]);
|
||||
out[2] = FSUB(dstBlend[2], srcBlend[2]);
|
||||
out[3] = FSUB(dstBlend[3], srcBlend[3]);
|
||||
break;
|
||||
|
||||
case BLENDOP_MIN:
|
||||
out[0] = VMINPS(src[0], dst[0]);
|
||||
out[1] = VMINPS(src[1], dst[1]);
|
||||
out[2] = VMINPS(src[2], dst[2]);
|
||||
out[3] = VMINPS(src[3], dst[3]);
|
||||
break;
|
||||
|
||||
case BLENDOP_MAX:
|
||||
out[0] = VMAXPS(src[0], dst[0]);
|
||||
out[1] = VMAXPS(src[1], dst[1]);
|
||||
out[2] = VMAXPS(src[2], dst[2]);
|
||||
out[3] = VMAXPS(src[3], dst[3]);
|
||||
break;
|
||||
|
||||
default:
|
||||
SWR_ASSERT(false, "Unsupported blend operation: %d", blendOp);
|
||||
out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Color)
|
||||
{
|
||||
result[0] = out[0];
|
||||
result[1] = out[1];
|
||||
result[2] = out[2];
|
||||
}
|
||||
|
||||
if (Alpha)
|
||||
{
|
||||
result[3] = out[3];
|
||||
}
|
||||
}
|
||||
|
||||
void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
|
||||
{
|
||||
// Op: (s == PS output, d = RT contents)
|
||||
switch(logicOp)
|
||||
{
|
||||
case LOGICOP_CLEAR:
|
||||
result[0] = VIMMED1(0);
|
||||
result[1] = VIMMED1(0);
|
||||
result[2] = VIMMED1(0);
|
||||
result[3] = VIMMED1(0);
|
||||
break;
|
||||
|
||||
case LOGICOP_NOR:
|
||||
// ~(s | d)
|
||||
result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
|
||||
result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
|
||||
result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
|
||||
result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
|
||||
break;
|
||||
|
||||
case LOGICOP_AND_INVERTED:
|
||||
// ~s & d
|
||||
// todo: use avx andnot instr when I can find the intrinsic to call
|
||||
result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
|
||||
result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
|
||||
result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
|
||||
result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_COPY_INVERTED:
|
||||
// ~s
|
||||
result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF));
|
||||
result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF));
|
||||
result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF));
|
||||
result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF));
|
||||
break;
|
||||
|
||||
case LOGICOP_AND_REVERSE:
|
||||
// s & ~d
|
||||
// todo: use avx andnot instr when I can find the intrinsic to call
|
||||
result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
|
||||
result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
|
||||
result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
|
||||
result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_INVERT:
|
||||
// ~d
|
||||
result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF));
|
||||
result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF));
|
||||
result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF));
|
||||
result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF));
|
||||
break;
|
||||
|
||||
case LOGICOP_XOR:
|
||||
// s ^ d
|
||||
result[0] = XOR(src[0], dst[0]);
|
||||
result[1] = XOR(src[1], dst[1]);
|
||||
result[2] = XOR(src[2], dst[2]);
|
||||
result[3] = XOR(src[3], dst[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_NAND:
|
||||
// ~(s & d)
|
||||
result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
|
||||
result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
|
||||
result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
|
||||
result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
|
||||
break;
|
||||
|
||||
case LOGICOP_AND:
|
||||
// s & d
|
||||
result[0] = AND(src[0], dst[0]);
|
||||
result[1] = AND(src[1], dst[1]);
|
||||
result[2] = AND(src[2], dst[2]);
|
||||
result[3] = AND(src[3], dst[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_EQUIV:
|
||||
// ~(s ^ d)
|
||||
result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
|
||||
result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
|
||||
result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
|
||||
result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
|
||||
break;
|
||||
|
||||
case LOGICOP_NOOP:
|
||||
result[0] = dst[0];
|
||||
result[1] = dst[1];
|
||||
result[2] = dst[2];
|
||||
result[3] = dst[3];
|
||||
break;
|
||||
|
||||
case LOGICOP_OR_INVERTED:
|
||||
// ~s | d
|
||||
result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
|
||||
result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
|
||||
result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
|
||||
result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_COPY:
|
||||
result[0] = src[0];
|
||||
result[1] = src[1];
|
||||
result[2] = src[2];
|
||||
result[3] = src[3];
|
||||
break;
|
||||
|
||||
case LOGICOP_OR_REVERSE:
|
||||
// s | ~d
|
||||
result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
|
||||
result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
|
||||
result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
|
||||
result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_OR:
|
||||
// s | d
|
||||
result[0] = OR(src[0], dst[0]);
|
||||
result[1] = OR(src[1], dst[1]);
|
||||
result[2] = OR(src[2], dst[2]);
|
||||
result[3] = OR(src[3], dst[3]);
|
||||
break;
|
||||
|
||||
case LOGICOP_SET:
|
||||
result[0] = VIMMED1(0xFFFFFFFF);
|
||||
result[1] = VIMMED1(0xFFFFFFFF);
|
||||
result[2] = VIMMED1(0xFFFFFFFF);
|
||||
result[3] = VIMMED1(0xFFFFFFFF);
|
||||
break;
|
||||
|
||||
default:
|
||||
SWR_ASSERT(false, "Unsupported logic operation: %d", logicOp);
|
||||
result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* pAlpha, Value* ppMask)
|
||||
{
|
||||
// load uint32_t reference
|
||||
Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference }));
|
||||
|
||||
Value* pTest = nullptr;
|
||||
if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
|
||||
{
|
||||
// convert float alpha to unorm8
|
||||
Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
|
||||
pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
|
||||
|
||||
// compare
|
||||
switch (state.alphaTestFunction)
|
||||
{
|
||||
case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
|
||||
case ZFUNC_NEVER: pTest = VIMMED1(false); break;
|
||||
case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break;
|
||||
case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break;
|
||||
case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break;
|
||||
case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break;
|
||||
case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break;
|
||||
case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break;
|
||||
default:
|
||||
SWR_ASSERT(false, "Invalid alpha test function");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// cast ref to float
|
||||
pRef = BITCAST(pRef, mSimdFP32Ty);
|
||||
|
||||
// compare
|
||||
switch (state.alphaTestFunction)
|
||||
{
|
||||
case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
|
||||
case ZFUNC_NEVER: pTest = VIMMED1(false); break;
|
||||
case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break;
|
||||
case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break;
|
||||
case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break;
|
||||
case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break;
|
||||
case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break;
|
||||
case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break;
|
||||
default:
|
||||
SWR_ASSERT(false, "Invalid alpha test function");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// load current mask
|
||||
Value* pMask = LOAD(ppMask);
|
||||
|
||||
// convert to int1 mask
|
||||
pMask = MASK(pMask);
|
||||
|
||||
// and with alpha test result
|
||||
pMask = AND(pMask, pTest);
|
||||
|
||||
// convert back to vector mask
|
||||
pMask = VMASK(pMask);
|
||||
|
||||
// store new mask
|
||||
STORE(pMask, ppMask);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Builds and optimizes the LLVM IR function for one blend state.
/// @param state - blend compile state selecting which stages get emitted
///                (alpha test, color blend, logic op, coverage masking)
/// @return the generated function, added to the JitManager's current module
Function* Create(const BLEND_COMPILE_STATE& state)
{
    // running counter gives every generated function a unique symbol name
    static std::size_t jitNum = 0;

    std::stringstream fnName;
    fnName << "BlendShader" << jitNum++;

    // blend function signature
    //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
    Type* const blendStatePtrTy = PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0);
    Type* const simdFP32PtrTy   = PointerType::get(mSimdFP32Ty, 0);
    Type* const simdInt32PtrTy  = PointerType::get(mSimdInt32Ty, 0);

    std::vector<Type*> args{
        blendStatePtrTy,                    // SWR_BLEND_STATE*
        simdFP32PtrTy,                      // simdvector& src
        simdFP32PtrTy,                      // simdvector& src1
        Type::getInt32Ty(JM()->mContext),   // sampleNum
        simdFP32PtrTy,                      // pDst (hot tile, read as simd float vectors)
        simdFP32PtrTy,                      // simdvector& result
        simdInt32PtrTy,                     // simdscalari* oMask
        simdInt32PtrTy,                     // simdscalari* pMask
    };

    FunctionType* blendFnTy = FunctionType::get(IRB()->getVoidTy(), args, false);
    Function* blendFunc = Function::Create(blendFnTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);

    BasicBlock* entryBlock = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
    IRB()->SetInsertPoint(entryBlock);

    // pull the arguments off in declaration order, naming each one
    auto argIt = blendFunc->getArgumentList().begin();
    auto takeArg = [&argIt](const char* argName) -> Value*
    {
        Value* pArg = &*argIt++;
        pArg->setName(argName);
        return pArg;
    };
    Value* pBlendState = takeArg("pBlendState");
    Value* pSrc        = takeArg("src");
    Value* pSrc1       = takeArg("src1");
    Value* sampleNum   = takeArg("sampleNum");
    Value* pDst        = takeArg("pDst");
    Value* pResult     = takeArg("result");
    Value* ppoMask     = takeArg("ppoMask");
    Value* ppMask      = takeArg("pMask");

    static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
    Value* dst[4];
    Value* constantColor[4];
    Value* src[4];
    Value* src1[4];
    Value* result[4];
    for (uint32_t comp = 0; comp < 4; ++comp)
    {
        // hot tile contents
        dst[comp] = LOAD(pDst, { comp });

        // constant color, splatted across the simd
        constantColor[comp] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, comp }));

        // both shader outputs
        src[comp] = LOAD(pSrc, { comp });
        src1[comp] = LOAD(pSrc1, { comp });
    }

    // coverage contribution; starts as all ones and is narrowed below
    Value* currentMask = VIMMED1(-1);
    if (state.desc.alphaToCoverageEnable)
    {
        currentMask = FP_TO_SI(FMUL(src[3], VBROADCAST(C((float)state.desc.numSamples))), mSimdInt32Ty);
    }

    // alpha test
    if (state.desc.alphaTestEnable)
    {
        AlphaTest(state, pBlendState, src[3], ppMask);
    }

    // color blend
    if (state.blendState.blendEnable)
    {
        // clamp every blend input to the render target format
        Clamp(state.format, src);
        Clamp(state.format, src1);
        Clamp(state.format, dst);
        Clamp(state.format, constantColor);

        // apply defaults to hottile contents to take into account missing components
        ApplyDefaults(state.format, dst);

        // Force defaults for unused 'X' components
        ApplyUnusedDefaults(state.format, dst);

        // Quantize low precision components
        Quantize(state.format, dst);

        // special case clamping for R11G11B10_float which has no sign bit
        if (state.format == R11G11B10_FLOAT)
        {
            for (uint32_t comp = 0; comp < 4; ++comp)
            {
                dst[comp] = VMAXPS(dst[comp], VIMMED1(0.0f));
            }
        }

        Value* srcFactor[4];
        Value* dstFactor[4];
        if (state.desc.independentAlphaBlendEnable)
        {
            // color and alpha channels use separate factors and blend ops
            GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
            GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor);

            GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
            GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor);

            BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
            BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
        }
        else
        {
            // one factor pair and one blend op cover all four channels
            GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
            GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);

            BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
        }

        // write the blended color out
        for (uint32_t comp = 0; comp < 4; ++comp)
        {
            STORE(result[comp], pResult, { comp });
        }
    }

    if (state.blendState.logicOpEnable)
    {
        const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
        SWR_ASSERT(info.type[0] == SWR_TYPE_UINT);

        // per-component mask of the bits that exist in the render target format
        Value* vMask[4];
        for (uint32_t comp = 0; comp < 4; ++comp)
        {
            switch (info.bpc[comp])
            {
            case 0:  vMask[comp] = VIMMED1(0x00000000); break;
            case 2:  vMask[comp] = VIMMED1(0x00000003); break;
            case 5:  vMask[comp] = VIMMED1(0x0000001F); break;
            case 6:  vMask[comp] = VIMMED1(0x0000003F); break;
            case 8:  vMask[comp] = VIMMED1(0x000000FF); break;
            case 10: vMask[comp] = VIMMED1(0x000003FF); break;
            case 11: vMask[comp] = VIMMED1(0x000007FF); break;
            case 16: vMask[comp] = VIMMED1(0x0000FFFF); break;
            case 24: vMask[comp] = VIMMED1(0x00FFFFFF); break;
            case 32: vMask[comp] = VIMMED1(0xFFFFFFFF); break;
            default:
                vMask[comp] = VIMMED1(0x0);
                SWR_ASSERT(0, "Unsupported bpc for logic op\n");
                break;
            }

            // logic ops work on the raw integer bit patterns
            src[comp] = BITCAST(src[comp], mSimdInt32Ty);
            dst[comp] = BITCAST(dst[comp], mSimdInt32Ty);
        }

        LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);

        // write the logic op result out
        for (uint32_t comp = 0; comp < 4; ++comp)
        {
            // clear upper bits from PS output not in RT format after doing logic op
            result[comp] = AND(result[comp], vMask[comp]);

            STORE(BITCAST(result[comp], mSimdFP32Ty), pResult, { comp });
        }
    }

    if (state.desc.oMaskEnable)
    {
        assert(!(state.desc.alphaToCoverageEnable));
        // keep only this sample's bit of the shader-provided output mask
        Value* oMask = LOAD(ppoMask);
        Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum));
        oMask = AND(oMask, sampleMasked);
        currentMask = AND(oMask, currentMask);
    }

    if (state.desc.sampleMaskEnable)
    {
        // test this sample's bit in the API sample mask and splat the outcome
        Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask });
        Value* sampleMasked = SHL(C(1), sampleNum);
        sampleMask = AND(sampleMask, sampleMasked);
        sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0)));
        sampleMask = S_EXT(sampleMask, mSimdInt32Ty);
        currentMask = AND(sampleMask, currentMask);
    }

    const bool coverageModified = state.desc.sampleMaskEnable ||
                                  state.desc.alphaToCoverageEnable ||
                                  state.desc.oMaskEnable;
    if (coverageModified)
    {
        // fold the accumulated coverage into the stored pixel mask
        Value* pMask = LOAD(ppMask);
        currentMask = S_EXT(ICMP_SGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty);
        Value* outputMask = AND(pMask, currentMask);
        STORE(outputMask, GEP(ppMask, C(0)));
    }

    RET_VOID();

    JitManager::DumpToFile(blendFunc, "");

    // function-level optimization pipeline
    FunctionPassManager passes(JM()->mpCurrentModule);
    passes.add(createBreakCriticalEdgesPass());
    passes.add(createCFGSimplificationPass());
    passes.add(createEarlyCSEPass());
    passes.add(createPromoteMemoryToRegisterPass());
    passes.add(createCFGSimplificationPass());
    passes.add(createEarlyCSEPass());
    passes.add(createInstructionCombiningPass());
    passes.add(createInstructionSimplifierPass());
    passes.add(createConstantPropagationPass());
    passes.add(createSCCPPass());
    passes.add(createAggressiveDCEPass());

    passes.run(*blendFunc);

    JitManager::DumpToFile(blendFunc, "optimized");

    return blendFunc;
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JITs from blend shader IR
|
||||
/// @param hJitMgr - JitManager handle
|
||||
/// @param hFunc - handle to the LLVM function IR
|
||||
/// @return PFN_BLEND_JIT_FUNC - pointer to blend code
|
||||
PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
{
    // recover the typed objects behind the opaque handles
    const llvm::Function* const pBlendIR = reinterpret_cast<const llvm::Function*>(hFunc);
    JitManager* const pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);

    // look the function up by symbol name in the execution engine
    const auto entryPoint = pJitMgr->mpExec->getFunctionAddress(pBlendIR->getName().str());
    PFN_BLEND_JIT_FUNC pfnBlend = reinterpret_cast<PFN_BLEND_JIT_FUNC>(entryPoint);

    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
    pJitMgr->mIsModuleFinalized = true;

    return pfnBlend;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT compiles blend shader
|
||||
/// @param hJitMgr - JitManager handle
|
||||
/// @param state - blend state to build function from
|
||||
extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state)
{
    // have the JIT manager set up a new module before any IR is generated
    JitManager* const pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
    pJitMgr->SetupNewModule();

    // generate the blend IR, then hand it straight to the JIT for compilation
    BlendJit blendBuilder(pJitMgr);
    return JitBlendFunc(hJitMgr, blendBuilder.Create(state));
}
|
|
@ -0,0 +1,93 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file blend_jit.h
|
||||
*
|
||||
* @brief Definition of the blend jitter
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "common/formats.h"
|
||||
#include "core/context.h"
|
||||
#include "core/state.h"
|
||||
|
||||
// Blend settings for a single render target that the blend jitter bakes into
// the generated code. Embedded in BLEND_COMPILE_STATE, which is compared
// byte-wise, so member order and layout are significant.
struct RENDER_TARGET_BLEND_COMPILE_STATE
|
||||
{
|
||||
// enables the color blend stage
bool blendEnable;
|
||||
// enables the logic op stage
bool logicOpEnable;
|
||||
// alpha-channel source factor; consulted only when independent alpha blending is enabled
SWR_BLEND_FACTOR sourceAlphaBlendFactor;
|
||||
// alpha-channel destination factor; consulted only when independent alpha blending is enabled
SWR_BLEND_FACTOR destAlphaBlendFactor;
|
||||
// source factor (color only, or all channels without independent alpha blending)
SWR_BLEND_FACTOR sourceBlendFactor;
|
||||
// destination factor (color only, or all channels without independent alpha blending)
SWR_BLEND_FACTOR destBlendFactor;
|
||||
// blend op (color only, or all channels without independent alpha blending)
SWR_BLEND_OP colorBlendFunc;
|
||||
// alpha-channel blend op; consulted only when independent alpha blending is enabled
SWR_BLEND_OP alphaBlendFunc;
|
||||
// logic op applied when logicOpEnable is set
SWR_LOGIC_OP logicOpFunc;
|
||||
};
|
||||
|
||||
// Selects how the alpha test compares alpha against the reference value.
enum ALPHA_TEST_FORMAT
|
||||
{
|
||||
// alpha is converted to an integer and compared unsigned against the integer reference
ALPHA_TEST_UNORM8,
|
||||
// the reference bits are reinterpreted as float and compared with ordered float compares
ALPHA_TEST_FLOAT32
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// BLEND_DESC
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Packed enable flags and sample count for the blend jitter. The anonymous
// union lets the flags be read one at a time or all at once through `bits`;
// the bitfield widths add up to 32.
struct BLEND_DESC
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
// emit the alpha test
uint32_t alphaTestEnable: 1;
|
||||
// use separate factors / blend ops for the alpha channel
uint32_t independentAlphaBlendEnable: 1;
|
||||
// derive coverage from source alpha scaled by numSamples
uint32_t alphaToCoverageEnable: 1;
|
||||
// AND the shader-provided output mask into coverage
uint32_t oMaskEnable:1;
|
||||
uint32_t inputCoverageEnable:1;
|
||||
// AND the blend state's sample mask into coverage
uint32_t sampleMaskEnable:1;
|
||||
// sample count; used as the alpha-to-coverage scale factor
uint32_t numSamples:5;
|
||||
// padding up to 32 bits
uint32_t _reserved : 21;
|
||||
};
|
||||
// all of the fields above viewed as one 32-bit word
uint32_t bits;
|
||||
};
|
||||
};
|
||||
#define BLEND_ENABLE_MASK 0x3D // alphaTestEnable | a2c | oMaskEnable | inputCoverageEnable | sampleMaskEnable -- NOTE(review): assumes BLEND_DESC bitfields are allocated LSB-first; bit 0 (alphaTestEnable) is set in 0x3D, confirm that is intended
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// State required for blend jit
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Complete key describing one JIT-compiled blend function variant.
struct BLEND_COMPILE_STATE
|
||||
{
|
||||
SWR_FORMAT format; // format of render target being blended
|
||||
// per-render-target blend / logic op settings
RENDER_TARGET_BLEND_COMPILE_STATE blendState;
|
||||
// packed enable flags and sample count
BLEND_DESC desc;
|
||||
|
||||
// compare function used by the alpha test
SWR_ZFUNCTION alphaTestFunction;
|
||||
// how alpha and the reference value are interpreted by the alpha test
ALPHA_TEST_FORMAT alphaTestFormat;
|
||||
|
||||
// Byte-wise equality over the whole object.
// NOTE(review): memcmp also compares any padding bytes (e.g. after the bool
// members of blendState), so this presumably relies on callers zero-filling
// the struct before populating it - confirm against the call sites.
bool operator==(const BLEND_COMPILE_STATE& other) const
|
||||
{
|
||||
return memcmp(this, &other, sizeof(BLEND_COMPILE_STATE)) == 0;
|
||||
}
|
||||
};
|
|
@ -0,0 +1,71 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file builder.cpp
|
||||
*
|
||||
* @brief Includes all the builder related functionality
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "builder.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Constructor for Builder.
|
||||
/// @param pJitMgr - JitManager which contains modules, function passes, etc.
|
||||
Builder::Builder(JitManager *pJitMgr)
    : mpJitMgr(pJitMgr)
{
    mpIRBuilder = &pJitMgr->mBuilder;

    auto& ctx = pJitMgr->mContext;

    // scalar types
    mVoidTy   = Type::getVoidTy(ctx);
    mFP16Ty   = Type::getHalfTy(ctx);
    mFP32Ty   = Type::getFloatTy(ctx);
    mDoubleTy = Type::getDoubleTy(ctx);
    mInt1Ty   = Type::getInt1Ty(ctx);
    mInt8Ty   = Type::getInt8Ty(ctx);
    mInt16Ty  = Type::getInt16Ty(ctx);
    mInt32Ty  = Type::getInt32Ty(ctx);
    mInt64Ty  = Type::getInt64Ty(ctx);

    // four-element structures (vector4 represented as a struct)
    mV4FP32Ty  = StructType::get(ctx, std::vector<Type*>(4, mFP32Ty), false);
    mV4Int32Ty = StructType::get(ctx, std::vector<Type*>(4, mInt32Ty), false);

    // simd vector types, as wide as the JIT manager's vector width
    const auto simdWidth = mpJitMgr->mVWidth;
    mSimdInt16Ty = VectorType::get(mInt16Ty, simdWidth);
    mSimdInt32Ty = VectorType::get(mInt32Ty, simdWidth);
    mSimdInt64Ty = VectorType::get(mInt64Ty, simdWidth);
    mSimdFP16Ty  = VectorType::get(mFP16Ty, simdWidth);
    mSimdFP32Ty  = VectorType::get(mFP32Ty, simdWidth);

    // pointer-sized integer types follow the host pointer width
    const bool hostIs32Bit = (sizeof(uint32_t*) == 4);
    if (!hostIs32Bit)
    {
        SWR_ASSERT(sizeof(uint32_t*) == 8);
    }
    mIntPtrTy     = hostIs32Bit ? mInt32Ty : mInt64Ty;
    mSimdIntPtrTy = hostIs32Bit ? mSimdInt32Ty : mSimdInt64Ty;
}
|
|
@ -0,0 +1,71 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file builder.h
|
||||
*
|
||||
* @brief Includes all the builder related functionality
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "JitManager.h"
|
||||
#include "common/formats.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Base of the JIT code generators: bundles the JitManager, its IRBuilder and
// a set of pre-built LLVM types, plus the builder helper functions that the
// headers included at the bottom of the struct add as members.
struct Builder
|
||||
{
|
||||
Builder(JitManager *pJitMgr);
|
||||
// accessor for the IR builder used to emit instructions
IRBuilder<>* IRB() { return mpIRBuilder; };
|
||||
// accessor for the owning JIT manager
JitManager* JM() { return mpJitMgr; }
|
||||
|
||||
JitManager* mpJitMgr;
|
||||
IRBuilder<>* mpIRBuilder;
|
||||
|
||||
// Built in types.
|
||||
Type* mVoidTy;
|
||||
Type* mInt1Ty;
|
||||
Type* mInt8Ty;
|
||||
Type* mInt16Ty;
|
||||
Type* mInt32Ty;
|
||||
Type* mInt64Ty;
|
||||
// integer type matching the host pointer size
Type* mIntPtrTy;
|
||||
Type* mFP16Ty;
|
||||
Type* mFP32Ty;
|
||||
Type* mDoubleTy;
|
||||
// simd vector types
Type* mSimdFP16Ty;
|
||||
Type* mSimdFP32Ty;
|
||||
Type* mSimdInt16Ty;
|
||||
Type* mSimdInt32Ty;
|
||||
Type* mSimdInt64Ty;
|
||||
// simd vector of pointer-sized integers
Type* mSimdIntPtrTy;
|
||||
// four-element structure types
StructType* mV4FP32Ty;
|
||||
StructType* mV4Int32Ty;
|
||||
|
||||
// these headers are included inside the struct body, so what they declare becomes a member of Builder
#include "builder_gen.h"
|
||||
#include "builder_x86.h"
|
||||
#include "builder_misc.h"
|
||||
#include "builder_math.h"
|
||||
|
||||
};
|
|
@ -0,0 +1,34 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file builder_math.h
|
||||
*
|
||||
* @brief math/alu builder functions
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
Value* VLOG2PS(Value* src);
|
||||
Value* VPOW24PS(Value* src);
|
||||
Value* VEXP2PS(Value* src);
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,149 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file builder_misc.h
|
||||
*
|
||||
* @brief miscellaneous builder functions
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
Constant *C(bool i);
|
||||
Constant *C(char i);
|
||||
Constant *C(uint8_t i);
|
||||
Constant *C(int i);
|
||||
Constant *C(int64_t i);
|
||||
Constant *C(uint16_t i);
|
||||
Constant *C(uint32_t i);
|
||||
Constant *C(float i);
|
||||
|
||||
template<typename Ty>
|
||||
Constant *C(const std::initializer_list<Ty> &constList)
|
||||
{
|
||||
std::vector<Constant*> vConsts;
|
||||
for(auto i : constList) {
|
||||
|
||||
vConsts.push_back(C((Ty)i));
|
||||
}
|
||||
return ConstantVector::get(vConsts);
|
||||
}
|
||||
|
||||
Constant *PRED(bool pred);
|
||||
Value *VIMMED1(int i);
|
||||
Value *VIMMED1(uint32_t i);
|
||||
Value *VIMMED1(float i);
|
||||
Value *VIMMED1(bool i);
|
||||
Value *VUNDEF(Type* t);
|
||||
Value *VUNDEF_F();
|
||||
Value *VUNDEF_I();
|
||||
Value *VUNDEF(Type* ty, uint32_t size);
|
||||
Value *VUNDEF_IPTR();
|
||||
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
|
||||
Value *VINSERT(Value *vec, Value *val, uint64_t index);
|
||||
#endif
|
||||
Value *VBROADCAST(Value *src);
|
||||
Value *VRCP(Value *va);
|
||||
Value *VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY);
|
||||
|
||||
uint32_t IMMED(Value* i);
|
||||
|
||||
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
|
||||
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
|
||||
CallInst *CALL(Value *Callee, const std::initializer_list<Value*> &args);
|
||||
|
||||
LoadInst *LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name = "");
|
||||
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
|
||||
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
|
||||
StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*> &offset);
|
||||
|
||||
// Convenience wrappers around VCMPPS, one per comparison predicate.
// Each forwards both operands together with the predicate as an 8-bit immediate.
Value *VCMPPS_EQ(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_EQ_OQ)));
}

Value *VCMPPS_LT(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_LT_OQ)));
}

Value *VCMPPS_LE(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_LE_OQ)));
}

Value *VCMPPS_ISNAN(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_UNORD_Q)));
}

Value *VCMPPS_NEQ(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_NEQ_OQ)));
}

Value *VCMPPS_GE(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_GE_OQ)));
}

Value *VCMPPS_GT(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_GT_OQ)));
}

Value *VCMPPS_NOTNAN(Value* a, Value* b)
{
    return VCMPPS(a, b, C(static_cast<uint8_t>(_CMP_ORD_Q)));
}
|
||||
|
||||
Value *MASK(Value* vmask);
|
||||
Value *VMASK(Value* mask);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief functions that build IR to call x86 intrinsics directly, or
|
||||
/// emulate them with other instructions if not available on the host
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
Value *MASKLOADD(Value* src, Value* mask);
|
||||
|
||||
void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
|
||||
Value* mask, Value* vGatherComponents[], bool bPackedOutput);
|
||||
|
||||
Value *GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, Value* scale);
|
||||
void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
|
||||
Value* mask, Value* vGatherComponents[], bool bPackedOutput);
|
||||
|
||||
Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, Value* scale);
|
||||
void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
|
||||
Value* mask, Value* vGatherComponents[], bool bPackedOutput);
|
||||
|
||||
void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
|
||||
|
||||
void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
|
||||
void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Value* vGatherOutput[], bool bPackedOutput);
|
||||
|
||||
Value *PSHUFB(Value* a, Value* b);
|
||||
Value *PMOVSXBD(Value* a);
|
||||
Value *PMOVSXWD(Value* a);
|
||||
Value *PERMD(Value* a, Value* idx);
|
||||
Value *CVTPH2PS(Value* a);
|
||||
Value *CVTPS2PH(Value* a, Value* rounding);
|
||||
Value *PMAXSD(Value* a, Value* b);
|
||||
Value *PMINSD(Value* a, Value* b);
|
||||
Value *VABSPS(Value* a);
|
||||
Value *FMADDPS(Value* a, Value* b, Value* c);
|
||||
|
||||
// LLVM removed VPCMPGTD x86 intrinsic. This emulates that behavior
|
||||
/// @brief Stand-in for the VPCMPGTD x86 intrinsic, which LLVM removed.
///        Compares a against b with ICMP_UGT and sign-extends the result to a
///        vector of JM()->mVWidth 32-bit integers.
/// NOTE(review): ICMP_UGT is an unsigned compare, whereas the x86 VPCMPGTD
/// instruction compares signed dwords - confirm callers expect unsigned order.
/// @param a - left-hand operand
/// @param b - right-hand operand
/// @return sign-extended compare mask
Value *VPCMPGTD(Value* a, Value* b)
{
    // The sign extension is what sets the high bit of each lane, which is
    // what x86 intrinsic masks require.
    return S_EXT(ICMP_UGT(a, b), VectorType::get(mInt32Ty, JM()->mVWidth));
}
|
||||
|
||||
Value *ICLAMP(Value* src, Value* low, Value* high);
|
||||
Value *FCLAMP(Value* src, Value* low, Value* high);
|
||||
Value *FCLAMP(Value* src, float low, float high);
|
||||
|
||||
CallInst *PRINT(const std::string &printStr);
|
||||
CallInst *PRINT(const std::string &printStr,const std::initializer_list<Value*> &printArgs);
|
||||
Value* STACKSAVE();
|
||||
void STACKRESTORE(Value* pSaved);
|
||||
|
||||
Value* POPCNT(Value* a);
|
||||
|
||||
/// @brief Emits INTERRUPT with the constant 3 as its argument.
/// @return result of the INTERRUPT call
Value* INT3()
{
    Value* const vectorNumber = C((uint8_t)3);
    return INTERRUPT(vectorNumber);
}
|
||||
|
||||
|
||||
Value *VEXTRACTI128(Value* a, Constant* imm8);
|
||||
Value *VINSERTI128(Value* a, Value* b, Constant* imm8);
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,128 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file fetch_jit.h
|
||||
*
|
||||
* @brief Definition of the fetch jitter
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "common/formats.h"
|
||||
#include "core/state.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// INPUT_ELEMENT_DESC
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Description of one input element: a set of bitfields that can also
///        be read as a single 64-bit value (bits), plus a separate 32-bit
///        InstanceDataStepRate.
struct INPUT_ELEMENT_DESC
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t AlignedByteOffset : 12; ///< 12 bits; presumably the element's byte offset - TODO confirm
|
||||
uint32_t Format : 10; ///< 10 bits; presumably an SWR_FORMAT value - TODO confirm
|
||||
uint32_t StreamIndex : 6; ///< 6 bits; presumably selects the source vertex stream - TODO confirm
|
||||
uint32_t InstanceEnable : 1; ///< when 1, FETCH_COMPILE_STATE::operator== also compares InstanceDataStepRate
|
||||
uint32_t ComponentControl0 : 3; ///< 3 bits; presumably a ComponentControl value for component 0 - TODO confirm
|
||||
uint32_t ComponentControl1 : 3; ///< as ComponentControl0, for component 1
|
||||
uint32_t ComponentControl2 : 3; ///< as ComponentControl0, for component 2
|
||||
uint32_t ComponentControl3 : 3; ///< as ComponentControl0, for component 3
|
||||
uint32_t ComponentPacking : 4; ///< 4 bits; set from ComponentEnable values
|
||||
uint32_t _reserved : 19; ///< brings the declared bitfield widths to 64 bits in total
|
||||
};
|
||||
uint64_t bits; ///< the bitfields above viewed as one 64-bit value
|
||||
};
|
||||
uint32_t InstanceDataStepRate; ///< stored outside the union, so not part of bits
|
||||
};
|
||||
|
||||
// used to set ComponentPacking
|
||||
/// @brief 4-bit component mask: X = 0x1, Y = 0x2, Z = 0x4, W = 0x8. Every other
///        enumerator is the bitwise OR of the components named in it.
enum ComponentEnable
|
||||
{
|
||||
NONE = 0x0,
|
||||
X = 0x1,
|
||||
Y = 0x2,
|
||||
XY = 0x3,
|
||||
Z = 0x4,
|
||||
XZ = 0x5,
|
||||
YZ = 0x6,
|
||||
XYZ = 0x7,
|
||||
W = 0x8,
|
||||
XW = 0x9,
|
||||
YW = 0xA,
|
||||
XYW = 0xB,
|
||||
ZW = 0xC,
|
||||
XZW = 0xD,
|
||||
YZW = 0xE,
|
||||
XYZW = 0xF,
|
||||
};
|
||||
|
||||
/// @brief Per-component control codes (values 0-4).
/// NOTE(review): presumably the values held by the 3-bit
/// INPUT_ELEMENT_DESC::ComponentControlN fields, and the meanings below are
/// inferred from the enumerator names - confirm against the fetch jitter.
enum ComponentControl
|
||||
{
|
||||
NoStore = 0, ///< presumably: component is not written
|
||||
StoreSrc = 1, ///< presumably: component is taken from the fetched source data
|
||||
Store0 = 2, ///< presumably: component is set to 0
|
||||
Store1Fp = 3, ///< presumably: component is set to floating-point 1
|
||||
Store1Int = 4, ///< presumably: component is set to integer 1
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// State required for fetch shader jit compile.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct FETCH_COMPILE_STATE
|
||||
{
|
||||
uint32_t numAttribs;
|
||||
INPUT_ELEMENT_DESC layout[KNOB_NUM_ATTRIBUTES];
|
||||
SWR_FORMAT indexType;
|
||||
uint32_t cutIndex{ 0xffffffff };
|
||||
|
||||
// Options that effect the JIT'd code
|
||||
bool bDisableVGATHER; // if enabled, FetchJit will generate loads/shuffles instead of VGATHERs
|
||||
bool bDisableIndexOOBCheck; // if enabled, FetchJit will exclude index OOB check
|
||||
bool bEnableCutIndex{ false }; // compares indices with the cut index and returns a cut mask
|
||||
|
||||
FETCH_COMPILE_STATE(bool useVGATHER = false, bool indexOOBCheck = false) :
|
||||
bDisableVGATHER(useVGATHER), bDisableIndexOOBCheck(indexOOBCheck){};
|
||||
|
||||
bool operator==(const FETCH_COMPILE_STATE &other) const
|
||||
{
|
||||
if (numAttribs != other.numAttribs) return false;
|
||||
if (indexType != other.indexType) return false;
|
||||
if (bDisableVGATHER != other.bDisableVGATHER) return false;
|
||||
if (bDisableIndexOOBCheck != other.bDisableIndexOOBCheck) return false;
|
||||
if (bEnableCutIndex != other.bEnableCutIndex) return false;
|
||||
if (cutIndex != other.cutIndex) return false;
|
||||
|
||||
for(uint32_t i = 0; i < numAttribs; ++i)
|
||||
{
|
||||
if((layout[i].bits != other.layout[i].bits) ||
|
||||
((layout[i].InstanceEnable == 1) &&
|
||||
(layout[i].InstanceDataStepRate != other.layout[i].InstanceDataStepRate))){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
|
@ -0,0 +1,108 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file jit_api.h
|
||||
*
|
||||
* @brief Platform independent JIT interface
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
#include "common/os.h"
|
||||
|
||||
#include "fetch_jit.h"
|
||||
#include "streamout_jit.h"
|
||||
#include "blend_jit.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define EXCEPTION_PRINT_STACK(ret) ret
|
||||
#endif // _WIN32
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define JITCALL __stdcall
|
||||
#else
|
||||
#define JITCALL
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
struct ShaderInfo;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Jit Compile Info Input
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Input passed by const reference to JitCompileShader.
struct JIT_COMPILE_INPUT
|
||||
{
|
||||
SWR_SHADER_TYPE type; ///< presumably the kind of shader to compile - TODO confirm
|
||||
|
||||
const void* pIR; ///< Pointer to LLVM IR text.
|
||||
|
||||
bool enableJitSampler; ///< NOTE(review): presumably turns on JIT'd sampling code - confirm
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Create JIT context.
|
||||
HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Destroy JIT context.
|
||||
void JITCALL JitDestroyContext(HANDLE hJitContext);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT compile shader.
|
||||
/// @param hJitContext - Jit Context
|
||||
/// @param input - Input containing LLVM IR and other information
|
||||
/// @return Pointer to a ShaderInfo containing information about the JIT shader
|
||||
ShaderInfo* JITCALL JitCompileShader(
|
||||
HANDLE hJitContext,
|
||||
const JIT_COMPILE_INPUT& input);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT destroy shader.
|
||||
/// @param hJitContext - Jit Context
|
||||
/// @param pShaderInfo - pointer to shader object.
|
||||
void JITCALL JitDestroyShader(
|
||||
HANDLE hJitContext,
|
||||
ShaderInfo*& pShaderInfo);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT compiles fetch shader
|
||||
/// @param hJitContext - Jit Context
|
||||
/// @param state - Fetch state to build function from
|
||||
PFN_FETCH_FUNC JITCALL JitCompileFetch(HANDLE hJitContext, const FETCH_COMPILE_STATE& state);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT compiles streamout shader
|
||||
/// @param hJitContext - Jit Context
|
||||
/// @param state - SO state to build function from
|
||||
PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitContext, const STREAMOUT_COMPILE_STATE& state);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT compiles blend shader
|
||||
/// @param hJitContext - Jit Context
|
||||
/// @param state - blend state to build function from
|
||||
PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitContext, const BLEND_COMPILE_STATE& state);
|
||||
|
||||
|
||||
}; // extern "C"
|
|
@ -0,0 +1,401 @@
|
|||
# Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
#!deps/python32/python.exe
|
||||
|
||||
import os, sys, re
|
||||
import argparse
|
||||
import json as JSON
|
||||
import operator
|
||||
|
||||
header = r"""/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file %s
|
||||
*
|
||||
* @brief auto-generated file
|
||||
*
|
||||
* DO NOT EDIT
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
"""
|
||||
def gen_file_header(filename):
    """Return the module-level `header` template, with `filename` substituted
    into it, as a list of lines."""
    return (header % filename).splitlines()
|
||||
|
||||
|
||||
inst_aliases = {
|
||||
'SHUFFLE_VECTOR': 'VSHUFFLE',
|
||||
'INSERT_ELEMENT': 'VINSERT',
|
||||
'EXTRACT_ELEMENT': 'VEXTRACT',
|
||||
'MEM_SET': 'MEMSET',
|
||||
'MEM_CPY': 'MEMCPY',
|
||||
'MEM_MOVE': 'MEMMOVE',
|
||||
'L_SHR': 'LSHR',
|
||||
'A_SHR': 'ASHR',
|
||||
'BIT_CAST': 'BITCAST',
|
||||
'U_DIV': 'UDIV',
|
||||
'S_DIV': 'SDIV',
|
||||
'U_REM': 'UREM',
|
||||
'S_REM': 'SREM',
|
||||
'BIN_OP': 'BINOP',
|
||||
}
|
||||
|
||||
intrinsics = [
|
||||
["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
|
||||
["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", "mask", "scale"]],
|
||||
["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
|
||||
["VRSQRTPS", "x86_avx_rsqrt_ps_256", ["a"]],
|
||||
["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
|
||||
["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
|
||||
["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
|
||||
["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]],
|
||||
["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]],
|
||||
["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
|
||||
["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]],
|
||||
["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b", "mask"]],
|
||||
["BEXTR_32", "x86_bmi_bextr_32", ["src", "control"]],
|
||||
["VMASKLOADD", "x86_avx2_maskload_d_256", ["src", "mask"]],
|
||||
["VMASKMOVPS", "x86_avx_maskload_ps_256", ["src", "mask"]],
|
||||
["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]],
|
||||
["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
|
||||
["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
|
||||
["VPERMD", "x86_avx2_permd", ["idx", "a"]],
|
||||
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
|
||||
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
|
||||
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],
|
||||
["VPTESTC", "x86_avx_ptestc_256", ["a", "b"]],
|
||||
["VPTESTZ", "x86_avx_ptestz_256", ["a", "b"]],
|
||||
["VFMADDPS", "x86_fma_vfmadd_ps_256", ["a", "b", "c"]],
|
||||
["VCVTTPS2DQ", "x86_avx_cvtt_ps2dq_256", ["a"]],
|
||||
["VMOVMSKPS", "x86_avx_movmsk_ps_256", ["a"]],
|
||||
["INTERRUPT", "x86_int", ["a"]],
|
||||
]
|
||||
|
||||
def convert_uppercamel(name):
    """Convert an UpperCamelCase identifier to UPPER_SNAKE_CASE.

    Example: 'ShuffleVector' -> 'SHUFFLE_VECTOR'.
    """
    # Pass 1: put '_' in front of every capitalised word (an upper-case letter
    # followed by lower-case letters) that is preceded by another character.
    capitalised_word = re.compile('(.)([A-Z][a-z]+)')
    # Pass 2: put '_' between a lower-case letter or digit and an upper-case
    # letter, which catches acronyms that pass 1 leaves attached.
    lower_then_upper = re.compile('([a-z0-9])([A-Z])')

    partly_split = capitalised_word.sub(r'\1_\2', name)
    fully_split = lower_then_upper.sub(r'\1_\2', partly_split)
    return fully_split.upper()
|
||||
|
||||
"""
|
||||
Given an input file (e.g. IRBuilder.h) generates function dictionary.
|
||||
"""
|
||||
def parse_ir_builder(input_file):
    """Scan an open header file (e.g. IRBuilder.h) for `Create*` functions.

    Returns a list of dictionaries, one per function found, with the keys:
      name        - function name as written, e.g. 'CreateShuffleVector'
      alias       - wrapper name: the CAMEL_CASE form of the text after
                    'Create', replaced by its `inst_aliases` entry if any
      return      - return type text, ending in '*'
      args        - argument list including default values
      args_nodefs - argument list with ' = default' parts removed
      arg_names   - list of bare argument names

    Signatures marked LLVM_DELETED_FUNCTION or '= delete;' are skipped, as
    are CreateInsertNUWNSWBinOp and CreateMaskedIntrinsic.
    """
    ignored_functions = ('CreateInsertNUWNSWBinOp', 'CreateMaskedIntrinsic')

    functions = []
    lines = input_file.readlines()
    num_lines = len(lines)

    idx = 0
    # Scan every line, the last one included.
    while idx < num_lines:
        line = lines[idx].rstrip()
        idx += 1

        if re.search(r"[\*\s]Create(\w*)\(", line) is None:
            continue

        # When the line starts with 'Create', prepend the previous line to
        # the signature. idx already points one past the current line; the
        # bounds check stops lines[idx-2] wrapping round to the end of the
        # file when the match is on the very first line.
        if re.search(r"^\s*Create", line) is not None and idx >= 2:
            func_sig = lines[idx - 2].rstrip() + line
        else:
            func_sig = line

        # Append following lines until one contains ')', without running off
        # the end of the file on an unterminated signature.
        while ')' not in line and idx < num_lines:
            line = lines[idx].rstrip()
            func_sig += line
            idx += 1

        if re.search(r"LLVM_DELETED_FUNCTION|= delete;", func_sig):
            continue

        func = re.search(r"(.*?)\*[\n\s]*(Create\w*)\((.*?)\)", func_sig)
        if func is None:
            continue

        return_type = func.group(1).lstrip() + '*'
        func_name = func.group(2)
        arguments = func.group(3)

        args_with_defaults = []
        args_without_defaults = []
        arg_names = []
        for arg in arguments.split(','):
            arg = arg.lstrip()
            if not arg:
                continue

            args_with_defaults.append(arg)
            args_without_defaults.append(arg.split(' =')[0])

            # The name is the last whitespace-separated token before any
            # '=', with leading '&' / '*' removed.
            arg_name = arg.split('=')[0].rsplit(None, 1)[-1]
            reg_arg = re.search(r"[\&\*]*(\w*)", arg_name)
            if reg_arg:
                arg_names.append(reg_arg.group(1))

        func_args = ', '.join(args_with_defaults)
        func_args_nodefs = ', '.join(args_without_defaults)

        # Convert CamelCase to CAMEL_CASE
        func_mod = re.search(r"Create(\w*)", func_name)
        if func_mod:
            func_mod = convert_uppercamel(func_mod.group(1))
            # 'F_ADD' -> 'FADD', 'I_CMP' -> 'ICMP'
            if func_mod[0:2] == 'F_' or func_mod[0:2] == 'I_':
                func_mod = func_mod[0] + func_mod[2:]

        # Substitute alias based on CAMEL_CASE name.
        func_alias = inst_aliases.get(func_mod)
        if not func_alias:
            func_alias = func_mod

            # The ArrayRef overloads of CreateCall / CreateGEP get an 'A'
            # suffix.
            if func_name == 'CreateCall' or func_name == 'CreateGEP':
                if re.search(r'ArrayRef', func_args):
                    func_alias = func_alias + 'A'

        if func_name in ignored_functions:
            continue

        functions.append({
            "name": func_name,
            "alias": func_alias,
            "return": return_type,
            "args": func_args,
            "args_nodefs": func_args_nodefs,
            "arg_names": arg_names
        })

    return functions
|
||||
|
||||
"""
|
||||
Auto-generates macros for LLVM IR
|
||||
"""
|
||||
def generate_gen_h(functions, output_file):
    """Write the file banner, '#pragma once' and one declaration per entry of
    `functions` to `output_file`. Each declaration uses the entry's alias
    when it has one, otherwise its name."""
    banner = gen_file_header(os.path.basename(output_file.name))

    preamble = [
        '#pragma once',
        '',
        '//////////////////////////////////////////////////////////////////////////',
        '/// Auto-generated Builder IR declarations',
        '//////////////////////////////////////////////////////////////////////////',
    ]

    declarations = [
        '%s%s(%s);' % (func['return'], func['alias'] or func['name'], func['args'])
        for func in functions
    ]

    output_file.write('\n'.join(banner + preamble + declarations) + '\n')
|
||||
|
||||
"""
|
||||
Auto-generates macros for LLVM IR
|
||||
"""
|
||||
def generate_gen_cpp(functions, output_file):
    """Write the file banner, the builder.h include and one Builder:: wrapper
    definition per entry of `functions` to `output_file`. Each wrapper
    forwards its arguments by name to the matching IRB()->Create* call."""
    output_lines = gen_file_header(os.path.basename(output_file.name))
    output_lines.extend(['#include \"builder.h\"', ''])

    for func in functions:
        wrapper_name = func['alias'] or func['name']
        forwarded_args = ', '.join(func['arg_names'])

        output_lines.extend([
            '//////////////////////////////////////////////////////////////////////////',
            '%sBuilder::%s(%s)' % (func['return'], wrapper_name, func['args_nodefs']),
            '{',
            ' return IRB()->%s(%s);' % (func['name'], forwarded_args),
            '}',
            '',
        ])

    output_file.write('\n'.join(output_lines) + '\n')
|
||||
|
||||
"""
|
||||
Auto-generates macros for LLVM IR
|
||||
"""
|
||||
def generate_x86_h(output_file):
    """Write the file banner, '#pragma once' and one declaration per entry of
    the module-level `intrinsics` table to `output_file`. Every parameter is
    declared as 'Value*'."""
    output_lines = gen_file_header(os.path.basename(output_file.name))
    output_lines.extend([
        '#pragma once',
        '',
        '//////////////////////////////////////////////////////////////////////////',
        '/// Auto-generated x86 intrinsics',
        '//////////////////////////////////////////////////////////////////////////',
    ])

    for inst in intrinsics:
        # inst is [wrapper name, LLVM intrinsic name, [parameter names]]
        params = ', '.join("Value* %s" % arg for arg in inst[2])
        output_lines.append('Value *%s(%s);' % (inst[0], params))

    output_file.write('\n'.join(output_lines) + '\n')
|
||||
|
||||
"""
|
||||
Auto-generates macros for LLVM IR
|
||||
"""
|
||||
def generate_x86_cpp(output_file):
    """Write the file banner, the builder.h include and one Builder:: wrapper
    definition per entry of the module-level `intrinsics` table to
    `output_file`. Each wrapper looks up the intrinsic's declaration and
    CALLs it with the wrapper's own arguments."""
    output_lines = gen_file_header(os.path.basename(output_file.name))
    output_lines.extend(['#include \"builder.h\"', ''])

    for inst in intrinsics:
        # inst is [wrapper name, LLVM intrinsic name, [parameter names]]
        arg_names = inst[2]
        params = ', '.join("Value* %s" % arg for arg in arg_names)
        pass_args = ', '.join(arg_names)

        output_lines.extend([
            '//////////////////////////////////////////////////////////////////////////',
            'Value *Builder::%s(%s)' % (inst[0], params),
            '{',
            ' Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::%s);' % inst[1],
            ' return CALL(func, std::initializer_list<Value*>{%s});' % pass_args,
            '}',
            '',
        ])

    output_file.write('\n'.join(output_lines) + '\n')
|
||||
|
||||
"""
|
||||
Function which is invoked when this script is started from a command line.
|
||||
Will present and consume a set of arguments which will tell this script how
|
||||
to behave
|
||||
"""
|
||||
def main():
    """Command-line entry point.

    With --input, parses the given header and writes builder_gen.h and/or
    builder_gen.cpp content to --output, according to --gen_h / --gen_cpp.
    Without --input, writes the x86 intrinsic header and/or source according
    to --gen_x86_h / --gen_x86_cpp, and prints a message if --gen_h or
    --gen_cpp was requested.
    """
    # Parse args...
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", "-i", type=argparse.FileType('r'), help="Path to IRBuilder.h", required=False)
    parser.add_argument("--output", "-o", type=argparse.FileType('w'), help="Path to output file", required=True)

    switches = (
        ("gen_h", "Generate builder_gen.h"),
        ("gen_cpp", "Generate builder_gen.cpp"),
        ("gen_x86_h", "Generate x86 intrinsics. No input is needed."),
        ("gen_x86_cpp", "Generate x86 intrinsics. No input is needed."),
    )
    for switch, help_text in switches:
        parser.add_argument("--" + switch, "-" + switch, help=help_text, action="store_true", default=False)

    args = parser.parse_args()

    if not args.input:
        if args.gen_x86_h:
            generate_x86_h(args.output)

        if args.gen_x86_cpp:
            generate_x86_cpp(args.output)

        if args.gen_h:
            print("Need to specify --input for --gen_h!")

        if args.gen_cpp:
            print("Need to specify --input for --gen_cpp!")

        return

    functions = parse_ir_builder(args.input)

    if args.gen_h:
        generate_gen_h(functions, args.output)

    if args.gen_cpp:
        generate_gen_cpp(functions, args.output)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
# END OF FILE
|
|
@ -0,0 +1,341 @@
|
|||
# Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
#!deps/python32/python.exe
|
||||
|
||||
import os, sys, re
|
||||
import argparse
|
||||
import json as JSON
|
||||
import operator
|
||||
|
||||
header = r"""
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file %s
|
||||
*
|
||||
* @brief auto-generated file
|
||||
*
|
||||
* DO NOT EDIT
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
"""
|
||||
def gen_file_header(filename):
    """Return the module-level `header` template, with `filename` substituted
    into it, as a list of lines."""
    return (header % filename).splitlines()
|
||||
|
||||
"""
|
||||
"""
|
||||
def gen_llvm_type(type, name, postfix_name, is_pointer, is_pointer_pointer, is_array, is_array_array, array_count, array_count1, is_llvm_struct, is_llvm_enum, is_llvm_pfn, output_file):
    """Build the 'members.push_back(...)' line for one structure field.

    The base LLVM type expression is chosen from the flags and `type`, then
    wrapped in PointerType::get once for `is_pointer` and once more for
    `is_pointer_pointer`, then in ArrayType::get for `is_array_array` (inner
    count `array_count1`, outer count `array_count`) or else `is_array`
    (count `array_count`). `output_file` is not used.

    Returns a one-element list holding the generated line, which ends with
    `name` in a trailing comment.
    """
    int8 = 'Type::getInt8Ty(ctx)'
    int16 = 'Type::getInt16Ty(ctx)'
    int32 = 'Type::getInt32Ty(ctx)'
    int64 = 'Type::getInt64Ty(ctx)'
    flt = 'Type::getFloatTy(ctx)'
    dbl = 'Type::getDoubleTy(ctx)'

    builtin_types = {
        'BYTE': int8, 'char': int8, 'uint8_t': int8, 'int8_t': int8, 'bool': int8,
        'UINT64': int64, 'INT64': int64, 'uint64_t': int64, 'int64_t': int64,
        'UINT16': int16, 'int16_t': int16, 'uint16_t': int16,
        'UINT': int32, 'INT': int32, 'int': int32, 'BOOL': int32, 'uint32_t': int32, 'int32_t': int32,
        'float': flt, 'FLOAT': flt,
        'double': dbl, 'DOUBLE': dbl,
        'void': int32, 'VOID': int32,
        'HANDLE': 'PointerType::get(Type::getInt32Ty(ctx), 0)',
        'simdscalar': 'VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth)',
        'simdscalari': 'VectorType::get(Type::getInt32Ty(ctx), pJitMgr->mVWidth)',
        'simdvector': 'ArrayType::get(VectorType::get(Type::getFloatTy(ctx), pJitMgr->mVWidth), 4)',
    }

    if is_llvm_struct:
        if is_pointer or is_pointer_pointer:
            llvm_type = int32
        else:
            # By-value struct: a byte array of the struct's size.
            llvm_type = 'ArrayType::get(Type::getInt8Ty(ctx), sizeof(%s))' % type
    elif is_llvm_enum:
        llvm_type = int32
    elif is_llvm_pfn:
        llvm_type = 'PointerType::get(Type::getInt8Ty(ctx), 0)'
    elif type in builtin_types:
        llvm_type = builtin_types[type]
    else:
        # Any other type: call its own generated Gen_<type> function.
        llvm_type = 'Gen_%s%s(pJitMgr)' % (type, postfix_name)

    # One PointerType::get wrapper per level of indirection.
    for needs_pointer in (is_pointer, is_pointer_pointer):
        if needs_pointer:
            llvm_type = 'PointerType::get(%s, 0)' % llvm_type

    if is_array_array:
        llvm_type = 'ArrayType::get(ArrayType::get(%s, %s), %s)' % (llvm_type, array_count1, array_count)
    elif is_array:
        llvm_type = 'ArrayType::get(%s, %s)' % (llvm_type, array_count)

    return [' members.push_back( %s ); // %s' % (llvm_type, name)]
|
||||
|
||||
"""
|
||||
"""
|
||||
def gen_llvm_types(input_file, output_file):
    """Generate LLVM struct-type builder functions from a C/C++ header.

    Every ``struct`` line in ``input_file`` that is not a forward declaration
    (i.e. has no ``;`` on the same line) produces a ``Gen_<struct>`` function
    that pushes one LLVM type per parsed field, followed by one
    ``static const uint32_t <struct>_<field>`` index constant per field.
    The whole generated text is written to ``output_file`` in a single call.

    input_file  -- readable text file object containing the struct definitions
    output_file -- writable text file object; the basename of its ``name`` is
                   handed to gen_file_header()

    Field lines may carry annotation tags:
      @llvm_typedef -- the field is skipped entirely
      @llvm_struct, @llvm_enum, @llvm_pfn -- forwarded to gen_llvm_type()
    """

    output_lines = gen_file_header(os.path.basename(output_file.name))

    lines = input_file.readlines()

    postfix_name = ""

    for idx in range(len(lines)):
        line = lines[idx].rstrip()

        match = re.match(r"(\s*)struct(\s*)(\w+)", line)
        if not match:
            continue

        llvm_args = []

        # Detect start of structure; a ';' on the struct line marks a
        # forward declaration, which generates nothing.
        is_fwd_decl = re.search(r";", line)
        if is_fwd_decl:
            continue

        # Extract the command name
        struct_name = match.group(3).strip()

        output_lines += [
            '//////////////////////////////////////////////////////////////////////////',
            '/// Generate LLVM type information for %s' % struct_name,
            'INLINE static StructType *Gen_%s%s(JitManager* pJitMgr)' % (struct_name, postfix_name),
            '{',
            ' LLVMContext& ctx = pJitMgr->mContext;',
            ' std::vector<Type*> members;',
            '',
        ]

        end_of_struct = False

        # NOTE: advancing idx here does not advance the enclosing for loop
        # (range() rebinds idx on its next iteration), so the lines consumed
        # below are looked at again by the outer struct scan.
        while not end_of_struct and idx < len(lines) - 1:
            idx += 1
            line = lines[idx].rstrip()

            ###########################################
            # Annotation tags found anywhere on the line.
            is_llvm_typedef = re.search(r"@llvm_typedef", line) is not None

            # Is field a llvm struct? Tells script to treat type as array of
            # bytes that is size of structure.
            is_llvm_struct = re.search(r"@llvm_struct", line) is not None

            # Is field a llvm enum? Tells script to treat type as an enum and
            # replaced with uint32 type.
            is_llvm_enum = re.search(r"@llvm_enum", line) is not None

            # Is field a llvm function pointer? gen_llvm_type() emits an i8
            # pointer type for these.
            is_llvm_pfn = re.search(r"@llvm_pfn", line) is not None

            ###########################################
            # Is field const?
            is_const = re.search(r"\s+const\s+", line) is not None

            ###########################################
            # Is field a pointer to pointer / a pointer?
            is_pointer_pointer = re.search(r"\*\*", line) is not None
            is_pointer = re.search(r"\*", line) is not None

            ###########################################
            # Is field an array of arrays?
            # TODO: Can add this to a list.
            array_count = '0'
            array_count1 = '0'

            array_array_match = re.search(r"\[(\w*)\]\[(\w*)\]", line)
            is_array_array = array_array_match is not None
            if is_array_array:
                array_count = array_array_match.group(1)
                array_count1 = array_array_match.group(2)

            ###########################################
            # Is field an array? (also matches the first dimension of an
            # array of arrays, yielding the same array_count)
            array_match = re.search(r"\[(\w*)\]", line)
            is_array = array_match is not None
            if is_array:
                array_count = array_match.group(1)

            is_scoped = re.search("::", line) is not None

            ###########################################
            # Pull the field's type and name out of the line.  Every branch
            # leaves both as None when the line does not look like a field
            # (comments, braces, blank lines), so such lines are skipped
            # instead of raising on a failed match.
            field_type = None
            field_name = None

            if is_const and is_pointer:
                if is_scoped:
                    field_match = re.match(r"(\s*)(\w+\<*\w*\>*)(\s+)(\w+::)(\w+)(\s*\**\s*)(\w+)", line)
                    if field_match:
                        field_type = "%s%s" % (field_match.group(4), field_match.group(5))
                        field_name = field_match.group(7)
                else:
                    field_match = re.match(r"(\s*)(\w+\<*\w*\>*)(\s+)(\w+)(\s*\**\s*)(\w+)", line)
                    if field_match:
                        field_type = field_match.group(4)
                        field_name = field_match.group(6)

            elif is_pointer:
                field_match = re.match(r"(\s*)(\s+)(\w+\<*\w*\>*)(\s*\**\s*)(\w+)", line)
                if field_match:
                    field_type = field_match.group(3)
                    field_name = field_match.group(5)

            elif is_const:
                field_match = re.match(r"(\s*)(\w+\<*\w*\>*)(\s+)(\w+)(\s*)(\w+)", line)
                if field_match:
                    field_type = field_match.group(4)
                    field_name = field_match.group(6)

            elif is_scoped:
                field_match = re.match(r"\s*(\w+\<*\w*\>*)\s*::\s*(\w+\<*\w*\>*)\s+(\w+)", line)
                if field_match:
                    field_type = field_match.group(1) + '::' + field_match.group(2)
                    field_name = field_match.group(3)

            else:
                field_match = re.match(r"(\s*)(\w+\<*\w*\>*)(\s+)(\w+)", line)
                if field_match:
                    field_type = field_match.group(2)
                    field_name = field_match.group(4)

            if not is_llvm_typedef and field_type is not None:
                output_lines += gen_llvm_type(field_type, field_name, postfix_name, is_pointer, is_pointer_pointer, is_array, is_array_array, array_count, array_count1, is_llvm_struct, is_llvm_enum, is_llvm_pfn, output_file)
                llvm_args.append(field_name)

            # Detect end of structure
            end_of_struct = re.match(r"(\s*)};", line)

            if end_of_struct:
                output_lines += [
                    '',
                    ' return StructType::get(ctx, members, false);',
                    '}',
                    '',
                ]

                # One member-index constant per emitted field, in field order.
                for i, arg_name in enumerate(llvm_args):
                    output_lines.append('static const uint32_t %s%s_%s = %s;' % (struct_name, postfix_name, arg_name, i))

                output_lines.append('')

    output_file.write('\n'.join(output_lines) + '\n')
|
||||
|
||||
"""
|
||||
Function which is invoked when this script is started from a command line.
|
||||
Will present and consume a set of arguments which will tell this script how
|
||||
to behave
|
||||
"""
|
||||
def main():
    """Command-line entry point: parse the arguments and run the generator."""

    # Parse args...
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--input", "-i",
        type=argparse.FileType('r'),
        help="Path to input file containing structs",
        required=True)
    cli.add_argument(
        "--output", "-o",
        type=argparse.FileType('w'),
        help="Path to output file",
        required=True)
    # Accepted on the command line; its value is not read in this function.
    cli.add_argument(
        "--scalar", "-scalar",
        help="Generates scalar files with all enums",
        action="store_true",
        default=False)
    options = cli.parse_args()

    gen_llvm_types(options.input, options.output)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
# END OF FILE
|
|
@ -0,0 +1,357 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file streamout_jit.cpp
|
||||
*
|
||||
* @brief Implementation of the streamout jitter
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "jit_api.h"
|
||||
#include "streamout_jit.h"
|
||||
#include "builder.h"
|
||||
#include "state_llvm.h"
|
||||
#include "common/containers.hpp"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Interface to Jitting a fetch shader
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct StreamOutJit : public Builder
|
||||
{
|
||||
StreamOutJit(JitManager* pJitMgr) : Builder(pJitMgr){};
|
||||
|
||||
// returns pointer to SWR_STREAMOUT_BUFFER
|
||||
Value* getSOBuffer(Value* pSoCtx, uint32_t buffer)
|
||||
{
|
||||
return LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pBuffer, buffer });
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// @brief checks if streamout buffer is oob
|
||||
// @return <i1> true/false
|
||||
Value* oob(const STREAMOUT_COMPILE_STATE& state, Value* pSoCtx, uint32_t buffer)
|
||||
{
|
||||
Value* returnMask = C(false);
|
||||
|
||||
Value* pBuf = getSOBuffer(pSoCtx, buffer);
|
||||
|
||||
// load enable
|
||||
// @todo bool data types should generate <i1> llvm type
|
||||
Value* enabled = TRUNC(LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_enable }), IRB()->getInt1Ty());
|
||||
|
||||
// load buffer size
|
||||
Value* bufferSize = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_bufferSize });
|
||||
|
||||
// load current streamOffset
|
||||
Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
|
||||
|
||||
// load buffer pitch
|
||||
Value* pitch = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
|
||||
|
||||
// buffer is considered oob if in use in a decl but not enabled
|
||||
returnMask = OR(returnMask, NOT(enabled));
|
||||
|
||||
// buffer is oob if cannot fit a prims worth of verts
|
||||
Value* newOffset = ADD(streamOffset, MUL(pitch, C(state.numVertsPerPrim)));
|
||||
returnMask = OR(returnMask, ICMP_SGT(newOffset, bufferSize));
|
||||
|
||||
return returnMask;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// @brief converts scalar bitmask to <4 x i32> suitable for shuffle vector,
|
||||
// packing the active mask bits
|
||||
// ex. bitmask 0011 -> (0, 1, 0, 0)
|
||||
// bitmask 1000 -> (3, 0, 0, 0)
|
||||
// bitmask 1100 -> (2, 3, 0, 0)
|
||||
Value* PackMask(uint32_t bitmask)
|
||||
{
|
||||
std::vector<Constant*> indices(4, C(0));
|
||||
DWORD index;
|
||||
uint32_t elem = 0;
|
||||
while (_BitScanForward(&index, bitmask))
|
||||
{
|
||||
indices[elem++] = C((int)index);
|
||||
bitmask &= ~(1 << index);
|
||||
}
|
||||
|
||||
return ConstantVector::get(indices);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// @brief convert scalar bitmask to <4xfloat> bitmask
|
||||
Value* ToMask(uint32_t bitmask)
|
||||
{
|
||||
std::vector<Constant*> indices;
|
||||
for (uint32_t i = 0; i < 4; ++i)
|
||||
{
|
||||
if (bitmask & (1 << i))
|
||||
{
|
||||
indices.push_back(C(-1.0f));
|
||||
}
|
||||
else
|
||||
{
|
||||
indices.push_back(C(0.0f));
|
||||
}
|
||||
}
|
||||
return ConstantVector::get(indices);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// @brief processes a single decl from the streamout stream. Reads 4 components from the input
|
||||
// stream and writes N components to the output buffer given the componentMask or if
|
||||
// a hole, just increments the buffer pointer
|
||||
// @param pStream - pointer to current attribute
|
||||
// @param pOutBuffers - pointers to the current location of each output buffer
|
||||
// @param decl - input decl
|
||||
void buildDecl(Value* pStream, Value* pOutBuffers[4], const STREAMOUT_DECL& decl)
|
||||
{
|
||||
// @todo add this to x86 macros
|
||||
Function* maskStore = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx_maskstore_ps);
|
||||
|
||||
uint32_t numComponents = _mm_popcnt_u32(decl.componentMask);
|
||||
uint32_t packedMask = (1 << numComponents) - 1;
|
||||
if (!decl.hole)
|
||||
{
|
||||
// increment stream pointer to correct slot
|
||||
Value* pAttrib = GEP(pStream, C(4 * decl.attribSlot));
|
||||
|
||||
// load 4 components from stream
|
||||
Type* simd4Ty = VectorType::get(IRB()->getFloatTy(), 4);
|
||||
Type* simd4PtrTy = PointerType::get(simd4Ty, 0);
|
||||
pAttrib = BITCAST(pAttrib, simd4PtrTy);
|
||||
Value *vattrib = LOAD(pAttrib);
|
||||
|
||||
// shuffle/pack enabled components
|
||||
Value* vpackedAttrib = VSHUFFLE(vattrib, vattrib, PackMask(decl.componentMask));
|
||||
|
||||
// store to output buffer
|
||||
// cast SO buffer to i8*, needed by maskstore
|
||||
Value* pOut = BITCAST(pOutBuffers[decl.bufferIndex], PointerType::get(mInt8Ty, 0));
|
||||
|
||||
// cast input to <4xfloat>
|
||||
Value* src = BITCAST(vpackedAttrib, simd4Ty);
|
||||
CALL(maskStore, {pOut, ToMask(packedMask), src});
|
||||
}
|
||||
|
||||
// increment SO buffer
|
||||
pOutBuffers[decl.bufferIndex] = GEP(pOutBuffers[decl.bufferIndex], C(numComponents));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// @brief builds a single vertex worth of data for the given stream
|
||||
// @param streamState - state for this stream
|
||||
// @param pCurVertex - pointer to src stream vertex data
|
||||
// @param pOutBuffer - pointers to up to 4 SO buffers
|
||||
void buildVertex(const STREAMOUT_STREAM& streamState, Value* pCurVertex, Value* pOutBuffer[4])
|
||||
{
|
||||
for (uint32_t d = 0; d < streamState.numDecls; ++d)
|
||||
{
|
||||
const STREAMOUT_DECL& decl = streamState.decl[d];
|
||||
buildDecl(pCurVertex, pOutBuffer, decl);
|
||||
}
|
||||
}
|
||||
|
||||
void buildStream(const STREAMOUT_COMPILE_STATE& state, const STREAMOUT_STREAM& streamState, Value* pSoCtx, BasicBlock* returnBB, Function* soFunc)
|
||||
{
|
||||
// get list of active SO buffers
|
||||
std::unordered_set<uint32_t> activeSOBuffers;
|
||||
for (uint32_t d = 0; d < streamState.numDecls; ++d)
|
||||
{
|
||||
const STREAMOUT_DECL& decl = streamState.decl[d];
|
||||
activeSOBuffers.insert(decl.bufferIndex);
|
||||
}
|
||||
|
||||
// always increment numPrimStorageNeeded
|
||||
Value *numPrimStorageNeeded = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
|
||||
numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
|
||||
STORE(numPrimStorageNeeded, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
|
||||
|
||||
// check OOB on active SO buffers. If any buffer is out of bound, don't write
|
||||
// the primitive to any buffer
|
||||
Value* oobMask = C(false);
|
||||
for (uint32_t buffer : activeSOBuffers)
|
||||
{
|
||||
oobMask = OR(oobMask, oob(state, pSoCtx, buffer));
|
||||
}
|
||||
|
||||
BasicBlock* validBB = BasicBlock::Create(JM()->mContext, "valid", soFunc);
|
||||
|
||||
// early out if OOB
|
||||
COND_BR(oobMask, returnBB, validBB);
|
||||
|
||||
IRB()->SetInsertPoint(validBB);
|
||||
|
||||
Value* numPrimsWritten = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
|
||||
numPrimsWritten = ADD(numPrimsWritten, C(1));
|
||||
STORE(numPrimsWritten, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
|
||||
|
||||
// compute start pointer for each output buffer
|
||||
Value* pOutBuffer[4];
|
||||
Value* pOutBufferStartVertex[4];
|
||||
Value* outBufferPitch[4];
|
||||
for (uint32_t b: activeSOBuffers)
|
||||
{
|
||||
Value* pBuf = getSOBuffer(pSoCtx, b);
|
||||
Value* pData = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pBuffer });
|
||||
Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
|
||||
pOutBuffer[b] = GEP(pData, streamOffset);
|
||||
pOutBufferStartVertex[b] = pOutBuffer[b];
|
||||
|
||||
outBufferPitch[b] = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
|
||||
}
|
||||
|
||||
// loop over the vertices of the prim
|
||||
Value* pStreamData = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pPrimData });
|
||||
for (uint32_t v = 0; v < state.numVertsPerPrim; ++v)
|
||||
{
|
||||
buildVertex(streamState, pStreamData, pOutBuffer);
|
||||
|
||||
// increment stream and output buffer pointers
|
||||
// stream verts are always 32*4 dwords apart
|
||||
pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4));
|
||||
|
||||
// output buffers offset using pitch in buffer state
|
||||
for (uint32_t b : activeSOBuffers)
|
||||
{
|
||||
pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]);
|
||||
pOutBuffer[b] = pOutBufferStartVertex[b];
|
||||
}
|
||||
}
|
||||
|
||||
// update each active buffer's streamOffset
|
||||
for (uint32_t b : activeSOBuffers)
|
||||
{
|
||||
Value* pBuf = getSOBuffer(pSoCtx, b);
|
||||
Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
|
||||
streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b]));
|
||||
STORE(streamOffset, pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
|
||||
}
|
||||
}
|
||||
|
||||
Function* Create(const STREAMOUT_COMPILE_STATE& state)
|
||||
{
|
||||
static std::size_t soNum = 0;
|
||||
|
||||
std::stringstream fnName("SOShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
|
||||
fnName << soNum++;
|
||||
|
||||
// SO function signature
|
||||
// typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT*)
|
||||
|
||||
std::vector<Type*> args{
|
||||
PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0), // SWR_STREAMOUT_CONTEXT*
|
||||
};
|
||||
|
||||
FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
|
||||
Function* soFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
|
||||
|
||||
// create return basic block
|
||||
BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
|
||||
BasicBlock* returnBB = BasicBlock::Create(JM()->mContext, "return", soFunc);
|
||||
|
||||
IRB()->SetInsertPoint(entry);
|
||||
|
||||
// arguments
|
||||
auto argitr = soFunc->getArgumentList().begin();
|
||||
Value* pSoCtx = &*argitr++;
|
||||
pSoCtx->setName("pSoCtx");
|
||||
|
||||
const STREAMOUT_STREAM& streamState = state.stream;
|
||||
buildStream(state, streamState, pSoCtx, returnBB, soFunc);
|
||||
|
||||
BR(returnBB);
|
||||
|
||||
IRB()->SetInsertPoint(returnBB);
|
||||
RET_VOID();
|
||||
|
||||
JitManager::DumpToFile(soFunc, "SoFunc");
|
||||
|
||||
FunctionPassManager passes(JM()->mpCurrentModule);
|
||||
passes.add(createBreakCriticalEdgesPass());
|
||||
passes.add(createCFGSimplificationPass());
|
||||
passes.add(createEarlyCSEPass());
|
||||
passes.add(createPromoteMemoryToRegisterPass());
|
||||
passes.add(createCFGSimplificationPass());
|
||||
passes.add(createEarlyCSEPass());
|
||||
passes.add(createInstructionCombiningPass());
|
||||
passes.add(createInstructionSimplifierPass());
|
||||
passes.add(createConstantPropagationPass());
|
||||
passes.add(createSCCPPass());
|
||||
passes.add(createAggressiveDCEPass());
|
||||
|
||||
passes.run(*soFunc);
|
||||
|
||||
JitManager::DumpToFile(soFunc, "SoFunc_optimized");
|
||||
|
||||
return soFunc;
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JITs from streamout shader IR
|
||||
/// @param hJitMgr - JitManager handle
|
||||
/// @param func - LLVM function IR
|
||||
/// @return PFN_SO_FUNC - pointer to SOS function
|
||||
PFN_SO_FUNC JitStreamoutFunc(HANDLE hJitMgr, const HANDLE hFunc)
|
||||
{
|
||||
const llvm::Function *func = (const llvm::Function*)hFunc;
|
||||
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
|
||||
PFN_SO_FUNC pfnStreamOut;
|
||||
pfnStreamOut = (PFN_SO_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
|
||||
// MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
|
||||
pJitMgr->mIsModuleFinalized = true;
|
||||
|
||||
return pfnStreamOut;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief JIT compiles streamout shader
|
||||
/// @param hJitMgr - JitManager handle
|
||||
/// @param state - SO state to build function from
|
||||
extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitMgr, const STREAMOUT_COMPILE_STATE& state)
|
||||
{
|
||||
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
|
||||
|
||||
STREAMOUT_COMPILE_STATE soState = state;
|
||||
if (soState.offsetAttribs)
|
||||
{
|
||||
for (uint32_t i = 0; i < soState.stream.numDecls; ++i)
|
||||
{
|
||||
soState.stream.decl[i].attribSlot -= soState.offsetAttribs;
|
||||
}
|
||||
}
|
||||
|
||||
pJitMgr->SetupNewModule();
|
||||
|
||||
StreamOutJit theJit(pJitMgr);
|
||||
HANDLE hFunc = theJit.Create(soState);
|
||||
|
||||
return JitStreamoutFunc(hJitMgr, hFunc);
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file streamout_jit.h
|
||||
*
|
||||
* @brief Definition of the streamout jitter
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "common/formats.h"
|
||||
#include "core/state.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// STREAMOUT_DECL - Stream decl
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct STREAMOUT_DECL
|
||||
{
|
||||
// Buffer that stream maps to.
|
||||
DWORD bufferIndex;
|
||||
|
||||
// attribute to stream
|
||||
uint32_t attribSlot;
|
||||
|
||||
// attribute component mask
|
||||
uint32_t componentMask;
|
||||
|
||||
// indicates this decl is a hole
|
||||
bool hole;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// STREAMOUT_STREAM - Stream decls
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct STREAMOUT_STREAM
|
||||
{
|
||||
// numnber of decls for this stream
|
||||
uint32_t numDecls;
|
||||
|
||||
// array of numDecls decls
|
||||
STREAMOUT_DECL decl[128];
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// State required for streamout jit
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
struct STREAMOUT_COMPILE_STATE
|
||||
{
|
||||
// number of verts per primitive
|
||||
uint32_t numVertsPerPrim;
|
||||
uint32_t offsetAttribs; ///< attrib offset to subtract from all STREAMOUT_DECL::attribSlot values.
|
||||
|
||||
uint64_t streamMask;
|
||||
|
||||
// stream decls
|
||||
STREAMOUT_STREAM stream;
|
||||
|
||||
bool operator==(const STREAMOUT_COMPILE_STATE &other) const
|
||||
{
|
||||
if (numVertsPerPrim != other.numVertsPerPrim) return false;
|
||||
if (stream.numDecls != other.stream.numDecls) return false;
|
||||
|
||||
for (uint32_t i = 0; i < stream.numDecls; ++i)
|
||||
{
|
||||
if (stream.decl[i].bufferIndex != other.stream.decl[i].bufferIndex) return false;
|
||||
if (stream.decl[i].attribSlot != other.stream.decl[i].attribSlot) return false;
|
||||
if (stream.decl[i].componentMask != other.stream.decl[i].componentMask) return false;
|
||||
if (stream.decl[i].hole != other.stream.decl[i].hole) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
|
@ -0,0 +1,287 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file ClearTile.cpp
|
||||
*
|
||||
* @brief Functionality for ClearTile. StoreHotTileClear clears a single macro
|
||||
* tile in the destination.
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "common/os.h"
|
||||
#include "core/context.h"
|
||||
#include "common/formats.h"
|
||||
#include "memory/TilingFunctions.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "memory/Convert.h"
|
||||
|
||||
typedef void(*PFN_STORE_TILES_CLEAR)(const FLOAT*, SWR_SURFACE_STATE*, UINT, UINT);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Clear Raster Tile Function Tables.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
static PFN_STORE_TILES_CLEAR sStoreTilesClearColorTable[NUM_SWR_FORMATS];
|
||||
|
||||
static PFN_STORE_TILES_CLEAR sStoreTilesClearDepthTable[NUM_SWR_FORMATS];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// StoreRasterTileClear
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
|
||||
struct StoreRasterTileClear
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Stores an 8x8 raster tile to the destination surface.
|
||||
/// @param pColor - Pointer to clear color.
|
||||
/// @param pDstSurface - Destination surface state
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
INLINE static void StoreClear(
|
||||
const BYTE* dstFormattedColor,
|
||||
UINT dstBytesPerPixel,
|
||||
SWR_SURFACE_STATE* pDstSurface,
|
||||
UINT x, UINT y) // (x, y) pixel coordinate to start of raster tile.
|
||||
{
|
||||
// Compute destination address for raster tile.
|
||||
BYTE* pDstTile = (BYTE*)pDstSurface->pBaseAddress +
|
||||
(y * pDstSurface->pitch) + (x * dstBytesPerPixel);
|
||||
|
||||
// start of first row
|
||||
BYTE* pDst = pDstTile;
|
||||
UINT dstBytesPerRow = 0;
|
||||
|
||||
// For each raster tile pixel in row 0 (rx, 0)
|
||||
for (UINT rx = 0; (rx < KNOB_TILE_X_DIM) && ((x + rx) < pDstSurface->width); ++rx)
|
||||
{
|
||||
memcpy(pDst, dstFormattedColor, dstBytesPerPixel);
|
||||
|
||||
// Increment pointer to next pixel in row.
|
||||
pDst += dstBytesPerPixel;
|
||||
dstBytesPerRow += dstBytesPerPixel;
|
||||
}
|
||||
|
||||
// start of second row
|
||||
pDst = pDstTile + pDstSurface->pitch;
|
||||
|
||||
// For each remaining row in the rest of the raster tile
|
||||
for (UINT ry = 1; (ry < KNOB_TILE_Y_DIM) && ((y + ry) < pDstSurface->height); ++ry)
|
||||
{
|
||||
// copy row
|
||||
memcpy(pDst, pDstTile, dstBytesPerRow);
|
||||
|
||||
// Increment pointer to first pixel in next row.
|
||||
pDst += pDstSurface->pitch;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// StoreMacroTileClear - Stores a macro tile clear to its raster tiles.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
|
||||
struct StoreMacroTileClear
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Stores a macrotile to the destination surface.
|
||||
/// @param pColor - Pointer to color to write to pixels.
|
||||
/// @param pDstSurface - Destination surface state
|
||||
/// @param x, y - Coordinates to macro tile
|
||||
static void StoreClear(
|
||||
const FLOAT *pColor,
|
||||
SWR_SURFACE_STATE* pDstSurface,
|
||||
UINT x, UINT y)
|
||||
{
|
||||
UINT dstBytesPerPixel = (FormatTraits<DstFormat>::bpp / 8);
|
||||
|
||||
BYTE dstFormattedColor[16]; // max bpp is 128, so 16 is all we need here for one pixel
|
||||
|
||||
FLOAT srcColor[4];
|
||||
|
||||
for (UINT comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
|
||||
{
|
||||
srcColor[comp] = pColor[FormatTraits<DstFormat>::swizzle(comp)];
|
||||
}
|
||||
|
||||
// using this helper function, but the Tiling Traits is unused inside it so just using a dummy value
|
||||
ConvertPixelFromFloat<DstFormat>(dstFormattedColor, srcColor);
|
||||
|
||||
// Store each raster tile from the hot tile to the destination surface.
|
||||
// TODO: Put in check for partial coverage on x/y -- SWR_ASSERT if it happens.
|
||||
// Intent is for this function to only handle full tiles.
|
||||
for (UINT row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
for (UINT col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
|
||||
{
|
||||
StoreRasterTileClear<SrcFormat, DstFormat>::StoreClear(dstFormattedColor, dstBytesPerPixel, pDstSurface, (x + col), (y + row));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Writes clear color to every pixel of a render surface
|
||||
/// @param hPrivateContext - Handle to private DC
|
||||
/// @param renderTargetIndex - Index to destination render target
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pClearColor - Pointer to clear color
|
||||
void StoreHotTileClear(
|
||||
SWR_SURFACE_STATE *pDstSurface,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
UINT x,
|
||||
UINT y,
|
||||
const float* pClearColor)
|
||||
{
|
||||
PFN_STORE_TILES_CLEAR pfnStoreTilesClear = NULL;
|
||||
|
||||
SWR_ASSERT(renderTargetIndex != SWR_ATTACHMENT_STENCIL); ///@todo Not supported yet.
|
||||
|
||||
if (renderTargetIndex != SWR_ATTACHMENT_DEPTH)
|
||||
{
|
||||
pfnStoreTilesClear = sStoreTilesClearColorTable[pDstSurface->format];
|
||||
}
|
||||
else
|
||||
{
|
||||
pfnStoreTilesClear = sStoreTilesClearDepthTable[pDstSurface->format];
|
||||
}
|
||||
|
||||
SWR_ASSERT(pfnStoreTilesClear != NULL);
|
||||
|
||||
// Store a macro tile.
|
||||
/// @todo Once all formats are supported then if check can go away. This is to help us near term to make progress.
|
||||
if (pfnStoreTilesClear != NULL)
|
||||
{
|
||||
pfnStoreTilesClear(pClearColor, pDstSurface, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// STORE_TILES_CLEAR_COLOR_ENTRY - Registers the macrotile clear routine for a
/// single destination color format. The source format is always R32G32B32A32_FLOAT.
#define STORE_TILES_CLEAR_COLOR_ENTRY(dstFormat) \
    sStoreTilesClearColorTable[dstFormat] = StoreMacroTileClear<R32G32B32A32_FLOAT, dstFormat>::StoreClear;

//////////////////////////////////////////////////////////////////////////
/// INIT_STORE_TILES_CLEAR_COLOR_TABLE - Helper macro that zeroes
/// sStoreTilesClearColorTable and then fills in one entry per supported
/// destination color format. Formats not listed keep a NULL entry.
#define INIT_STORE_TILES_CLEAR_COLOR_TABLE() \
    memset(sStoreTilesClearColorTable, 0, sizeof(sStoreTilesClearColorTable)); \
    \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32A32_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32A32_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32A32_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32X32_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32B32_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16A16_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16A16_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16A16_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16A16_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16A16_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32G32_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16X16_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16X16_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B8G8R8A8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B8G8R8A8_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R10G10B10A2_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R10G10B10A2_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R10G10B10A2_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8A8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8A8_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8A8_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8A8_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8A8_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B10G10R10A2_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B10G10R10A2_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R11G11B10_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R32_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(A32_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B8G8R8X8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B8G8R8X8_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8X8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8X8_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B10G10R10X2_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B5G6R5_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B5G6R5_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B5G5R5A1_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B5G5R5A1_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B4G4R4A4_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B4G4R4A4_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(A16_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(A16_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B5G5R5X1_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B5G5R5X1_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(A8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC1_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC2_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC3_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC4_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC5_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC1_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC2_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC3_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC4_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(BC5_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16_FLOAT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16_UNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8_UNORM_SRGB) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R16G16B16_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R10G10B10A2_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R10G10B10A2_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B10G10R10A2_SNORM) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B10G10R10A2_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(B10G10R10A2_SINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8_UINT) \
    STORE_TILES_CLEAR_COLOR_ENTRY(R8G8B8_SINT)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// INIT_STORE_TILES_CLEAR_DEPTH_TABLE - Helper macro that zeroes
/// sStoreTilesClearDepthTable and then registers the macrotile clear routine
/// for the R32_FLOAT and R24_UNORM_X8_TYPELESS destination formats. The source
/// format is R32_FLOAT for both; all other entries stay NULL.
|
||||
#define INIT_STORE_TILES_CLEAR_DEPTH_TABLE() \
|
||||
memset(sStoreTilesClearDepthTable, 0, sizeof(sStoreTilesClearDepthTable)); \
|
||||
\
|
||||
sStoreTilesClearDepthTable[R32_FLOAT] = StoreMacroTileClear<R32_FLOAT, R32_FLOAT>::StoreClear; \
|
||||
sStoreTilesClearDepthTable[R24_UNORM_X8_TYPELESS] = StoreMacroTileClear<R32_FLOAT, R24_UNORM_X8_TYPELESS>::StoreClear; \
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Sets up the clear dispatch tables: expands the color table macro
///        and then the depth table macro, each of which zeroes its table
///        before registering the per-format StoreClear routines. These are
///        the tables StoreHotTileClear indexes by surface format.
|
||||
void InitSimClearTilesTable()
|
||||
{
|
||||
INIT_STORE_TILES_CLEAR_COLOR_TABLE();
|
||||
INIT_STORE_TILES_CLEAR_DEPTH_TABLE();
|
||||
}
|
|
@ -0,0 +1,698 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file Convert.h
|
||||
*
|
||||
* @brief Conversion utility functions
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#if defined(_WIN32)
|
||||
// disable "potential divide by 0"
|
||||
#pragma warning(disable: 4723)
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Expand a 16-bit float (1 sign, 5 exponent, 10 mantissa bits) to a
///        32-bit single precision float. Zero, Inf and NaN keep their sign;
///        any NaN becomes the quiet NaN 0x7fc00000; denormals are renormalized.
/// @param val - 16-bit float in the low 16 bits
/// @todo Maybe move this outside of this file into a header?
static float ConvertSmallFloatTo32(UINT val)
{
    // Split the half into its three fields; the sign is moved straight to bit 31.
    const uint32_t signBits  = ((uint32_t)val & 0x8000) << 16;
    const uint32_t expField  = (val >> 10) & 0x1f;
    const uint32_t fracField = val & 0x3ff;

    UINT bits;
    if ((expField == 0) && (fracField == 0))
    {
        // Signed zero
        bits = signBits;
    }
    else if (expField == 0x1f)
    {
        // Inf when the fraction is empty, otherwise NaN
        bits = signBits | ((fracField == 0) ? 0x7f800000 : 0x7fc00000);
    }
    else
    {
        uint32_t exponent = expField;
        uint32_t mantissa = fracField << 13;

        // A zero exponent here implies a nonzero fraction (zero was handled above),
        // i.e. a denormal: shift until the hidden bit appears, lowering the exponent
        // once for every shift after the first.
        if (exponent == 0)
        {
            const uint32_t hiddenBit = 0x400u << 13;
            for (mantissa <<= 1; mantissa < hiddenBit; mantissa <<= 1)
            {
                --exponent;
            }
            mantissa &= (0x3ffu << 13);
        }

        // Rebias from 15 to 127; unsigned wrap-around makes this correct for the
        // "negative" exponents produced by the denormal loop.
        bits = signBits | (((exponent + 112) & 0xff) << 23) | mantissa;
    }

    return *(float*)&bits;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 32-bit single precision float to an
///        unsigned small float with 5 exponent bits and a variable
///        number of mantissa bits (no sign bit).
///        - NaN of either sign becomes a positive quiet NaN.
///        - Negative values, including -INF, are clamped to 0.
///        - Values too large to represent clamp to the largest finite value.
///        - Otherwise the mantissa is truncated, except that it is rounded up
///          when every dropped bit is set.
/// @param val - 32-bit float
/// @return (exponent << numMantissaBits) | mantissa
/// @todo Maybe move this outside of this file into a header?
template<UINT numMantissaBits>
static UINT Convert32ToSmallFloat(float val)
{
    uint32_t sign, exp, mant;

    // Extract the sign, exponent, and mantissa
    UINT uf = *(UINT*)&val;

    sign = (uf & 0x80000000) >> 31;
    exp = (uf & 0x7F800000) >> 23;
    mant = uf & 0x007FFFFF;

    // NaN is tested before the sign clamp so that a negative NaN stays a NaN.
    if ((exp == 0xFF) && (mant != 0)) // NaN
    {
        exp = 0x1F;
        // Top mantissa bit only. (1 << numMantissaBits) would land in the
        // exponent field and leave the mantissa empty, which encodes INF.
        mant = 1u << (numMantissaBits - 1);
    }
    // 10/11 bit floats are unsigned. Negative values are clamped to 0.
    else if (sign != 0)
    {
        exp = mant = 0;
    }
    else if (exp == 0xFF) // INF (mantissa is known to be 0 here)
    {
        exp = 0x1F;
        mant = 0;
    }
    else if (exp > (0x70 + 0x1E)) // Too big to represent
    {
        exp = 0x1Eu;
        mant = (1u << numMantissaBits) - 1; // 0x3F for 6 bit mantissa.
    }
    else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
    {
        // Make the hidden bit explicit, then shift it down once per missing
        // exponent step before dropping the low mantissa bits.
        mant |= 0x00800000;
        for (; exp <= 0x70; mant >>= 1, exp++)
            ;
        exp = 0;
        mant = mant >> (23 - numMantissaBits);
    }
    else if (exp < 0x66) // Too small to represent -> Zero
    {
        exp = 0;
        mant = 0;
    }
    else
    {
        // Bits that are shifted off below. The mask has to cover all
        // (23 - numMantissaBits) dropped bits; a fixed 13-bit mask is only
        // right for a 10-bit mantissa.
        const uint32_t droppedMask = (1u << (23 - numMantissaBits)) - 1;
        const uint32_t roundBits = mant & droppedMask;

        // convert exponent and mantissa to the small format
        exp = exp - 0x70u;
        mant = mant >> (23 - numMantissaBits);

        // Essentially RTZ, but round up if off by only 1 lsb
        if (roundBits == droppedMask)
        {
            mant++;
            // check for overflow of the mantissa into the exponent
            if ((mant & (0x3u << numMantissaBits)) != 0)
                exp++;
            // make sure only the needed bits are used
            mant &= (1u << numMantissaBits) - 1;
        }
    }

    UINT tmpVal = (exp << numMantissaBits) | mant;
    return tmpVal;
}
|
||||
|
||||
#if KNOB_ARCH == KNOB_ARCH_AVX
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 32-bit single precision float to a
///        16-bit float (1 sign, 5 exponent, 10 mantissa bits).
///        NaN is returned with the sign bit set and mantissa 0x200, values
///        too large clamp to the largest finite half, values too small
///        become (signed) zero, and the mantissa is truncated except when
///        all 13 dropped bits are set, in which case it is rounded up.
/// @param val - 32-bit float
/// @todo Maybe move this outside of this file into a header?
static uint16_t Convert32To16Float(float val)
{
    // Pull the three IEEE fields out of the raw bits.
    const uint32_t bits   = *(uint32_t*)&val;
    const uint32_t exp32  = (bits >> 23) & 0xFF;
    const uint32_t frac32 = bits & 0x007FFFFF;

    uint32_t signBit = bits >> 31;
    uint32_t exp16   = 0;
    uint32_t frac16  = 0;

    if (std::isnan(val))
    {
        signBit = 1; // set the sign bit for NANs
        exp16   = 0x1F;
        frac16  = 0x200;
    }
    else if (std::isinf(val))
    {
        exp16 = 0x1F;
    }
    else if (exp32 > (0x70 + 0x1E))
    {
        // Too big to represent -> max representable value
        exp16  = 0x1E;
        frac16 = 0x3FF;
    }
    else if (exp32 < 0x66)
    {
        // Too small to represent -> Zero (exp16 and frac16 are already 0)
    }
    else if (exp32 <= 0x70)
    {
        // Denormal half: make the hidden bit explicit and shift it right once
        // per exponent step up to 0x71, plus the 13 bits that are dropped.
        const uint32_t shift = (0x71 - exp32) + 13;
        frac16 = (frac32 | 0x00800000) >> shift;
    }
    else
    {
        // Normal half: rebias the exponent and drop the low 13 mantissa bits.
        exp16  = exp32 - 0x70;
        frac16 = frac32 >> 13;

        // Essentially RTZ, but round up if off by only 1 lsb
        if ((frac32 & 0x1FFFu) == 0x1FFFu)
        {
            ++frac16;
            // a carry out of the mantissa bumps the exponent
            if ((frac16 & 0xC00u) != 0)
            {
                ++exp16;
            }
            frac16 &= 0x3FF;
        }
    }

    return (uint16_t)((signBit << 15) | (exp16 << 10) | frac16);
}
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieve color from hot tile source which is always float.
|
||||
/// @param pDstPixel - Pointer to destination pixel.
|
||||
/// @param srcPixel - Pointer to source pixel (pre-swizzled according to dest).
|
||||
template<SWR_FORMAT DstFormat>
|
||||
static void ConvertPixelFromFloat(
|
||||
BYTE* pDstPixel,
|
||||
const float srcPixel[4])
|
||||
{
|
||||
UINT outColor[4]; // typeless bits
|
||||
|
||||
// Store component
|
||||
for (UINT comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
|
||||
{
|
||||
SWR_TYPE type = FormatTraits<DstFormat>::GetType(comp);
|
||||
|
||||
float src = srcPixel[comp];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case SWR_TYPE_UNORM:
|
||||
{
|
||||
// Force NaN to 0. IEEE standard, comparisons involving NaN always evaluate to false.
|
||||
src = (src != src) ? 0.0f : src;
|
||||
|
||||
// Clamp [0, 1]
|
||||
src = std::max(src, 0.0f);
|
||||
src = std::min(src, 1.0f);
|
||||
|
||||
// SRGB
|
||||
if (FormatTraits<DstFormat>::isSRGB && comp != 3)
|
||||
{
|
||||
src = (src <= 0.0031308f) ? (12.92f * src) : (1.055f * powf(src, (1.0f / 2.4f)) - 0.055f);
|
||||
}
|
||||
|
||||
// Float scale to integer scale.
|
||||
UINT scale = (1 << FormatTraits<DstFormat>::GetBPC(comp)) - 1;
|
||||
src = (float)scale * src;
|
||||
src = roundf(src);
|
||||
outColor[comp] = (UINT)src; // Drop fractional part.
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_SNORM:
|
||||
{
|
||||
SWR_ASSERT(!FormatTraits<DstFormat>::isSRGB);
|
||||
|
||||
// Force NaN to 0. IEEE standard, comparisons involving NaN always evaluate to false.
|
||||
src = (src != src) ? 0.0f : src;
|
||||
|
||||
// Clamp [-1, 1]
|
||||
src = std::max(src, -1.0f);
|
||||
src = std::min(src, 1.0f);
|
||||
|
||||
// Float scale to integer scale.
|
||||
UINT scale = (1 << (FormatTraits<DstFormat>::GetBPC(comp) - 1)) - 1;
|
||||
src = (float)scale * src;
|
||||
|
||||
// Round
|
||||
src += (src >= 0) ? 0.5f : -0.5f;
|
||||
|
||||
INT out = (INT)src;
|
||||
|
||||
outColor[comp] = *(UINT*)&out;
|
||||
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_UINT:
|
||||
{
|
||||
///@note The *(UINT*)& is currently necessary as the hot tile appears to always be float.
|
||||
// However, the number in the hot tile should be unsigned integer. So doing this
|
||||
// to preserve bits intead of doing a float -> integer conversion.
|
||||
if (FormatTraits<DstFormat>::GetBPC(comp) == 32)
|
||||
{
|
||||
outColor[comp] = *(UINT*)&src;
|
||||
}
|
||||
else
|
||||
{
|
||||
outColor[comp] = *(UINT*)&src;
|
||||
UINT max = (1 << FormatTraits<DstFormat>::GetBPC(comp)) - 1; // 2^numBits - 1
|
||||
|
||||
outColor[comp] = std::min(max, outColor[comp]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_SINT:
|
||||
{
|
||||
if (FormatTraits<DstFormat>::GetBPC(comp) == 32)
|
||||
{
|
||||
outColor[comp] = *(UINT*)&src;
|
||||
}
|
||||
else
|
||||
{
|
||||
INT out = *(INT*)&src; // Hot tile format is SINT?
|
||||
INT max = (1 << (FormatTraits<DstFormat>::GetBPC(comp) - 1)) - 1;
|
||||
INT min = -1 - max;
|
||||
|
||||
///@note The output is unsigned integer (bag of bits) and so performing
|
||||
// the clamping here based on range of output component. Also, manually adding
|
||||
// the sign bit in the appropriate spot. Maybe a better way?
|
||||
out = std::max(out, min);
|
||||
out = std::min(out, max);
|
||||
|
||||
outColor[comp] = *(UINT*)&out;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_FLOAT:
|
||||
{
|
||||
if (FormatTraits<DstFormat>::GetBPC(comp) == 16)
|
||||
{
|
||||
// Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
|
||||
// @todo 16bit float instruction support is orthogonal to avx support. need to
|
||||
// add check for F16C support instead.
|
||||
#if KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
__m128 src128 = _mm_set1_ps(src);
|
||||
__m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
|
||||
UINT value = _mm_extract_epi16(srci128, 0);
|
||||
#else
|
||||
UINT value = Convert32To16Float(src);
|
||||
#endif
|
||||
|
||||
outColor[comp] = value;
|
||||
}
|
||||
else if (FormatTraits<DstFormat>::GetBPC(comp) == 11)
|
||||
{
|
||||
outColor[comp] = Convert32ToSmallFloat<6>(src);
|
||||
}
|
||||
else if (FormatTraits<DstFormat>::GetBPC(comp) == 10)
|
||||
{
|
||||
outColor[comp] = Convert32ToSmallFloat<5>(src);
|
||||
}
|
||||
else
|
||||
{
|
||||
outColor[comp] = *(UINT*)&src;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
SWR_ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
typename FormatTraits<DstFormat>::FormatT* pPixel = (typename FormatTraits<DstFormat>::FormatT*)pDstPixel;
|
||||
|
||||
switch (FormatTraits<DstFormat>::numComps)
|
||||
{
|
||||
case 4:
|
||||
pPixel->a = outColor[3];
|
||||
case 3:
|
||||
pPixel->b = outColor[2];
|
||||
case 2:
|
||||
pPixel->g = outColor[1];
|
||||
case 1:
|
||||
pPixel->r = outColor[0];
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Convert pixel in any format to float32
|
||||
/// @param pDstPixel - Pointer to destination pixel.
|
||||
/// @param srcPixel - Pointer to source pixel
|
||||
template<SWR_FORMAT SrcFormat>
|
||||
INLINE static void ConvertPixelToFloat(
|
||||
float dstPixel[4],
|
||||
const BYTE* pSrc)
|
||||
{
|
||||
UINT srcColor[4]; // typeless bits
|
||||
|
||||
// unpack src pixel
|
||||
typename FormatTraits<SrcFormat>::FormatT* pPixel = (typename FormatTraits<SrcFormat>::FormatT*)pSrc;
|
||||
|
||||
// apply format defaults
|
||||
for (uint32_t comp = 0; comp < 4; ++comp)
|
||||
{
|
||||
uint32_t def = FormatTraits<SrcFormat>::GetDefault(comp);
|
||||
dstPixel[comp] = *(float*)&def;
|
||||
}
|
||||
|
||||
// load format data
|
||||
switch (FormatTraits<SrcFormat>::numComps)
|
||||
{
|
||||
case 4:
|
||||
srcColor[3] = pPixel->a;
|
||||
case 3:
|
||||
srcColor[2] = pPixel->b;
|
||||
case 2:
|
||||
srcColor[1] = pPixel->g;
|
||||
case 1:
|
||||
srcColor[0] = pPixel->r;
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0);
|
||||
}
|
||||
|
||||
// Convert components
|
||||
for (UINT comp = 0; comp < FormatTraits<SrcFormat>::numComps; ++comp)
|
||||
{
|
||||
SWR_TYPE type = FormatTraits<SrcFormat>::GetType(comp);
|
||||
|
||||
UINT src = srcColor[comp];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case SWR_TYPE_UNORM:
|
||||
{
|
||||
float dst;
|
||||
if (FormatTraits<SrcFormat>::isSRGB && comp != 3)
|
||||
{
|
||||
dst = *(float*)&srgb8Table[src];
|
||||
}
|
||||
else
|
||||
{
|
||||
// component sizes > 16 must use fp divide to maintain ulp requirements
|
||||
if (FormatTraits<SrcFormat>::GetBPC(comp) > 16)
|
||||
{
|
||||
dst = (float)src / (float)((1 << FormatTraits<SrcFormat>::GetBPC(comp)) - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
const float scale = (1.0f / (float)((1 << FormatTraits<SrcFormat>::GetBPC(comp)) - 1));
|
||||
dst = (float)src * scale;
|
||||
}
|
||||
}
|
||||
dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = dst;
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_SNORM:
|
||||
{
|
||||
SWR_ASSERT(!FormatTraits<SrcFormat>::isSRGB);
|
||||
|
||||
float dst;
|
||||
if (src == 0x10)
|
||||
{
|
||||
dst = -1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (FormatTraits<SrcFormat>::GetBPC(comp))
|
||||
{
|
||||
case 8:
|
||||
dst = (float)((int8_t)src);
|
||||
break;
|
||||
case 16:
|
||||
dst = (float)((int16_t)src);
|
||||
break;
|
||||
case 32:
|
||||
dst = (float)((int32_t)src);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "attempted to load from SNORM with unsupported bpc");
|
||||
dst = 0.0f;
|
||||
break;
|
||||
}
|
||||
dst = dst * (1.0f / ((1 << (FormatTraits<SrcFormat>::GetBPC(comp) - 1)) - 1));
|
||||
}
|
||||
dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = dst;
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_UINT:
|
||||
{
|
||||
UINT dst = (UINT)src;
|
||||
dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_SINT:
|
||||
{
|
||||
int dst;
|
||||
switch (FormatTraits<SrcFormat>::GetBPC(comp))
|
||||
{
|
||||
case 8:
|
||||
dst = (int8_t)src;
|
||||
break;
|
||||
case 16:
|
||||
dst = (int16_t)src;
|
||||
break;
|
||||
case 32:
|
||||
dst = (int32_t)src;
|
||||
break;
|
||||
default:
|
||||
assert(0 && "attempted to load from SINT with unsupported bpc");
|
||||
dst = 0;
|
||||
break;
|
||||
}
|
||||
dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
|
||||
break;
|
||||
}
|
||||
case SWR_TYPE_FLOAT:
|
||||
{
|
||||
float dst;
|
||||
if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
|
||||
{
|
||||
#if KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
// Convert from 16-bit float to 32-bit float using _mm_cvtph_ps
|
||||
// @todo 16bit float instruction support is orthogonal to avx support. need to
|
||||
// add check for F16C support instead.
|
||||
__m128i src128 = _mm_set1_epi32(src);
|
||||
__m128 res = _mm_cvtph_ps(src128);
|
||||
_mm_store_ss(&dst, res);
|
||||
#else
|
||||
dst = ConvertSmallFloatTo32(src);
|
||||
#endif
|
||||
}
|
||||
else if (FormatTraits<SrcFormat>::GetBPC(comp) == 11)
|
||||
{
|
||||
dst = ConvertSmallFloatTo32(src << 4);
|
||||
}
|
||||
else if (FormatTraits<SrcFormat>::GetBPC(comp) == 10)
|
||||
{
|
||||
dst = ConvertSmallFloatTo32(src << 5);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst = *(float*)&src;
|
||||
}
|
||||
|
||||
dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
SWR_ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// non-templated version of conversion functions
|
||||
INLINE static void ConvertPixelFromFloat(
|
||||
SWR_FORMAT format,
|
||||
uint8_t* pDst,
|
||||
const float srcPixel[4])
|
||||
{
|
||||
switch (format)
|
||||
{
|
||||
case R32G32B32A32_FLOAT: ConvertPixelFromFloat<R32G32B32A32_FLOAT>(pDst, srcPixel); break;
|
||||
case R32G32B32A32_SINT: ConvertPixelFromFloat<R32G32B32A32_SINT>(pDst, srcPixel); break;
|
||||
case R32G32B32A32_UINT: ConvertPixelFromFloat<R32G32B32A32_UINT>(pDst, srcPixel); break;
|
||||
case R32G32B32X32_FLOAT: ConvertPixelFromFloat<R32G32B32X32_FLOAT>(pDst, srcPixel); break;
|
||||
case R32G32B32A32_SSCALED: ConvertPixelFromFloat<R32G32B32A32_SSCALED>(pDst, srcPixel); break;
|
||||
case R32G32B32A32_USCALED: ConvertPixelFromFloat<R32G32B32A32_USCALED>(pDst, srcPixel); break;
|
||||
case R32G32B32_FLOAT: ConvertPixelFromFloat<R32G32B32_FLOAT>(pDst, srcPixel); break;
|
||||
case R32G32B32_SINT: ConvertPixelFromFloat<R32G32B32_SINT>(pDst, srcPixel); break;
|
||||
case R32G32B32_UINT: ConvertPixelFromFloat<R32G32B32_UINT>(pDst, srcPixel); break;
|
||||
case R32G32B32_SSCALED: ConvertPixelFromFloat<R32G32B32_SSCALED>(pDst, srcPixel); break;
|
||||
case R32G32B32_USCALED: ConvertPixelFromFloat<R32G32B32_USCALED>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_UNORM: ConvertPixelFromFloat<R16G16B16A16_UNORM>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_SNORM: ConvertPixelFromFloat<R16G16B16A16_SNORM>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_SINT: ConvertPixelFromFloat<R16G16B16A16_SINT>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_UINT: ConvertPixelFromFloat<R16G16B16A16_UINT>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_FLOAT: ConvertPixelFromFloat<R16G16B16A16_FLOAT>(pDst, srcPixel); break;
|
||||
case R32G32_FLOAT: ConvertPixelFromFloat<R32G32_FLOAT>(pDst, srcPixel); break;
|
||||
case R32G32_SINT: ConvertPixelFromFloat<R32G32_SINT>(pDst, srcPixel); break;
|
||||
case R32G32_UINT: ConvertPixelFromFloat<R32G32_UINT>(pDst, srcPixel); break;
|
||||
case R32_FLOAT_X8X24_TYPELESS: ConvertPixelFromFloat<R32_FLOAT_X8X24_TYPELESS>(pDst, srcPixel); break;
|
||||
case R16G16B16X16_UNORM: ConvertPixelFromFloat<R16G16B16X16_UNORM>(pDst, srcPixel); break;
|
||||
case R16G16B16X16_FLOAT: ConvertPixelFromFloat<R16G16B16X16_FLOAT>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_SSCALED: ConvertPixelFromFloat<R16G16B16A16_SSCALED>(pDst, srcPixel); break;
|
||||
case R16G16B16A16_USCALED: ConvertPixelFromFloat<R16G16B16A16_USCALED>(pDst, srcPixel); break;
|
||||
case R32G32_SSCALED: ConvertPixelFromFloat<R32G32_SSCALED>(pDst, srcPixel); break;
|
||||
case R32G32_USCALED: ConvertPixelFromFloat<R32G32_USCALED>(pDst, srcPixel); break;
|
||||
case R32_FLOAT_X8X24_TYPELESS_LD: ConvertPixelFromFloat<R32_FLOAT_X8X24_TYPELESS_LD>(pDst, srcPixel); break;
|
||||
case B8G8R8A8_UNORM: ConvertPixelFromFloat<B8G8R8A8_UNORM>(pDst, srcPixel); break;
|
||||
case B8G8R8A8_UNORM_SRGB: ConvertPixelFromFloat<B8G8R8A8_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_UNORM: ConvertPixelFromFloat<R10G10B10A2_UNORM>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_UNORM_SRGB: ConvertPixelFromFloat<R10G10B10A2_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_UINT: ConvertPixelFromFloat<R10G10B10A2_UINT>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_UNORM: ConvertPixelFromFloat<R8G8B8A8_UNORM>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8A8_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_SNORM: ConvertPixelFromFloat<R8G8B8A8_SNORM>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_SINT: ConvertPixelFromFloat<R8G8B8A8_SINT>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_UINT: ConvertPixelFromFloat<R8G8B8A8_UINT>(pDst, srcPixel); break;
|
||||
case R16G16_UNORM: ConvertPixelFromFloat<R16G16_UNORM>(pDst, srcPixel); break;
|
||||
case R16G16_SNORM: ConvertPixelFromFloat<R16G16_SNORM>(pDst, srcPixel); break;
|
||||
case R16G16_SINT: ConvertPixelFromFloat<R16G16_SINT>(pDst, srcPixel); break;
|
||||
case R16G16_UINT: ConvertPixelFromFloat<R16G16_UINT>(pDst, srcPixel); break;
|
||||
case R16G16_FLOAT: ConvertPixelFromFloat<R16G16_FLOAT>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_UNORM: ConvertPixelFromFloat<B10G10R10A2_UNORM>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_UNORM_SRGB: ConvertPixelFromFloat<B10G10R10A2_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R11G11B10_FLOAT: ConvertPixelFromFloat<R11G11B10_FLOAT>(pDst, srcPixel); break;
|
||||
case R32_SINT: ConvertPixelFromFloat<R32_SINT>(pDst, srcPixel); break;
|
||||
case R32_UINT: ConvertPixelFromFloat<R32_UINT>(pDst, srcPixel); break;
|
||||
case R32_FLOAT: ConvertPixelFromFloat<R32_FLOAT>(pDst, srcPixel); break;
|
||||
case R24_UNORM_X8_TYPELESS: ConvertPixelFromFloat<R24_UNORM_X8_TYPELESS>(pDst, srcPixel); break;
|
||||
case R24_UNORM_X8_TYPELESS_LD: ConvertPixelFromFloat<R24_UNORM_X8_TYPELESS_LD>(pDst, srcPixel); break;
|
||||
case A32_FLOAT: ConvertPixelFromFloat<A32_FLOAT>(pDst, srcPixel); break;
|
||||
case B8G8R8X8_UNORM: ConvertPixelFromFloat<B8G8R8X8_UNORM>(pDst, srcPixel); break;
|
||||
case B8G8R8X8_UNORM_SRGB: ConvertPixelFromFloat<B8G8R8X8_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R8G8B8X8_UNORM: ConvertPixelFromFloat<R8G8B8X8_UNORM>(pDst, srcPixel); break;
|
||||
case R8G8B8X8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8X8_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R9G9B9E5_SHAREDEXP: ConvertPixelFromFloat<R9G9B9E5_SHAREDEXP>(pDst, srcPixel); break;
|
||||
case B10G10R10X2_UNORM: ConvertPixelFromFloat<B10G10R10X2_UNORM>(pDst, srcPixel); break;
|
||||
case R10G10B10X2_USCALED: ConvertPixelFromFloat<R10G10B10X2_USCALED>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_SSCALED: ConvertPixelFromFloat<R8G8B8A8_SSCALED>(pDst, srcPixel); break;
|
||||
case R8G8B8A8_USCALED: ConvertPixelFromFloat<R8G8B8A8_USCALED>(pDst, srcPixel); break;
|
||||
case R16G16_SSCALED: ConvertPixelFromFloat<R16G16_SSCALED>(pDst, srcPixel); break;
|
||||
case R16G16_USCALED: ConvertPixelFromFloat<R16G16_USCALED>(pDst, srcPixel); break;
|
||||
case R32_SSCALED: ConvertPixelFromFloat<R32_SSCALED>(pDst, srcPixel); break;
|
||||
case R32_USCALED: ConvertPixelFromFloat<R32_USCALED>(pDst, srcPixel); break;
|
||||
case B5G6R5_UNORM: ConvertPixelFromFloat<B5G6R5_UNORM>(pDst, srcPixel); break;
|
||||
case B5G6R5_UNORM_SRGB: ConvertPixelFromFloat<B5G6R5_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case B5G5R5A1_UNORM: ConvertPixelFromFloat<B5G5R5A1_UNORM>(pDst, srcPixel); break;
|
||||
case B5G5R5A1_UNORM_SRGB: ConvertPixelFromFloat<B5G5R5A1_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case B4G4R4A4_UNORM: ConvertPixelFromFloat<B4G4R4A4_UNORM>(pDst, srcPixel); break;
|
||||
case B4G4R4A4_UNORM_SRGB: ConvertPixelFromFloat<B4G4R4A4_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R8G8_UNORM: ConvertPixelFromFloat<R8G8_UNORM>(pDst, srcPixel); break;
|
||||
case R8G8_SNORM: ConvertPixelFromFloat<R8G8_SNORM>(pDst, srcPixel); break;
|
||||
case R8G8_SINT: ConvertPixelFromFloat<R8G8_SINT>(pDst, srcPixel); break;
|
||||
case R8G8_UINT: ConvertPixelFromFloat<R8G8_UINT>(pDst, srcPixel); break;
|
||||
case R16_UNORM: ConvertPixelFromFloat<R16_UNORM>(pDst, srcPixel); break;
|
||||
case R16_SNORM: ConvertPixelFromFloat<R16_SNORM>(pDst, srcPixel); break;
|
||||
case R16_SINT: ConvertPixelFromFloat<R16_SINT>(pDst, srcPixel); break;
|
||||
case R16_UINT: ConvertPixelFromFloat<R16_UINT>(pDst, srcPixel); break;
|
||||
case R16_FLOAT: ConvertPixelFromFloat<R16_FLOAT>(pDst, srcPixel); break;
|
||||
case A16_UNORM: ConvertPixelFromFloat<A16_UNORM>(pDst, srcPixel); break;
|
||||
case A16_FLOAT: ConvertPixelFromFloat<A16_FLOAT>(pDst, srcPixel); break;
|
||||
case B5G5R5X1_UNORM: ConvertPixelFromFloat<B5G5R5X1_UNORM>(pDst, srcPixel); break;
|
||||
case B5G5R5X1_UNORM_SRGB: ConvertPixelFromFloat<B5G5R5X1_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R8G8_SSCALED: ConvertPixelFromFloat<R8G8_SSCALED>(pDst, srcPixel); break;
|
||||
case R8G8_USCALED: ConvertPixelFromFloat<R8G8_USCALED>(pDst, srcPixel); break;
|
||||
case R16_SSCALED: ConvertPixelFromFloat<R16_SSCALED>(pDst, srcPixel); break;
|
||||
case R16_USCALED: ConvertPixelFromFloat<R16_USCALED>(pDst, srcPixel); break;
|
||||
case R8_UNORM: ConvertPixelFromFloat<R8_UNORM>(pDst, srcPixel); break;
|
||||
case R8_SNORM: ConvertPixelFromFloat<R8_SNORM>(pDst, srcPixel); break;
|
||||
case R8_SINT: ConvertPixelFromFloat<R8_SINT>(pDst, srcPixel); break;
|
||||
case R8_UINT: ConvertPixelFromFloat<R8_UINT>(pDst, srcPixel); break;
|
||||
case A8_UNORM: ConvertPixelFromFloat<A8_UNORM>(pDst, srcPixel); break;
|
||||
case R8_SSCALED: ConvertPixelFromFloat<R8_SSCALED>(pDst, srcPixel); break;
|
||||
case R8_USCALED: ConvertPixelFromFloat<R8_USCALED>(pDst, srcPixel); break;
|
||||
case YCRCB_SWAPUVY: ConvertPixelFromFloat<YCRCB_SWAPUVY>(pDst, srcPixel); break;
|
||||
case BC1_UNORM: ConvertPixelFromFloat<BC1_UNORM>(pDst, srcPixel); break;
|
||||
case BC2_UNORM: ConvertPixelFromFloat<BC2_UNORM>(pDst, srcPixel); break;
|
||||
case BC3_UNORM: ConvertPixelFromFloat<BC3_UNORM>(pDst, srcPixel); break;
|
||||
case BC4_UNORM: ConvertPixelFromFloat<BC4_UNORM>(pDst, srcPixel); break;
|
||||
case BC5_UNORM: ConvertPixelFromFloat<BC5_UNORM>(pDst, srcPixel); break;
|
||||
case BC1_UNORM_SRGB: ConvertPixelFromFloat<BC1_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case BC2_UNORM_SRGB: ConvertPixelFromFloat<BC2_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case BC3_UNORM_SRGB: ConvertPixelFromFloat<BC3_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case YCRCB_SWAPUV: ConvertPixelFromFloat<YCRCB_SWAPUV>(pDst, srcPixel); break;
|
||||
case R8G8B8_UNORM: ConvertPixelFromFloat<R8G8B8_UNORM>(pDst, srcPixel); break;
|
||||
case R8G8B8_SNORM: ConvertPixelFromFloat<R8G8B8_SNORM>(pDst, srcPixel); break;
|
||||
case R8G8B8_SSCALED: ConvertPixelFromFloat<R8G8B8_SSCALED>(pDst, srcPixel); break;
|
||||
case R8G8B8_USCALED: ConvertPixelFromFloat<R8G8B8_USCALED>(pDst, srcPixel); break;
|
||||
case BC4_SNORM: ConvertPixelFromFloat<BC4_SNORM>(pDst, srcPixel); break;
|
||||
case BC5_SNORM: ConvertPixelFromFloat<BC5_SNORM>(pDst, srcPixel); break;
|
||||
case R16G16B16_FLOAT: ConvertPixelFromFloat<R16G16B16_FLOAT>(pDst, srcPixel); break;
|
||||
case R16G16B16_UNORM: ConvertPixelFromFloat<R16G16B16_UNORM>(pDst, srcPixel); break;
|
||||
case R16G16B16_SNORM: ConvertPixelFromFloat<R16G16B16_SNORM>(pDst, srcPixel); break;
|
||||
case R16G16B16_SSCALED: ConvertPixelFromFloat<R16G16B16_SSCALED>(pDst, srcPixel); break;
|
||||
case R16G16B16_USCALED: ConvertPixelFromFloat<R16G16B16_USCALED>(pDst, srcPixel); break;
|
||||
case BC7_UNORM: ConvertPixelFromFloat<BC7_UNORM>(pDst, srcPixel); break;
|
||||
case BC7_UNORM_SRGB: ConvertPixelFromFloat<BC7_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R8G8B8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8_UNORM_SRGB>(pDst, srcPixel); break;
|
||||
case R16G16B16_UINT: ConvertPixelFromFloat<R16G16B16_UINT>(pDst, srcPixel); break;
|
||||
case R16G16B16_SINT: ConvertPixelFromFloat<R16G16B16_SINT>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_SNORM: ConvertPixelFromFloat<R10G10B10A2_SNORM>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_USCALED: ConvertPixelFromFloat<R10G10B10A2_USCALED>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_SSCALED: ConvertPixelFromFloat<R10G10B10A2_SSCALED>(pDst, srcPixel); break;
|
||||
case R10G10B10A2_SINT: ConvertPixelFromFloat<R10G10B10A2_SINT>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_SNORM: ConvertPixelFromFloat<B10G10R10A2_SNORM>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_USCALED: ConvertPixelFromFloat<B10G10R10A2_USCALED>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_SSCALED: ConvertPixelFromFloat<B10G10R10A2_SSCALED>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_UINT: ConvertPixelFromFloat<B10G10R10A2_UINT>(pDst, srcPixel); break;
|
||||
case B10G10R10A2_SINT: ConvertPixelFromFloat<B10G10R10A2_SINT>(pDst, srcPixel); break;
|
||||
case R8G8B8_UINT: ConvertPixelFromFloat<R8G8B8_UINT>(pDst, srcPixel); break;
|
||||
case R8G8B8_SINT: ConvertPixelFromFloat<R8G8B8_SINT>(pDst, srcPixel); break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,396 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file LoadTile.cpp
|
||||
*
|
||||
* @brief Functionality for Load
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "common/os.h"
|
||||
#include "common/formats.h"
|
||||
#include "core/context.h"
|
||||
#include "core/rdtsc_core.h"
|
||||
#include "memory/TilingFunctions.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
#include "memory/Convert.h"
|
||||
|
||||
typedef void(*PFN_LOAD_TILES)(SWR_SURFACE_STATE*, uint8_t*, uint32_t, uint32_t, uint32_t);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// Load Raster Tile Function Tables.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
static PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_NONE[NUM_SWR_FORMATS];
|
||||
static PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_NONE[NUM_SWR_FORMATS];
|
||||
|
||||
static PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS];
|
||||
static PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR[NUM_SWR_FORMATS];
|
||||
|
||||
static PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// LoadRasterTile
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
|
||||
struct LoadRasterTile
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieve color from hot tile source which is always float.
|
||||
/// @param pSrc - Pointer to raster tile.
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param output - output color
|
||||
INLINE static void SetSwizzledDstColor(
|
||||
const float srcColor[4],
|
||||
uint32_t x, uint32_t y,
|
||||
uint8_t* pDst)
|
||||
{
|
||||
typedef SimdTile<DstFormat, SrcFormat> SimdT;
|
||||
|
||||
SimdT* pDstSimdTiles = (SimdT*)pDst;
|
||||
|
||||
// Compute which simd tile we're accessing within 8x8 tile.
|
||||
// i.e. Compute linear simd tile coordinate given (x, y) in pixel coordinates.
|
||||
uint32_t simdIndex = (y / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM) + (x / SIMD_TILE_X_DIM);
|
||||
|
||||
SimdT* pSimdTile = &pDstSimdTiles[simdIndex];
|
||||
|
||||
uint32_t simdOffset = (y % SIMD_TILE_Y_DIM) * SIMD_TILE_X_DIM + (x % SIMD_TILE_X_DIM);
|
||||
|
||||
pSimdTile->SetSwizzledColor(simdOffset, srcColor);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Loads an 8x8 raster tile from the src surface.
|
||||
/// @param pSrcSurface - Src surface state
|
||||
/// @param pDst - Destination hot tile pointer
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
INLINE static void Load(
|
||||
SWR_SURFACE_STATE* pSrcSurface,
|
||||
uint8_t* pDst,
|
||||
uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
|
||||
{
|
||||
uint32_t lodWidth = (pSrcSurface->width == 1) ? 1 : pSrcSurface->width >> pSrcSurface->lod;
|
||||
uint32_t lodHeight = (pSrcSurface->height == 1) ? 1 : pSrcSurface->height >> pSrcSurface->lod;
|
||||
|
||||
// For each raster tile pixel (rx, ry)
|
||||
for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
|
||||
{
|
||||
for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
|
||||
{
|
||||
if (((x + rx) < lodWidth) &&
|
||||
((y + ry) < lodHeight))
|
||||
{
|
||||
uint8_t* pSrc = (uint8_t*)ComputeSurfaceAddress<false>(x + rx, y + ry, pSrcSurface->arrayIndex + renderTargetArrayIndex,
|
||||
pSrcSurface->arrayIndex + renderTargetArrayIndex, sampleNum,
|
||||
pSrcSurface->lod, pSrcSurface);
|
||||
|
||||
float srcColor[4];
|
||||
ConvertPixelToFloat<SrcFormat>(srcColor, pSrc);
|
||||
|
||||
// store pixel to hottile
|
||||
SetSwizzledDstColor(srcColor, rx, ry, pDst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// LoadMacroTile - Loads a macro tile which consists of raster tiles.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
|
||||
struct LoadMacroTile
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Load a macrotile to the destination surface.
|
||||
/// @param pSrc - Pointer to macro tile.
|
||||
/// @param pDstSurface - Destination surface state
|
||||
/// @param x, y - Coordinates to macro tile
|
||||
static void Load(
|
||||
SWR_SURFACE_STATE* pSrcSurface,
|
||||
uint8_t *pDstHotTile,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)
|
||||
{
|
||||
// Load each raster tile from the hot tile to the destination surface.
|
||||
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
|
||||
{
|
||||
for (uint32_t sampleNum = 0; sampleNum < pSrcSurface->numSamples; sampleNum++)
|
||||
{
|
||||
LoadRasterTile<TTraits, SrcFormat, DstFormat>::Load(pSrcSurface, pDstHotTile,
|
||||
(x + col), (y + row), sampleNum, renderTargetArrayIndex);
|
||||
pDstHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<DstFormat>::bpp / 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Forwards to gBucketMgr.StartBucket(id) when KNOB_ENABLE_RDTSC is defined;
/// otherwise does nothing.
/// @param id - Bucket id handed to the bucket manager.
static void BUCKETS_START(UINT id)
{
#if defined(KNOB_ENABLE_RDTSC)
    gBucketMgr.StartBucket(id);
#else
    (void)id; // not used when bucket tracking is compiled out
#endif
}
|
||||
|
||||
/// Forwards to gBucketMgr.StopBucket(id) when KNOB_ENABLE_RDTSC is defined;
/// otherwise does nothing.
/// @param id - Bucket id handed to the bucket manager.
static void BUCKETS_STOP(UINT id)
{
#if defined(KNOB_ENABLE_RDTSC)
    gBucketMgr.StopBucket(id);
#else
    (void)id; // not used when bucket tracking is compiled out
#endif
}
|
||||
|
||||
// on demand buckets for load tiles
|
||||
static std::vector<int> sBuckets(NUM_SWR_FORMATS, -1);
|
||||
static std::mutex sBucketMutex;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Loads a full hottile from a render surface
|
||||
/// @param hPrivateContext - Handle to private DC
|
||||
/// @param dstFormat - Format for hot tile.
|
||||
/// @param renderTargetIndex - Index to src render target
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pDstHotTile - Pointer to Hot Tile
|
||||
void LoadHotTile(
|
||||
SWR_SURFACE_STATE *pSrcSurface,
|
||||
SWR_FORMAT dstFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
|
||||
uint8_t *pDstHotTile)
|
||||
{
|
||||
PFN_LOAD_TILES pfnLoadTiles = NULL;
|
||||
|
||||
// don't need to load null surfaces
|
||||
if (pSrcSurface->type == SURFACE_NULL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// force 0 if requested renderTargetArrayIndex is OOB
|
||||
if (renderTargetArrayIndex >= pSrcSurface->depth)
|
||||
{
|
||||
renderTargetArrayIndex = 0;
|
||||
}
|
||||
|
||||
if (renderTargetIndex < SWR_ATTACHMENT_DEPTH)
|
||||
{
|
||||
switch (pSrcSurface->tileMode)
|
||||
{
|
||||
case SWR_TILE_NONE:
|
||||
pfnLoadTiles = sLoadTilesColorTable_SWR_TILE_NONE[pSrcSurface->format];
|
||||
break;
|
||||
case SWR_TILE_MODE_YMAJOR:
|
||||
pfnLoadTiles = sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR[pSrcSurface->format];
|
||||
break;
|
||||
case SWR_TILE_MODE_XMAJOR:
|
||||
pfnLoadTiles = sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR[pSrcSurface->format];
|
||||
break;
|
||||
case SWR_TILE_MODE_WMAJOR:
|
||||
SWR_ASSERT(pSrcSurface->format == R8_UINT);
|
||||
pfnLoadTiles = LoadMacroTile<TilingTraits<SWR_TILE_MODE_WMAJOR, 8>, R8_UINT, R8_UINT>::Load;
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0, "Unsupported tiling mode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (renderTargetIndex == SWR_ATTACHMENT_DEPTH)
|
||||
{
|
||||
// Currently depth can map to linear and tile-y.
|
||||
switch (pSrcSurface->tileMode)
|
||||
{
|
||||
case SWR_TILE_NONE:
|
||||
pfnLoadTiles = sLoadTilesDepthTable_SWR_TILE_NONE[pSrcSurface->format];
|
||||
break;
|
||||
case SWR_TILE_MODE_YMAJOR:
|
||||
pfnLoadTiles = sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR[pSrcSurface->format];
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0, "Unsupported tiling mode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
SWR_ASSERT(renderTargetIndex == SWR_ATTACHMENT_STENCIL);
|
||||
SWR_ASSERT(pSrcSurface->format == R8_UINT);
|
||||
switch (pSrcSurface->tileMode)
|
||||
{
|
||||
case SWR_TILE_NONE:
|
||||
pfnLoadTiles = LoadMacroTile<TilingTraits<SWR_TILE_NONE, 8>, R8_UINT, R8_UINT>::Load;
|
||||
break;
|
||||
case SWR_TILE_MODE_WMAJOR:
|
||||
pfnLoadTiles = LoadMacroTile<TilingTraits<SWR_TILE_MODE_WMAJOR, 8>, R8_UINT, R8_UINT>::Load;
|
||||
break;
|
||||
default:
|
||||
SWR_ASSERT(0, "Unsupported tiling mode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pfnLoadTiles == nullptr)
|
||||
{
|
||||
SWR_ASSERT(false, "Unsupported format for load tile");
|
||||
return;
|
||||
}
|
||||
|
||||
// Load a macro tile.
|
||||
#ifdef KNOB_ENABLE_RDTSC
|
||||
if (sBuckets[pSrcSurface->format] == -1)
|
||||
{
|
||||
// guard sBuckets update since storetiles is called by multiple threads
|
||||
sBucketMutex.lock();
|
||||
if (sBuckets[pSrcSurface->format] == -1)
|
||||
{
|
||||
const SWR_FORMAT_INFO& info = GetFormatInfo(pSrcSurface->format);
|
||||
BUCKET_DESC desc{ info.name, "", false, 0xffffffff };
|
||||
sBuckets[pSrcSurface->format] = gBucketMgr.RegisterBucket(desc);
|
||||
}
|
||||
sBucketMutex.unlock();
|
||||
}
|
||||
#endif
|
||||
|
||||
BUCKETS_START(sBuckets[pSrcSurface->format]);
|
||||
pfnLoadTiles(pSrcSurface, pDstHotTile, x, y, renderTargetArrayIndex);
|
||||
BUCKETS_STOP(sBuckets[pSrcSurface->format]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// INIT_LOAD_TILES_TABLE - Helper macro for setting up the tables.
|
||||
#define INIT_LOAD_TILES_COLOR_TABLE(tilemode) \
|
||||
memset(sLoadTilesColorTable_##tilemode, 0, sizeof(sLoadTilesColorTable_##tilemode)); \
|
||||
\
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32A32_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32A32_SINT] = LoadMacroTile<TilingTraits<tilemode, 128>, R32G32B32A32_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32A32_UINT] = LoadMacroTile<TilingTraits<tilemode, 128>, R32G32B32A32_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32X32_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 128>, R32G32B32X32_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 96>, R32G32B32_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32_SINT] = LoadMacroTile<TilingTraits<tilemode, 96>, R32G32B32_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32B32_UINT] = LoadMacroTile<TilingTraits<tilemode, 96>, R32G32B32_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16A16_UNORM] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16A16_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16A16_SNORM] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16A16_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16A16_SINT] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16A16_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16A16_UINT] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16A16_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16A16_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16A16_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 64>, R32G32_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32_SINT] = LoadMacroTile<TilingTraits<tilemode, 64>, R32G32_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32G32_UINT] = LoadMacroTile<TilingTraits<tilemode, 64>, R32G32_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16X16_UNORM] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16X16_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16X16_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 64>, R16G16B16X16_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B8G8R8A8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, B8G8R8A8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B8G8R8A8_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 32>, B8G8R8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R10G10B10A2_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R10G10B10A2_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R10G10B10A2_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 32>, R10G10B10A2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R10G10B10A2_UINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R10G10B10A2_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8A8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8A8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8A8_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8A8_SNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8A8_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8A8_SINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8A8_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8A8_UINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8A8_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R16G16_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16_SNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R16G16_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16_SINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R16G16_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16_UINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R16G16_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 32>, R16G16_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B10G10R10A2_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, B10G10R10A2_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B10G10R10A2_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 32>, B10G10R10A2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R11G11B10_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 32>, R11G11B10_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32_SINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R32_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32_UINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R32_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R32_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 32>, R32_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[A32_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 32>, A32_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B8G8R8X8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, B8G8R8X8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B8G8R8X8_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 32>, B8G8R8X8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8X8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8X8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8X8_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 32>, R8G8B8X8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B10G10R10X2_UNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, B10G10R10X2_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B5G6R5_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, B5G6R5_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B5G6R5_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 16>, B5G6R5_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B5G5R5A1_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, B5G5R5A1_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B5G5R5A1_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 16>, B5G5R5A1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B4G4R4A4_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, B4G4R4A4_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B4G4R4A4_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 16>, B4G4R4A4_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, R8G8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8_SNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, R8G8_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8_SINT] = LoadMacroTile<TilingTraits<tilemode, 16>, R8G8_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8_UINT] = LoadMacroTile<TilingTraits<tilemode, 16>, R8G8_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, R16_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16_SNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, R16_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16_SINT] = LoadMacroTile<TilingTraits<tilemode, 16>, R16_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16_UINT] = LoadMacroTile<TilingTraits<tilemode, 16>, R16_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 16>, R16_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[A16_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, A16_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[A16_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 16>, A16_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B5G5R5X1_UNORM] = LoadMacroTile<TilingTraits<tilemode, 16>, B5G5R5X1_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B5G5R5X1_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 16>, B5G5R5X1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 8>, R8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8_SNORM] = LoadMacroTile<TilingTraits<tilemode, 8>, R8_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8_SINT] = LoadMacroTile<TilingTraits<tilemode, 8>, R8_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8_UINT] = LoadMacroTile<TilingTraits<tilemode, 8>, R8_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[A8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 8>, A8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC1_UNORM] = LoadMacroTile<TilingTraits<tilemode, 64>, BC1_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC2_UNORM] = LoadMacroTile<TilingTraits<tilemode, 128>, BC2_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC3_UNORM] = LoadMacroTile<TilingTraits<tilemode, 128>, BC3_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC4_UNORM] = LoadMacroTile<TilingTraits<tilemode, 64>, BC4_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC5_UNORM] = LoadMacroTile<TilingTraits<tilemode, 128>, BC5_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC1_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 64>, BC1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC2_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 128>, BC2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC3_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 128>, BC3_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8_UNORM] = LoadMacroTile<TilingTraits<tilemode, 24>, R8G8B8_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8_SNORM] = LoadMacroTile<TilingTraits<tilemode, 24>, R8G8B8_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC4_SNORM] = LoadMacroTile<TilingTraits<tilemode, 64>, BC4_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[BC5_SNORM] = LoadMacroTile<TilingTraits<tilemode, 128>, BC5_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16_FLOAT] = LoadMacroTile<TilingTraits<tilemode, 48>, R16G16B16_FLOAT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16_UNORM] = LoadMacroTile<TilingTraits<tilemode, 48>, R16G16B16_UNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16_SNORM] = LoadMacroTile<TilingTraits<tilemode, 48>, R16G16B16_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8_UNORM_SRGB] = LoadMacroTile<TilingTraits<tilemode, 24>, R8G8B8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16_UINT] = LoadMacroTile<TilingTraits<tilemode, 48>, R16G16B16_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R16G16B16_SINT] = LoadMacroTile<TilingTraits<tilemode, 48>, R16G16B16_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R10G10B10A2_SNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, R10G10B10A2_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R10G10B10A2_SINT] = LoadMacroTile<TilingTraits<tilemode, 32>, R10G10B10A2_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B10G10R10A2_SNORM] = LoadMacroTile<TilingTraits<tilemode, 32>, B10G10R10A2_SNORM, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B10G10R10A2_UINT] = LoadMacroTile<TilingTraits<tilemode, 32>, B10G10R10A2_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[B10G10R10A2_SINT] = LoadMacroTile<TilingTraits<tilemode, 32>, B10G10R10A2_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8_UINT] = LoadMacroTile<TilingTraits<tilemode, 24>, R8G8B8_UINT, R32G32B32A32_FLOAT>::Load; \
|
||||
sLoadTilesColorTable_##tilemode[R8G8B8_SINT] = LoadMacroTile<TilingTraits<tilemode, 24>, R8G8B8_SINT, R32G32B32A32_FLOAT>::Load; \
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// INIT_LOAD_TILES_DEPTH_TABLE - Helper macro for setting up the depth
/// load-tile table of one tiling mode.  It zeroes
/// sLoadTilesDepthTable_<tilemode> and then installs a LoadMacroTile loader
/// for each source format listed below (R32_FLOAT is the last template
/// argument in every entry).  Formats that are not listed keep a zeroed entry.
/// @param tilemode - tile mode enumerator; it is passed to TilingTraits and
///                   also pasted into the table name.
#define INIT_LOAD_TILES_DEPTH_TABLE(tilemode) \
    memset(sLoadTilesDepthTable_##tilemode, 0, sizeof(sLoadTilesDepthTable_##tilemode)); \
    \
    sLoadTilesDepthTable_##tilemode[R16_UNORM]             = LoadMacroTile<TilingTraits<tilemode, 16>, R16_UNORM, R32_FLOAT>::Load; \
    sLoadTilesDepthTable_##tilemode[R32_FLOAT]             = LoadMacroTile<TilingTraits<tilemode, 32>, R32_FLOAT, R32_FLOAT>::Load; \
    sLoadTilesDepthTable_##tilemode[R24_UNORM_X8_TYPELESS] = LoadMacroTile<TilingTraits<tilemode, 32>, R24_UNORM_X8_TYPELESS, R32_FLOAT>::Load;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Sets up tables for LoadTile
|
||||
void InitSimLoadTilesTable()
|
||||
{
|
||||
INIT_LOAD_TILES_COLOR_TABLE(SWR_TILE_NONE);
|
||||
INIT_LOAD_TILES_DEPTH_TABLE(SWR_TILE_NONE);
|
||||
|
||||
INIT_LOAD_TILES_COLOR_TABLE(SWR_TILE_MODE_YMAJOR);
|
||||
INIT_LOAD_TILES_COLOR_TABLE(SWR_TILE_MODE_XMAJOR);
|
||||
|
||||
INIT_LOAD_TILES_DEPTH_TABLE(SWR_TILE_MODE_YMAJOR);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,581 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file TilingFunctions.h
|
||||
*
|
||||
* @brief Tiling functions.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "core/state.h"
#include "core/format_traits.h"
#include "memory/tilingtraits.h"

#include <algorithm>
#include <cstring>
|
||||
|
||||
// Upper bound on the number of LODs (mip levels) -- presumably sizes the
// per-LOD offset arrays in the surface state; TODO confirm against core/state.h.
#define MAX_NUM_LOD 15
|
||||
|
||||
// Rounds x up to a multiple of a: takes (x + a - 1) and subtracts the bits of
// that sum selected by the mask (a - 1).  The result is only a multiple of a
// when a is a power of two.  Both arguments are evaluated more than once, so
// they must be free of side effects.
// The alternative implementation with bitwise not (~) has an issue when a
// uint32 alignment is used with a 64-bit value, since the ~'ed value remains
// 32-bit.
#define GFX_ALIGN(x, a) (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1)))
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
|
||||
struct SimdTile
|
||||
{
|
||||
// SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
|
||||
float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieve color from simd.
|
||||
/// @param index - linear index to color within simd.
|
||||
/// @param outputColor - output color
|
||||
INLINE void GetSwizzledColor(
|
||||
uint32_t index,
|
||||
float outputColor[4])
|
||||
{
|
||||
// SOA pattern for 2x2 is a subset of 4x2.
|
||||
// 0 1 4 5
|
||||
// 2 3 6 7
|
||||
// The offset converts pattern to linear
|
||||
#if (SIMD_TILE_X_DIM == 4)
|
||||
static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
|
||||
#elif (SIMD_TILE_X_DIM == 2)
|
||||
static const uint32_t offset[] = { 0, 1, 2, 3 };
|
||||
#endif
|
||||
|
||||
for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
|
||||
{
|
||||
outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieve color from simd.
|
||||
/// @param index - linear index to color within simd.
|
||||
/// @param outputColor - output color
|
||||
INLINE void SetSwizzledColor(
|
||||
uint32_t index,
|
||||
const float src[4])
|
||||
{
|
||||
// SOA pattern for 2x2 is a subset of 4x2.
|
||||
// 0 1 4 5
|
||||
// 2 3 6 7
|
||||
// The offset converts pattern to linear
|
||||
#if (SIMD_TILE_X_DIM == 4)
|
||||
static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
|
||||
#elif (SIMD_TILE_X_DIM == 2)
|
||||
static const uint32_t offset[] = { 0, 1, 2, 3 };
|
||||
#endif
|
||||
|
||||
// Only loop over the components needed for destination.
|
||||
for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
|
||||
{
|
||||
this->color[i][offset[index]] = src[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct SimdTile <R8_UINT,R8_UINT>
|
||||
{
|
||||
// SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
|
||||
uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieve color from simd.
|
||||
/// @param index - linear index to color within simd.
|
||||
/// @param outputColor - output color
|
||||
INLINE void GetSwizzledColor(
|
||||
uint32_t index,
|
||||
float outputColor[4])
|
||||
{
|
||||
// SOA pattern for 2x2 is a subset of 4x2.
|
||||
// 0 1 4 5
|
||||
// 2 3 6 7
|
||||
// The offset converts pattern to linear
|
||||
#if (SIMD_TILE_X_DIM == 4)
|
||||
static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
|
||||
#elif (SIMD_TILE_X_DIM == 2)
|
||||
static const uint32_t offset[] = { 0, 1, 2, 3 };
|
||||
#endif
|
||||
|
||||
for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
|
||||
{
|
||||
uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
|
||||
outputColor[i] = *(float*)&src;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Retrieve color from simd.
|
||||
/// @param index - linear index to color within simd.
|
||||
/// @param outputColor - output color
|
||||
INLINE void SetSwizzledColor(
|
||||
uint32_t index,
|
||||
const float src[4])
|
||||
{
|
||||
// SOA pattern for 2x2 is a subset of 4x2.
|
||||
// 0 1 4 5
|
||||
// 2 3 6 7
|
||||
// The offset converts pattern to linear
|
||||
#if (SIMD_TILE_X_DIM == 4)
|
||||
static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
|
||||
#elif (SIMD_TILE_X_DIM == 2)
|
||||
static const uint32_t offset[] = { 0, 1, 2, 3 };
|
||||
#endif
|
||||
|
||||
// Only loop over the components needed for destination.
|
||||
for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
|
||||
{
|
||||
this->color[i][offset[index]] = *(uint8_t*)&src[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes lod offset for 1D surface at specified lod.
|
||||
/// @param baseWidth - width of basemip (mip 0).
|
||||
/// @param hAlign - horizontal alignment per miip, in texels
|
||||
/// @param lod - lod index
|
||||
/// @param offset - output offset.
|
||||
INLINE void ComputeLODOffset1D(
|
||||
const SWR_FORMAT_INFO& info,
|
||||
uint32_t baseWidth,
|
||||
uint32_t hAlign,
|
||||
uint32_t lod,
|
||||
uint32_t &offset)
|
||||
{
|
||||
if (lod == 0)
|
||||
{
|
||||
offset = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t curWidth = baseWidth;
|
||||
// translate mip width from pixels to blocks for block compressed formats
|
||||
// @note hAlign is already in blocks for compressed formats so no need to convert
|
||||
if (info.isBC) curWidth /= info.bcWidth;
|
||||
|
||||
offset = GFX_ALIGN(curWidth, hAlign);
|
||||
for (uint32_t l = 1; l < lod; ++l)
|
||||
{
|
||||
curWidth = GFX_ALIGN(std::max<uint32_t>(curWidth >> 1, 1U), hAlign);
|
||||
offset += curWidth;
|
||||
}
|
||||
|
||||
if (info.isSubsampled)
|
||||
{
|
||||
offset /= info.bcWidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes x lod offset for 2D surface at specified lod.
|
||||
/// @param baseWidth - width of basemip (mip 0).
|
||||
/// @param hAlign - horizontal alignment per mip, in texels
|
||||
/// @param lod - lod index
|
||||
/// @param offset - output offset.
|
||||
INLINE void ComputeLODOffsetX(
|
||||
const SWR_FORMAT_INFO& info,
|
||||
uint32_t baseWidth,
|
||||
uint32_t hAlign,
|
||||
uint32_t lod,
|
||||
uint32_t &offset)
|
||||
{
|
||||
if (lod < 2)
|
||||
{
|
||||
offset = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t curWidth = baseWidth;
|
||||
// convert mip width from pixels to blocks for block compressed formats
|
||||
// @note hAlign is already in blocks for compressed formats so no need to convert
|
||||
if (info.isBC) curWidth /= info.bcWidth;
|
||||
|
||||
curWidth = std::max<uint32_t>(curWidth >> 1, 1U);
|
||||
curWidth = GFX_ALIGN(curWidth, hAlign);
|
||||
|
||||
if (info.isSubsampled)
|
||||
{
|
||||
curWidth /= info.bcWidth;
|
||||
}
|
||||
|
||||
offset = curWidth;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes y lod offset for 2D surface at specified lod.
|
||||
/// @param baseWidth - width of basemip (mip 0).
|
||||
/// @param vAlign - vertical alignment per mip, in rows
|
||||
/// @param lod - lod index
|
||||
/// @param offset - output offset.
|
||||
INLINE void ComputeLODOffsetY(
|
||||
const SWR_FORMAT_INFO& info,
|
||||
uint32_t baseHeight,
|
||||
uint32_t vAlign,
|
||||
uint32_t lod,
|
||||
uint32_t &offset)
|
||||
{
|
||||
if (lod == 0)
|
||||
{
|
||||
offset = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
offset = 0;
|
||||
uint32_t mipHeight = baseHeight;
|
||||
|
||||
// translate mip height from pixels to blocks for block compressed formats
|
||||
// @note VAlign is already in blocks for compressed formats so no need to convert
|
||||
if (info.isBC) mipHeight /= info.bcHeight;
|
||||
|
||||
for (uint32_t l = 1; l <= lod; ++l)
|
||||
{
|
||||
uint32_t alignedMipHeight = GFX_ALIGN(mipHeight, vAlign);
|
||||
offset += ((l != 2) ? alignedMipHeight : 0);
|
||||
mipHeight = std::max<uint32_t>(mipHeight >> 1, 1U);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes 1D surface offset
|
||||
/// @param x - offset from start of array slice at given lod.
|
||||
/// @param array - array slice index
|
||||
/// @param lod - lod index
|
||||
/// @param pState - surface state
|
||||
/// @param xOffsetBytes - output offset in bytes.
|
||||
template<bool UseCachedOffsets>
|
||||
INLINE void ComputeSurfaceOffset1D(
|
||||
uint32_t x,
|
||||
uint32_t array,
|
||||
uint32_t lod,
|
||||
const SWR_SURFACE_STATE *pState,
|
||||
uint32_t &xOffsetBytes)
|
||||
{
|
||||
const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
|
||||
uint32_t lodOffset;
|
||||
|
||||
if (UseCachedOffsets)
|
||||
{
|
||||
lodOffset = pState->lodOffsets[0][lod];
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
|
||||
}
|
||||
|
||||
xOffsetBytes = (array * pState->qpitch + lodOffset + x) * info.Bpp;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Adjusts the array slice for legacy TileY MSAA
|
||||
/// @param pState - surface state
|
||||
/// @param array - array slice index
|
||||
/// @param sampleNum - requested sample
|
||||
INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
|
||||
{
|
||||
/// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
|
||||
if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
|
||||
pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
|
||||
pState->bInterleavedSamples)
|
||||
{
|
||||
uint32_t newX, newY, newSampleX, newSampleY;
|
||||
switch(pState->numSamples)
|
||||
{
|
||||
case 1:
|
||||
newX = x;
|
||||
newY = y;
|
||||
newSampleX = newSampleY = 0;
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
assert(pState->type == SURFACE_2D);
|
||||
static const uint32_t xMask = 0xFFFFFFFD;
|
||||
static const uint32_t sampleMaskX = 0x1;
|
||||
newX = pdep_u32(x, xMask);
|
||||
newY = y;
|
||||
newSampleX = pext_u32(sampleNum, sampleMaskX);
|
||||
newSampleY = 0;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
{
|
||||
assert(pState->type == SURFACE_2D);
|
||||
static const uint32_t mask = 0xFFFFFFFD;
|
||||
static const uint32_t sampleMaskX = 0x1;
|
||||
static const uint32_t sampleMaskY = 0x2;
|
||||
newX = pdep_u32(x, mask);
|
||||
newY = pdep_u32(y, mask);
|
||||
newSampleX = pext_u32(sampleNum, sampleMaskX);
|
||||
newSampleY = pext_u32(sampleNum, sampleMaskY);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
{
|
||||
assert(pState->type == SURFACE_2D);
|
||||
static const uint32_t xMask = 0xFFFFFFF9;
|
||||
static const uint32_t yMask = 0xFFFFFFFD;
|
||||
static const uint32_t sampleMaskX = 0x5;
|
||||
static const uint32_t sampleMaskY = 0x2;
|
||||
newX = pdep_u32(x, xMask);
|
||||
newY = pdep_u32(y, yMask);
|
||||
newSampleX = pext_u32(sampleNum, sampleMaskX);
|
||||
newSampleY = pext_u32(sampleNum, sampleMaskY);
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
{
|
||||
assert(pState->type == SURFACE_2D);
|
||||
static const uint32_t mask = 0xFFFFFFF9;
|
||||
static const uint32_t sampleMaskX = 0x5;
|
||||
static const uint32_t sampleMaskY = 0xA;
|
||||
newX = pdep_u32(x, mask);
|
||||
newY = pdep_u32(y, mask);
|
||||
newSampleX = pext_u32(sampleNum, sampleMaskX);
|
||||
newSampleY = pext_u32(sampleNum, sampleMaskY);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unsupported sample count");
|
||||
newX = newY = 0;
|
||||
newSampleX = newSampleY = 0;
|
||||
break;
|
||||
}
|
||||
x = newX | (newSampleX << 1);
|
||||
y = newY | (newSampleY << 1);
|
||||
}
|
||||
else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
|
||||
pState->tileMode == SWR_TILE_NONE)
|
||||
{
|
||||
uint32_t sampleShift;
|
||||
switch(pState->numSamples)
|
||||
{
|
||||
case 1:
|
||||
assert(sampleNum == 0);
|
||||
sampleShift = 0;
|
||||
break;
|
||||
case 2:
|
||||
assert(pState->type == SURFACE_2D);
|
||||
sampleShift = 1;
|
||||
break;
|
||||
case 4:
|
||||
assert(pState->type == SURFACE_2D);
|
||||
sampleShift = 2;
|
||||
break;
|
||||
case 8:
|
||||
assert(pState->type == SURFACE_2D);
|
||||
sampleShift = 3;
|
||||
break;
|
||||
case 16:
|
||||
assert(pState->type == SURFACE_2D);
|
||||
sampleShift = 4;
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unsupported sample count");
|
||||
sampleShift = 0;
|
||||
break;
|
||||
}
|
||||
arrayIndex = (arrayIndex << sampleShift) | sampleNum;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes 2D surface offset
|
||||
/// @param x - horizontal offset from start of array slice and lod.
|
||||
/// @param y - vertical offset from start of array slice and lod.
|
||||
/// @param array - array slice index
|
||||
/// @param lod - lod index
|
||||
/// @param pState - surface state
|
||||
/// @param xOffsetBytes - output x offset in bytes.
|
||||
/// @param yOffsetRows - output y offset in bytes.
|
||||
template<bool UseCachedOffsets>
|
||||
INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
|
||||
{
|
||||
const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
|
||||
uint32_t lodOffsetX, lodOffsetY;
|
||||
|
||||
if (UseCachedOffsets)
|
||||
{
|
||||
lodOffsetX = pState->lodOffsets[0][lod];
|
||||
lodOffsetY = pState->lodOffsets[1][lod];
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
|
||||
ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
|
||||
}
|
||||
|
||||
AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
|
||||
xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
|
||||
yOffsetRows = (array * pState->qpitch) + lodOffsetY + y + pState->yOffset;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes 3D surface offset
|
||||
/// @param x - horizontal offset from start of array slice and lod.
|
||||
/// @param y - vertical offset from start of array slice and lod.
|
||||
/// @param z - depth offset from start of array slice and lod.
|
||||
/// @param lod - lod index
|
||||
/// @param pState - surface state
|
||||
/// @param xOffsetBytes - output x offset in bytes.
|
||||
/// @param yOffsetRows - output y offset in rows.
|
||||
/// @param zOffsetSlices - output y offset in slices.
|
||||
template<bool UseCachedOffsets>
|
||||
INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
|
||||
{
|
||||
const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
|
||||
uint32_t lodOffsetX, lodOffsetY;
|
||||
|
||||
if (UseCachedOffsets)
|
||||
{
|
||||
lodOffsetX = pState->lodOffsets[0][lod];
|
||||
lodOffsetY = pState->lodOffsets[1][lod];
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
|
||||
ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
|
||||
}
|
||||
|
||||
xOffsetBytes = (x + lodOffsetX) * info.Bpp;
|
||||
yOffsetRows = lodOffsetY + y;
|
||||
zOffsetSlices = z;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
|
||||
/// and returns final surface address
|
||||
/// @param xOffsetBytes - x offset from base of surface in bytes
|
||||
/// @param yOffsetRows - y offset from base of surface in rows
|
||||
/// @param pState - pointer to the surface state
|
||||
template<typename TTraits>
|
||||
INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
|
||||
{
|
||||
return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
|
||||
/// and returns final surface address
|
||||
/// @param xOffsetBytes - x offset from base of surface in bytes
|
||||
/// @param yOffsetRows - y offset from base of surface in rows
|
||||
/// @param pState - pointer to the surface state
|
||||
template<typename TTraits>
|
||||
INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
|
||||
{
|
||||
return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
|
||||
/// and returns final surface address
|
||||
/// @param xOffsetBytes - x offset from base of surface in bytes
|
||||
/// @param yOffsetRows - y offset from base of surface in rows
|
||||
/// @param pState - pointer to the surface state
|
||||
INLINE
|
||||
uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
|
||||
{
|
||||
switch (pState->tileMode)
|
||||
{
|
||||
case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
|
||||
case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
|
||||
case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
|
||||
case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
|
||||
case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
|
||||
default: SWR_ASSERT(0, "Unsupported tiling mode");
|
||||
}
|
||||
return (uint32_t) NULL;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
|
||||
/// and returns final surface address
|
||||
/// @param xOffsetBytes - x offset from base of surface in bytes
|
||||
/// @param yOffsetRows - y offset from base of surface in rows
|
||||
/// @param zOffsetSlices - z offset from base of surface in slices
|
||||
/// @param pState - pointer to the surface state
|
||||
INLINE
|
||||
uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
|
||||
{
|
||||
switch (pState->tileMode)
|
||||
{
|
||||
case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
|
||||
case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
|
||||
case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
|
||||
default: SWR_ASSERT(0, "Unsupported tiling mode");
|
||||
}
|
||||
return (uint32_t) NULL;
|
||||
}
|
||||
|
||||
template<bool UseCachedOffsets>
|
||||
INLINE
|
||||
uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
|
||||
{
|
||||
uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
|
||||
switch (pState->type)
|
||||
{
|
||||
case SURFACE_BUFFER:
|
||||
case SURFACE_STRUCTURED_BUFFER:
|
||||
offsetX = x * pState->pitch;
|
||||
return offsetX;
|
||||
break;
|
||||
case SURFACE_1D:
|
||||
ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
|
||||
return TileSwizzle2D(offsetX, 0, pState);
|
||||
break;
|
||||
case SURFACE_2D:
|
||||
ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
|
||||
return TileSwizzle2D(offsetX, offsetY, pState);
|
||||
case SURFACE_3D:
|
||||
ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
|
||||
return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
|
||||
break;
|
||||
case SURFACE_CUBE:
|
||||
ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
|
||||
return TileSwizzle2D(offsetX, offsetY, pState);
|
||||
break;
|
||||
default: SWR_ASSERT(0, "Unsupported format");
|
||||
}
|
||||
|
||||
return (uint32_t) NULL;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes surface address at the given location and lod
|
||||
/// @param x - x location in pixels
|
||||
/// @param y - y location in rows
|
||||
/// @param z - z location for 3D surfaces
|
||||
/// @param array - array slice for 1D and 2D surfaces
|
||||
/// @param lod - level of detail
|
||||
/// @param pState - pointer to the surface state
|
||||
template<bool UseCachedOffsets>
|
||||
INLINE
|
||||
void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
|
||||
{
|
||||
return pState->pBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState);
|
||||
}
|
|
@ -0,0 +1,263 @@
|
|||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* @file tilingtraits.h
|
||||
*
|
||||
* @brief Tiling traits.
|
||||
*
|
||||
******************************************************************************/
|
||||
#pragma once
|
||||
|
||||
#include "core/state.h"
|
||||
|
||||
template<SWR_TILE_MODE mode, int>
|
||||
struct TilingTraits
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ mode };
|
||||
static UINT GetCu() { SWR_ASSERT(0); return 0; }
|
||||
static UINT GetCv() { SWR_ASSERT(0); return 0; }
|
||||
static UINT GetCr() { SWR_ASSERT(0); return 0; }
|
||||
static UINT GetTileIDShift() { SWR_ASSERT(0); return 0; }
|
||||
|
||||
/// @todo correct pdep shifts for all rastertile dims. Unused for now
|
||||
static UINT GetPdepX() { SWR_ASSERT(0); return 0x37; }
|
||||
static UINT GetPdepY() { SWR_ASSERT(0); return 0xC8; }
|
||||
};
|
||||
|
||||
template<int X> struct TilingTraits <SWR_TILE_NONE, X>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_NONE };
|
||||
static UINT GetCu() { return 0; }
|
||||
static UINT GetCv() { return 0; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return 0; }
|
||||
static UINT GetPdepX() { return 0x00; }
|
||||
static UINT GetPdepY() { return 0x00; }
|
||||
};
|
||||
|
||||
template<> struct TilingTraits <SWR_TILE_SWRZ, 8>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_SWRZ };
|
||||
static UINT GetCu() { return KNOB_TILE_X_DIM_SHIFT; }
|
||||
static UINT GetCv() { return KNOB_TILE_Y_DIM_SHIFT; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return KNOB_TILE_X_DIM_SHIFT + KNOB_TILE_Y_DIM_SHIFT; }
|
||||
|
||||
/// @todo correct pdep shifts for all rastertile dims. Unused for now
|
||||
static UINT GetPdepX() { SWR_ASSERT(0); return 0x00; }
|
||||
static UINT GetPdepY() { SWR_ASSERT(0); return 0x00; }
|
||||
};
|
||||
|
||||
template<> struct TilingTraits <SWR_TILE_SWRZ, 32>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_SWRZ };
|
||||
static UINT GetCu() { return KNOB_TILE_X_DIM_SHIFT + 2; }
|
||||
static UINT GetCv() { return KNOB_TILE_Y_DIM_SHIFT; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return KNOB_TILE_X_DIM_SHIFT + KNOB_TILE_Y_DIM_SHIFT + 2; }
|
||||
|
||||
static UINT GetPdepX() { return 0x37; }
|
||||
static UINT GetPdepY() { return 0xC8; }
|
||||
};
|
||||
|
||||
template<> struct TilingTraits <SWR_TILE_SWRZ, 128>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_SWRZ };
|
||||
static UINT GetCu() { return KNOB_TILE_X_DIM_SHIFT + 4; }
|
||||
static UINT GetCv() { return KNOB_TILE_Y_DIM_SHIFT; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return KNOB_TILE_X_DIM_SHIFT + KNOB_TILE_Y_DIM_SHIFT + 4; }
|
||||
|
||||
/// @todo correct pdep shifts for all rastertile dims. Unused for now
|
||||
static UINT GetPdepX() { SWR_ASSERT(0); return 0x37; }
|
||||
static UINT GetPdepY() { SWR_ASSERT(0); return 0xC8; }
|
||||
};
|
||||
|
||||
// y-major tiling layout unaffected by element size
|
||||
template<int X> struct TilingTraits <SWR_TILE_MODE_YMAJOR, X>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_MODE_YMAJOR };
|
||||
static UINT GetCu() { return 7; }
|
||||
static UINT GetCv() { return 5; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return 12; }
|
||||
|
||||
static UINT GetPdepX() { return 0xe0f; }
|
||||
static UINT GetPdepY() { return 0x1f0; }
|
||||
};
|
||||
|
||||
// x-major tiling layout unaffected by element size
|
||||
template<int X> struct TilingTraits <SWR_TILE_MODE_XMAJOR, X>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_MODE_XMAJOR };
|
||||
static UINT GetCu() { return 9; }
|
||||
static UINT GetCv() { return 3; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return 12; }
|
||||
|
||||
static UINT GetPdepX() { return 0x1ff; }
|
||||
static UINT GetPdepY() { return 0xe00; }
|
||||
};
|
||||
|
||||
template<int X> struct TilingTraits <SWR_TILE_MODE_WMAJOR, X>
|
||||
{
|
||||
static const SWR_TILE_MODE TileMode{ SWR_TILE_MODE_WMAJOR };
|
||||
static UINT GetCu() { return 6; }
|
||||
static UINT GetCv() { return 6; }
|
||||
static UINT GetCr() { return 0; }
|
||||
static UINT GetTileIDShift() { return 12; }
|
||||
|
||||
static UINT GetPdepX() { return 0xe15; }
|
||||
static UINT GetPdepY() { return 0x1ea; }
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Parallel bit deposit: scatters the low-order bits of @p a into
///        the positions of the set bits of @p mask, lowest mask bit first.
///        Uses the _pdep_u32 intrinsic on AVX2 builds and a software loop
///        otherwise.
/// @param a - source value; bit i feeds the i-th lowest set bit of mask
/// @param mask - selects the destination bit positions
/// @return value with the deposited bits; bits outside mask are zero
INLINE
UINT pdep_u32(UINT a, UINT mask)
{
#if KNOB_ARCH==KNOB_ARCH_AVX2
    return _pdep_u32(a, mask);
#else
    UINT result = 0;

    // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
    // using bsf instead of funky loop
    DWORD maskIndex;
    while (_BitScanForward(&maskIndex, mask))
    {
        // 1. isolate lowest set bit of mask.  The shift is done on an
        //    unsigned 1 so that maskIndex == 31 does not overflow a
        //    signed int.
        const UINT lowest = 1u << maskIndex;

        // 2. + 3. copy the current LSB of the source into that position.
        //    A plain test replaces the sign-extension trick, which relied
        //    on implementation-defined signed right-shift behavior.
        if (a & 1u)
        {
            result |= lowest;
        }

        // 4. clear lowest bit
        mask &= ~lowest;

        // 5. prepare for next iteration
        a >>= 1;
    }

    return result;
#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
/// @brief Parallel bit extract: gathers the bits of @p a found at the set
///        positions of @p mask and packs them into the low-order bits of
///        the result, lowest mask bit first. Uses the _pext_u32 intrinsic
///        on AVX2 builds and a software loop otherwise.
/// @param a - source value
/// @param mask - selects which bits of a are extracted
/// @return the extracted bits, packed contiguously starting at bit 0
INLINE
UINT pext_u32(UINT a, UINT mask)
{
#if KNOB_ARCH==KNOB_ARCH_AVX2
    return _pext_u32(a, mask);
#else
    UINT result = 0;
    DWORD maskIndex;
    uint32_t currentBit = 0;
    while (_BitScanForward(&maskIndex, mask))
    {
        // 1. isolate lowest set bit of mask.  Unsigned shift so that
        //    maskIndex == 31 does not overflow a signed int.
        const UINT lowest = 1u << maskIndex;

        // 2. copy the selected source bit into the next free result bit.
        //    The destination shift is unsigned as well, so a full 32-bit
        //    mask can reach bit 31 safely.
        if (a & lowest)
        {
            result |= 1u << currentBit;
        }
        ++currentBit;

        // 3. clear lowest bit
        mask &= ~lowest;
    }
    return result;
#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes the tileID for 2D tiled surfaces
|
||||
/// @param pitch - surface pitch in bytes
|
||||
/// @param tileX - x offset in tiles
|
||||
/// @param tileY - y offset in tiles
|
||||
template<typename TTraits>
|
||||
INLINE UINT ComputeTileOffset2D(UINT pitch, UINT tileX, UINT tileY)
|
||||
{
|
||||
UINT tileID = tileY * (pitch >> TTraits::GetCu()) + tileX;
|
||||
return tileID << TTraits::GetTileIDShift();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes the tileID for 3D tiled surfaces
|
||||
/// @param qpitch - surface qpitch in rows
|
||||
/// @param pitch - surface pitch in bytes
|
||||
/// @param tileX - x offset in tiles
|
||||
/// @param tileY - y offset in tiles
|
||||
/// @param tileZ - y offset in tiles
|
||||
template<typename TTraits>
|
||||
INLINE UINT ComputeTileOffset3D(UINT qpitch, UINT pitch, UINT tileX, UINT tileY, UINT tileZ)
|
||||
{
|
||||
UINT tileID = (tileZ * (qpitch >> TTraits::GetCv()) + tileY) * (pitch >> TTraits::GetCu()) + tileX;
|
||||
return tileID << TTraits::GetTileIDShift();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes the byte offset for 2D tiled surfaces
|
||||
/// @param pitch - surface pitch in bytes
|
||||
/// @param x - x offset in bytes
|
||||
/// @param y - y offset in rows
|
||||
template<typename TTraits>
|
||||
INLINE UINT ComputeOffset2D(UINT pitch, UINT x, UINT y)
|
||||
{
|
||||
UINT tileID = ComputeTileOffset2D<TTraits>(pitch, x >> TTraits::GetCu(), y >> TTraits::GetCv());
|
||||
UINT xSwizzle = pdep_u32(x, TTraits::GetPdepX());
|
||||
UINT ySwizzle = pdep_u32(y, TTraits::GetPdepY());
|
||||
return (tileID | xSwizzle | ySwizzle);
|
||||
}
|
||||
|
||||
#if KNOB_ARCH <= KNOB_ARCH_AVX
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes the byte offset for 2D tiled surfaces. Specialization
|
||||
/// for tile-y surfaces that uses bit twiddling instead of pdep emulation.
|
||||
/// @param pitch - surface pitch in bytes
|
||||
/// @param x - x offset in bytes
|
||||
/// @param y - y offset in rows
|
||||
template<>
|
||||
INLINE UINT ComputeOffset2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(UINT pitch, UINT x, UINT y)
|
||||
{
|
||||
typedef TilingTraits<SWR_TILE_MODE_YMAJOR, 32> TTraits;
|
||||
|
||||
UINT tileID = ComputeTileOffset2D<TTraits>(pitch, x >> TTraits::GetCu(), y >> TTraits::GetCv());
|
||||
UINT xSwizzle = ((x << 5) & 0xe00) | (x & 0xf);
|
||||
UINT ySwizzle = (y << 4) & 0x1f0;
|
||||
return (tileID | xSwizzle | ySwizzle);
|
||||
}
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Computes the byte offset for 3D tiled surfaces
|
||||
/// @param qpitch - depth pitch in rows
|
||||
/// @param pitch - surface pitch in bytes
|
||||
/// @param x - x offset in bytes
|
||||
/// @param y - y offset in rows
|
||||
/// @param z - y offset in slices
|
||||
template<typename TTraits>
|
||||
INLINE UINT ComputeOffset3D(UINT qpitch, UINT pitch, UINT x, UINT y, UINT z)
|
||||
{
|
||||
UINT tileID = ComputeTileOffset3D<TTraits>(qpitch, pitch, x >> TTraits::GetCu(), y >> TTraits::GetCv(), z >> TTraits::GetCr());
|
||||
UINT xSwizzle = pdep_u32(x, TTraits::GetPdepX());
|
||||
UINT ySwizzle = pdep_u32(y, TTraits::GetPdepY());
|
||||
return (tileID | xSwizzle | ySwizzle);
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
# Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
# Python source
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import sys
|
||||
import knob_defs
|
||||
from mako.template import Template
|
||||
from mako.exceptions import RichTraceback
|
||||
|
||||
def write_template_to_string(template_filename, **kwargs):
    """Render a Mako template file and return the result as a string.

    template_filename -- path of the template passed to mako's Template
    kwargs            -- forwarded unchanged to Template.render()

    The rendered text is split into lines and re-joined with a single
    newline character, which normalizes platform line endings.

    If loading or rendering fails, the Mako traceback is printed to stderr
    and the original exception is re-raised, so that a broken template
    stops the build instead of silently producing a None result.
    """
    try:
        template = Template(filename=template_filename)
        # Split + Join fixes line-endings for whatever platform you are using
        return '\n'.join(template.render(**kwargs).splitlines())
    except Exception:
        traceback = RichTraceback()
        for (filename, lineno, function, line) in traceback.traceback:
            print("File %s, line %s, in %s" % (filename, lineno, function),
                  file=sys.stderr)
            print(line, "\n", file=sys.stderr)
        print("%s: %s" % (str(traceback.error.__class__.__name__), traceback.error),
              file=sys.stderr)
        # Propagate the failure; the caller must not treat this as success.
        raise
|
||||
|
||||
def write_template_to_file(template_filename, output_filename, **kwargs):
    """Render a Mako template and write the result to `output_filename`.

    template_filename -- path of the template to render
    output_filename   -- file to (over)write with the rendered text
    kwargs            -- forwarded to write_template_to_string()

    The template is rendered before the output file is opened, so a failed
    render does not truncate an existing output file.  Raises RuntimeError
    if rendering produced no text, rather than writing the literal string
    "None" into the generated file.
    """
    rendered = write_template_to_string(template_filename, **kwargs)
    if rendered is None:
        raise RuntimeError('Failed to render template: %s' % template_filename)

    with open(output_filename, "w") as outfile:
        print(rendered, file=outfile)
|
||||
|
||||
def main(args=sys.argv[1:]):
    """Generate gen_knobs.h and gen_knobs.cpp into an output directory.

    args -- argument list; must contain exactly one element, the output
            directory (created if it does not exist yet)

    Returns 0 on success and 1 on a usage error or when the output
    directory is invalid or cannot be created.
    """
    if len(args) != 1:
        print('Usage:', sys.argv[0], '<output_directory>', file=sys.stderr)
        return 1

    output_dir = args[0]
    if not os.path.isdir(output_dir):
        if os.path.exists(output_dir):
            print('ERROR: Invalid output directory:', output_dir, file=sys.stderr)
            return 1

        try:
            os.makedirs(output_dir)
        except OSError:
            # Another process may have created the directory in the
            # meantime (parallel builds); only fail if it is really missing.
            if not os.path.isdir(output_dir):
                print('ERROR: Could not create output directory:', output_dir, file=sys.stderr)
                return 1

    # Output path exists, now just run the template
    template_file = os.path.join(sys.path[0], 'templates', 'knobs.template')
    output_file = os.path.join(output_dir, 'gen_knobs.cpp')
    output_header = os.path.join(output_dir, 'gen_knobs.h')

    # The same template produces both files; gen_header selects which one.
    for f in [output_header, output_file]:
        write_template_to_file(template_file, f,
                filename='gen_knobs',
                knobs=knob_defs.KNOBS,
                includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'],
                gen_header=(f == output_header))

    return 0

if __name__ == '__main__':
    sys.exit(main())
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
# Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
# Python source
|
||||
# Knob table consumed by the knob generator script.  Each entry is a
# [NAME, attributes] pair; the attributes are:
#   'type'    - C++ type of the knob, as a string
#   'default' - default value, as C++ source text
#   'desc'    - list of description lines
KNOBS = [
    # ---- Debugging / general behavior ----
    ['ENABLE_ASSERT_DIALOGS', {
        'type': 'bool',
        'default': 'true',
        'desc': ['Use dialogs when asserts fire.',
                 'Asserts are only enabled in debug builds'],
    }],

    ['SINGLE_THREADED', {
        'type': 'bool',
        'default': 'false',
        'desc': ['If enabled will perform all rendering on the API thread.',
                 'This is useful mainly for debugging purposes.'],
    }],

    ['DUMP_SHADER_IR', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Dumps shader LLVM IR at various stages of jit compilation.'],
    }],

    ['USE_GENERIC_STORETILE', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Always use generic function for performing StoreTile.',
                 'Will be slightly slower than using optimized (jitted) path'],
    }],

    ['FAST_CLEAR', {
        'type': 'bool',
        'default': 'true',
        'desc': ['Replace 3D primitive execute with a SWRClearRT operation and',
                 'defer clear execution to first backend op on hottile, or hottile store'],
    }],

    # ---- Worker thread topology ----
    ['MAX_NUMA_NODES', {
        'type': 'uint32_t',
        'default': '0',
        'desc': ['Maximum # of NUMA-nodes per system used for worker threads',
                 ' 0 == ALL NUMA-nodes in the system',
                 ' N == Use at most N NUMA-nodes for rendering'],
    }],

    ['MAX_CORES_PER_NUMA_NODE', {
        'type': 'uint32_t',
        'default': '0',
        'desc': ['Maximum # of cores per NUMA-node used for worker threads.',
                 ' 0 == ALL non-API thread cores per NUMA-node',
                 ' N == Use at most N cores per NUMA-node'],
    }],

    ['MAX_THREADS_PER_CORE', {
        'type': 'uint32_t',
        'default': '1',
        'desc': ['Maximum # of (hyper)threads per physical core used for worker threads.',
                 ' 0 == ALL hyper-threads per core',
                 ' N == Use at most N hyper-threads per physical core'],
    }],

    ['MAX_WORKER_THREADS', {
        'type': 'uint32_t',
        'default': '0',
        'desc': ['Maximum worker threads to spawn.',
                 '',
                 'IMPORTANT: If this is non-zero, no worker threads will be bound to',
                 'specific HW threads. They will all be "floating" SW threads.',
                 'In this case, the above 3 KNOBS will be ignored.'],
    }],

    # ---- Buckets (profiling) frame range ----
    ['BUCKETS_START_FRAME', {
        'type': 'uint32_t',
        'default': '1200',
        'desc': ['Frame from when to start saving buckets data.',
                 '',
                 'NOTE: KNOB_ENABLE_RDTSC must be enabled in core/knobs.h',
                 'for this to have an effect.'],
    }],

    ['BUCKETS_END_FRAME', {
        'type': 'uint32_t',
        'default': '1400',
        'desc': ['Frame at which to stop saving buckets data.',
                 '',
                 'NOTE: KNOB_ENABLE_RDTSC must be enabled in core/knobs.h',
                 'for this to have an effect.'],
    }],

    # ---- Draw scheduling limits ----
    ['WORKER_SPIN_LOOP_COUNT', {
        'type': 'uint32_t',
        'default': '5000',
        'desc': ['Number of spin-loop iterations worker threads will perform',
                 'before going to sleep when waiting for work'],
    }],

    ['MAX_DRAWS_IN_FLIGHT', {
        'type': 'uint32_t',
        'default': '160',
        'desc': ['Maximum number of draws outstanding before API thread blocks.'],
    }],

    ['MAX_PRIMS_PER_DRAW', {
        'type': 'uint32_t',
        'default': '2040',
        'desc': ['Maximum primitives in a single Draw().',
                 'Larger primitives are split into smaller Draw calls.',
                 'Should be a multiple of (3 * vectorWidth).'],
    }],

    ['MAX_TESS_PRIMS_PER_DRAW', {
        'type': 'uint32_t',
        'default': '16',
        'desc': ['Maximum primitives in a single Draw() with tessellation enabled.',
                 'Larger primitives are split into smaller Draw calls.',
                 'Should be a multiple of (vectorWidth).'],
    }],

    # ---- Tessellation factor limits ----
    ['MAX_FRAC_ODD_TESS_FACTOR', {
        'type': 'float',
        'default': '63.0f',
        'desc': ['(DEBUG) Maximum tessellation factor for fractional-odd partitioning.'],
    }],

    ['MAX_FRAC_EVEN_TESS_FACTOR', {
        'type': 'float',
        'default': '64.0f',
        'desc': ['(DEBUG) Maximum tessellation factor for fractional-even partitioning.'],
    }],

    ['MAX_INTEGER_TESS_FACTOR', {
        'type': 'uint32_t',
        'default': '64',
        'desc': ['(DEBUG) Maximum tessellation factor for integer partitioning.'],
    }],

    ['BUCKETS_ENABLE_THREADVIZ', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Enable threadviz output.'],
    }],

    # ---- Toss points: stop per-draw execution at a given stage ----
    ['TOSS_DRAW', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Disable per-draw/dispatch execution'],
    }],

    ['TOSS_QUEUE_FE', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at worker FE',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],

    ['TOSS_FETCH', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at vertex fetch',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],

    ['TOSS_IA', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at input assembler',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],

    ['TOSS_VS', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at vertex shader',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],

    ['TOSS_SETUP_TRIS', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at primitive setup',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],

    ['TOSS_BIN_TRIS', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at primitive binning',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],

    ['TOSS_RS', {
        'type': 'bool',
        'default': 'false',
        'desc': ['Stop per-draw execution at rasterizer',
                 '',
                 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
    }],
]
|
|
@ -0,0 +1,8 @@
|
|||
# mako/__init__.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
|
||||
__version__ = '1.0.1'
|
|
@ -0,0 +1,845 @@
|
|||
# mako/_ast_util.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""
|
||||
ast
|
||||
~~~
|
||||
|
||||
The `ast` module helps Python applications to process trees of the Python
|
||||
abstract syntax grammar. The abstract syntax itself might change with
|
||||
each Python release; this module helps to find out programmatically what
|
||||
the current grammar looks like and allows modifications of it.
|
||||
|
||||
An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
|
||||
a flag to the `compile()` builtin function or by using the `parse()`
|
||||
function from this module. The result will be a tree of objects whose
|
||||
classes all inherit from `ast.AST`.
|
||||
|
||||
A modified abstract syntax tree can be compiled into a Python code object
|
||||
using the built-in `compile()` function.
|
||||
|
||||
Additionally various helper functions are provided that make working with
|
||||
the trees simpler. The main intention of the helper functions and this
|
||||
module in general is to provide an easy to use interface for libraries
|
||||
that work tightly with the python syntax (template engines for example).
|
||||
|
||||
|
||||
:copyright: Copyright 2008 by Armin Ronacher.
|
||||
:license: Python License.
|
||||
"""
|
||||
from _ast import *
|
||||
from mako.compat import arg_stringname
|
||||
|
||||
# Source text for each operator node class, grouped by operator kind.
BOOLOP_SYMBOLS = {And: 'and', Or: 'or'}

BINOP_SYMBOLS = {
    Add: '+', Sub: '-',
    Mult: '*', Div: '/', FloorDiv: '//', Mod: '%',
    LShift: '<<', RShift: '>>',
    BitOr: '|', BitAnd: '&', BitXor: '^',
}

CMPOP_SYMBOLS = {
    Eq: '==', NotEq: '!=',
    Lt: '<', LtE: '<=',
    Gt: '>', GtE: '>=',
    Is: 'is', IsNot: 'is not',
    In: 'in', NotIn: 'not in',
}

UNARYOP_SYMBOLS = {Invert: '~', Not: 'not', UAdd: '+', USub: '-'}

# Union of the four tables above.
ALL_SYMBOLS = dict(BOOLOP_SYMBOLS)
ALL_SYMBOLS.update(BINOP_SYMBOLS)
ALL_SYMBOLS.update(CMPOP_SYMBOLS)
ALL_SYMBOLS.update(UNARYOP_SYMBOLS)
|
||||
|
||||
|
||||
def parse(expr, filename='<unknown>', mode='exec'):
    """Compile `expr` with the PyCF_ONLY_AST flag and return the AST.

    `filename` and `mode` are passed straight to the builtin `compile()`.
    """
    tree = compile(expr, filename, mode, PyCF_ONLY_AST)
    return tree
|
||||
|
||||
|
||||
def to_source(node, indent_with=' ' * 4):
    """
    Turn a node tree back into Python source text by running a
    `SourceGenerator` over it and concatenating the pieces it collected.

    The generated source may be evaluable even when the AST itself cannot
    be compiled, because the AST carries extra data that is dropped during
    the conversion.

    `indent_with` is the string emitted once per indentation level; it
    defaults to four spaces.
    """
    writer = SourceGenerator(indent_with)
    writer.visit(node)
    pieces = writer.result
    return ''.join(pieces)
|
||||
|
||||
|
||||
def dump(node):
    """
    Return a very verbose, single-string representation of `node` for
    debugging.  AST nodes render as ``ClassName(field=value, ...)``, lists
    as ``[...]`` and everything else through `repr`.

    Raises `TypeError` if `node` is not an AST node.
    """
    if not isinstance(node, AST):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)

    def render(item):
        if isinstance(item, AST):
            pairs = ['%s=%s' % (name, render(value))
                     for name, value in iter_fields(item)]
            return '%s(%s)' % (item.__class__.__name__, ', '.join(pairs))
        if isinstance(item, list):
            return '[%s]' % ', '.join([render(entry) for entry in item])
        return repr(item)

    return render(node)
|
||||
|
||||
|
||||
def copy_location(new_node, old_node):
    """
    Copy `lineno` and `col_offset` from `old_node` to `new_node` and return
    `new_node`.  An attribute is only copied when both node types list it
    in `_attributes` and `old_node` actually has a value for it.
    """
    for attr in ('lineno', 'col_offset'):
        supported = (attr in old_node._attributes
                     and attr in new_node._attributes)
        if supported and hasattr(old_node, attr):
            setattr(new_node, attr, getattr(old_node, attr))
    return new_node
|
||||
|
||||
|
||||
def fix_missing_locations(node):
    """
    Fill in missing `lineno` / `col_offset` attributes throughout the tree
    rooted at `node` and return `node`.

    A node that lacks one of the attributes inherits the value from its
    parent (the root starts from line 1, column 0); a node that already
    has it is left untouched and its value becomes the one handed down to
    its own children.  Unlike `copy_location` this works recursively.
    """
    def _fill(current, line, column):
        if 'lineno' in current._attributes:
            if hasattr(current, 'lineno'):
                line = current.lineno
            else:
                current.lineno = line
        if 'col_offset' in current._attributes:
            if hasattr(current, 'col_offset'):
                column = current.col_offset
            else:
                current.col_offset = column
        for child in iter_child_nodes(current):
            _fill(child, line, column)

    _fill(node, 1, 0)
    return node
|
||||
|
||||
|
||||
def increment_lineno(node, n=1):
    """
    Increment the line numbers of `node` and all of its descendants by `n`
    if they have line number attributes.  This is useful to "move code" to
    a different location in a file.

    A node whose type supports `lineno` but has none set yet is treated as
    being on line 0.
    """
    # walk() yields the starting node itself followed by every descendant.
    # (The previous zip((node,), walk(node)) produced a single tuple, so the
    # attribute lookup below raised AttributeError on every call.)
    for child in walk(node):
        if 'lineno' in child._attributes:
            child.lineno = getattr(child, 'lineno', 0) + n
|
||||
|
||||
|
||||
def iter_fields(node):
    """Yield ``(name, value)`` for every field in `node._fields` that is
    actually present on the node; missing fields are skipped."""
    # CPython 2.5 compat
    names = getattr(node, '_fields', None)
    if not names:
        return
    for name in names:
        try:
            value = getattr(node, name)
        except AttributeError:
            continue
        yield name, value
|
||||
|
||||
|
||||
def get_fields(node):
    """Like `iter_fields` but returns a dict mapping field name to value."""
    fields = {}
    for name, value in iter_fields(node):
        fields[name] = value
    return fields
|
||||
|
||||
|
||||
def iter_child_nodes(node):
    """Iterate over all direct child nodes of a node: fields that are AST
    nodes themselves, plus the AST items of fields that are lists."""
    for _, value in iter_fields(node):
        if isinstance(value, AST):
            yield value
            continue
        if not isinstance(value, list):
            continue
        for entry in value:
            if isinstance(entry, AST):
                yield entry
|
||||
|
||||
|
||||
def get_child_nodes(node):
    """Like `iter_child_nodes` but returns a list."""
    return [child for child in iter_child_nodes(node)]
|
||||
|
||||
|
||||
def get_compile_mode(node):
    """
    Get the mode for `compile` of a given node.  If the node is not a `mod`
    node (`Expression`, `Module` etc.) a `TypeError` is thrown.

    Returns 'eval' for `Expression`, 'single' for `Interactive` and 'exec'
    for every other `mod` node (e.g. `Module`).
    """
    if not isinstance(node, mod):
        raise TypeError('expected mod node, got %r' % node.__class__.__name__)
    # 'exec' is the fallback: compile() only accepts 'exec', 'eval' and
    # 'single', so the former 'expr' default could never be used.
    return {
        Expression: 'eval',
        Interactive: 'single'
    }.get(node.__class__, 'exec')
|
||||
|
||||
|
||||
def get_docstring(node):
    """
    Return the docstring for the given node or `None` if no docstring can be
    found.  If the node provided does not accept docstrings a `TypeError`
    will be raised.
    """
    if not isinstance(node, (FunctionDef, ClassDef, Module)):
        raise TypeError("%r can't have docstrings" % node.__class__.__name__)
    if not node.body:
        return None

    # A docstring is an expression *statement* (Expr) whose value is a
    # string literal, so the literal sits one level below body[0].
    first = node.body[0]
    if not isinstance(first, Expr):
        return None

    literal = first.value
    # Newer Pythons store the string on `.value` (Constant); older ones
    # use a Str node with the text on `.s`.
    if hasattr(literal, 'value'):
        text = literal.value
    else:
        text = getattr(literal, 's', None)

    if isinstance(text, (str, type(u''))):
        return text
    return None
|
||||
|
||||
|
||||
def walk(node):
    """
    Yield `node` and then every node below it, using a FIFO queue (so the
    order is breadth-first).  Handy when nodes are only modified in place
    and neither context nor ordering matters to the caller.
    """
    from collections import deque
    pending = deque((node,))
    while pending:
        current = pending.popleft()
        # Children are queued before the node is handed to the caller.
        pending.extend(iter_child_nodes(current))
        yield current
|
||||
|
||||
|
||||
class NodeVisitor(object):
    """
    Walks the abstract syntax tree and calls a visitor function for every
    node found.  Whatever a visitor function returns is passed back by
    `visit`.

    By default the visitor function for a node is ``'visit_'`` + the class
    name of the node, so a `TryFinally` node is handled by
    `visit_TryFinally`.  Override `get_visitor` to change this lookup.
    When no visitor function exists for a node (`get_visitor` returns
    `None`) `generic_visit` is used instead.

    Don't use the `NodeVisitor` if you want to apply changes to nodes
    while traversing; `NodeTransformer` exists for that purpose.
    """

    def get_visitor(self, node):
        """
        Look up the ``visit_<ClassName>`` attribute for `node` on this
        visitor and return it, or `None` when there is no such attribute.
        """
        return getattr(self, 'visit_' + node.__class__.__name__, None)

    def visit(self, node):
        """Visit a node."""
        handler = self.get_visitor(node)
        if handler is None:
            return self.generic_visit(node)
        return handler(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node;
        visits every AST node found in the node's fields."""
        for _, value in iter_fields(node):
            if isinstance(value, AST):
                self.visit(value)
            elif isinstance(value, list):
                for entry in value:
                    if isinstance(entry, AST):
                        self.visit(entry)
|
||||
|
||||
|
||||
class NodeTransformer(NodeVisitor):
    """
    Walks the abstract syntax tree and allows modifications of nodes.

    The return value of each visitor function decides what happens to the
    visited node: `None` removes it from its previous location, any other
    value replaces it.  Returning the original node leaves the tree
    unchanged at that point.

    Example transformer that rewrites all `foo` to `data['foo']`::

        class RewriteName(NodeTransformer):

            def visit_Name(self, node):
                return copy_location(Subscript(
                    value=Name(id='data', ctx=Load()),
                    slice=Index(value=Str(s=node.id)),
                    ctx=node.ctx
                ), node)

    If the node you are operating on has child nodes, either transform
    them yourself or call the generic visit function for the node first.

    For nodes held in a list field, a visitor may also return a list of
    nodes rather than a single node; its items are spliced into the list.

    Usually you use the transformer like this::

        node = YourTransformer().visit(node)
    """

    def generic_visit(self, node):
        for field, old_value in iter_fields(node):
            if isinstance(old_value, list):
                kept = []
                for value in old_value:
                    if isinstance(value, AST):
                        value = self.visit(value)
                        if value is None:
                            # visitor asked for the node to be dropped
                            continue
                        if not isinstance(value, AST):
                            # visitor returned several nodes: splice them in
                            kept.extend(value)
                            continue
                    kept.append(value)
                # update the list in place so other references see the change
                old_value[:] = kept
            elif isinstance(old_value, AST):
                replacement = self.visit(old_value)
                if replacement is None:
                    delattr(node, field)
                else:
                    setattr(node, field, replacement)
        return node
|
||||
|
||||
|
||||
class SourceGenerator(NodeVisitor):
    """
    This visitor is able to transform a well formed syntax tree into python
    sourcecode.  For more details have a look at the docstring of the
    `node_to_source` function.

    Output is accumulated as a list of string fragments in ``self.result``.
    """

    def __init__(self, indent_with):
        self.result = []
        self.indent_with = indent_with
        self.indentation = 0
        # number of line breaks to emit before the next fragment
        self.new_lines = 0

    def write(self, x):
        """Append `x`, first flushing any pending line breaks + indent."""
        if self.new_lines:
            if self.result:
                self.result.append('\n' * self.new_lines)
            self.result.append(self.indent_with * self.indentation)
            self.new_lines = 0
        self.result.append(x)

    def newline(self, n=1):
        """Request at least `n` line breaks before the next write."""
        self.new_lines = max(self.new_lines, n)

    def body(self, statements):
        """Emit `statements` one indentation level deeper."""
        # the original assigned an unused ``self.new_line`` attribute here;
        # requesting a line break is what the suite actually needs
        self.newline()
        self.indentation += 1
        for stmt in statements:
            self.visit(stmt)
        self.indentation -= 1

    def body_or_else(self, node):
        """Emit ``node.body`` followed by an optional ``else:`` suite."""
        self.body(node.body)
        if node.orelse:
            self.newline()
            self.write('else:')
            self.body(node.orelse)

    def signature(self, node):
        """Emit the parameter list described by an ``arguments`` node."""
        want_comma = []

        def write_comma():
            if want_comma:
                self.write(', ')
            else:
                want_comma.append(True)

        # defaults align with the *last* positional arguments
        padding = [None] * (len(node.args) - len(node.defaults))
        for arg, default in zip(node.args, padding + node.defaults):
            write_comma()
            self.visit(arg)
            if default is not None:
                self.write('=')
                self.visit(default)
        if node.vararg is not None:
            write_comma()
            self.write('*' + arg_stringname(node.vararg))
        if node.kwarg is not None:
            write_comma()
            self.write('**' + arg_stringname(node.kwarg))

    def decorators(self, node):
        """Emit one ``@decorator`` line per entry in ``decorator_list``."""
        for decorator in node.decorator_list:
            self.newline()
            self.write('@')
            self.visit(decorator)

    def _write_keyword(self, keyword):
        """Emit ``name=value``, or ``**value`` when the keyword has no name."""
        if keyword.arg is None:
            self.write('**')
        else:
            self.write(keyword.arg + '=')
        self.visit(keyword.value)

    # Statements

    def visit_Assign(self, node):
        self.newline()
        for idx, target in enumerate(node.targets):
            # multiple targets mean chained assignment: ``a = b = value``
            if idx:
                self.write(' = ')
            self.visit(target)
        self.write(' = ')
        self.visit(node.value)

    def visit_AugAssign(self, node):
        self.newline()
        self.visit(node.target)
        self.write(BINOP_SYMBOLS[type(node.op)] + '=')
        self.visit(node.value)

    def visit_ImportFrom(self, node):
        self.newline()
        # ``from . import x`` has no module name; level may be missing
        self.write('from %s%s import ' % ('.' * (node.level or 0),
                                          node.module or ''))
        for idx, item in enumerate(node.names):
            if idx:
                self.write(', ')
            # items are ``alias`` nodes, not strings
            self.visit(item)

    def visit_Import(self, node):
        self.newline()
        self.write('import ')
        for idx, item in enumerate(node.names):
            if idx:
                self.write(', ')
            self.visit(item)

    def visit_Expr(self, node):
        self.newline()
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        self.newline(n=2)
        self.decorators(node)
        self.newline()
        self.write('def %s(' % node.name)
        self.signature(node.args)
        self.write('):')
        self.body(node.body)

    def visit_ClassDef(self, node):
        have_args = []

        def paren_or_comma():
            if have_args:
                self.write(', ')
            else:
                have_args.append(True)
                self.write('(')

        self.newline(n=3)
        self.decorators(node)
        self.newline()
        self.write('class %s' % node.name)
        for base in node.bases:
            paren_or_comma()
            self.visit(base)
        # XXX: the if here is used to keep this module compatible
        #      with python 2.6.
        if hasattr(node, 'keywords'):
            for keyword in node.keywords:
                paren_or_comma()
                self._write_keyword(keyword)
            # not every AST version carries starargs / kwargs fields
            starargs = getattr(node, 'starargs', None)
            if starargs is not None:
                paren_or_comma()
                self.write('*')
                self.visit(starargs)
            kwargs = getattr(node, 'kwargs', None)
            if kwargs is not None:
                paren_or_comma()
                self.write('**')
                self.visit(kwargs)
        self.write(have_args and '):' or ':')
        self.body(node.body)

    def visit_If(self, node):
        self.newline()
        self.write('if ')
        self.visit(node.test)
        self.write(':')
        self.body(node.body)
        while True:
            else_ = node.orelse
            if not else_:
                # no else branch: nothing more to emit
                break
            if len(else_) == 1 and isinstance(else_[0], If):
                # a lone nested ``if`` in the else branch is an ``elif``
                node = else_[0]
                self.newline()
                self.write('elif ')
                self.visit(node.test)
                self.write(':')
                self.body(node.body)
            else:
                self.newline()
                self.write('else:')
                self.body(else_)
                break

    def visit_For(self, node):
        self.newline()
        self.write('for ')
        self.visit(node.target)
        self.write(' in ')
        self.visit(node.iter)
        self.write(':')
        self.body_or_else(node)

    def visit_While(self, node):
        self.newline()
        self.write('while ')
        self.visit(node.test)
        self.write(':')
        self.body_or_else(node)

    def visit_With(self, node):
        self.newline()
        self.write('with ')
        self.visit(node.context_expr)
        if node.optional_vars is not None:
            self.write(' as ')
            self.visit(node.optional_vars)
        self.write(':')
        self.body(node.body)

    def visit_Pass(self, node):
        self.newline()
        self.write('pass')

    def visit_Print(self, node):
        # XXX: python 2.6 only
        self.newline()
        self.write('print ')
        want_comma = False
        if node.dest is not None:
            self.write(' >> ')
            self.visit(node.dest)
            want_comma = True
        for value in node.values:
            if want_comma:
                self.write(', ')
            self.visit(value)
            want_comma = True
        if not node.nl:
            self.write(',')

    def visit_Delete(self, node):
        self.newline()
        self.write('del ')
        for idx, target in enumerate(node.targets):
            if idx:
                self.write(', ')
            self.visit(target)

    def visit_TryExcept(self, node):
        self.newline()
        self.write('try:')
        self.body(node.body)
        for handler in node.handlers:
            self.visit(handler)
        if node.orelse:
            self.newline()
            self.write('else:')
            self.body(node.orelse)

    def visit_TryFinally(self, node):
        self.newline()
        self.write('try:')
        self.body(node.body)
        self.newline()
        self.write('finally:')
        self.body(node.finalbody)

    def visit_Global(self, node):
        self.newline()
        self.write('global ' + ', '.join(node.names))

    def visit_Nonlocal(self, node):
        self.newline()
        self.write('nonlocal ' + ', '.join(node.names))

    def visit_Return(self, node):
        self.newline()
        self.write('return')
        # a bare ``return`` has no value node to visit
        if node.value is not None:
            self.write(' ')
            self.visit(node.value)

    def visit_Break(self, node):
        self.newline()
        self.write('break')

    def visit_Continue(self, node):
        self.newline()
        self.write('continue')

    def visit_Raise(self, node):
        # XXX: Python 2.6 / 3.0 compatibility
        self.newline()
        self.write('raise')
        if hasattr(node, 'exc') and node.exc is not None:
            self.write(' ')
            self.visit(node.exc)
            if node.cause is not None:
                self.write(' from ')
                self.visit(node.cause)
        elif hasattr(node, 'type') and node.type is not None:
            # separate the keyword from the exception expression
            self.write(' ')
            self.visit(node.type)
            if node.inst is not None:
                self.write(', ')
                self.visit(node.inst)
            if node.tback is not None:
                self.write(', ')
                self.visit(node.tback)

    # Expressions

    def visit_Attribute(self, node):
        self.visit(node.value)
        self.write('.' + node.attr)

    def visit_Call(self, node):
        want_comma = []

        def write_comma():
            if want_comma:
                self.write(', ')
            else:
                want_comma.append(True)

        self.visit(node.func)
        self.write('(')
        for arg in node.args:
            write_comma()
            self.visit(arg)
        for keyword in node.keywords:
            write_comma()
            self._write_keyword(keyword)
        # not every AST version carries starargs / kwargs fields
        starargs = getattr(node, 'starargs', None)
        if starargs is not None:
            write_comma()
            self.write('*')
            self.visit(starargs)
        kwargs = getattr(node, 'kwargs', None)
        if kwargs is not None:
            write_comma()
            self.write('**')
            self.visit(kwargs)
        self.write(')')

    def visit_Name(self, node):
        self.write(node.id)

    def visit_NameConstant(self, node):
        self.write(str(node.value))

    def visit_arg(self, node):
        self.write(node.arg)

    def visit_Str(self, node):
        self.write(repr(node.s))

    def visit_Bytes(self, node):
        self.write(repr(node.s))

    def visit_Num(self, node):
        self.write(repr(node.n))

    def visit_Tuple(self, node):
        self.write('(')
        idx = -1
        for idx, item in enumerate(node.elts):
            if idx:
                self.write(', ')
            self.visit(item)
        # idx == 0 means exactly one element, which needs a trailing comma
        self.write(idx and ')' or ',)')

    def sequence_visit(left, right):
        def visit(self, node):
            self.write(left)
            for idx, item in enumerate(node.elts):
                if idx:
                    self.write(', ')
                self.visit(item)
            self.write(right)
        return visit

    visit_List = sequence_visit('[', ']')
    visit_Set = sequence_visit('{', '}')
    del sequence_visit

    def visit_Dict(self, node):
        self.write('{')
        for idx, (key, value) in enumerate(zip(node.keys, node.values)):
            if idx:
                self.write(', ')
            self.visit(key)
            self.write(': ')
            self.visit(value)
        self.write('}')

    def visit_BinOp(self, node):
        self.write('(')
        self.visit(node.left)
        self.write(' %s ' % BINOP_SYMBOLS[type(node.op)])
        self.visit(node.right)
        self.write(')')

    def visit_BoolOp(self, node):
        self.write('(')
        for idx, value in enumerate(node.values):
            if idx:
                self.write(' %s ' % BOOLOP_SYMBOLS[type(node.op)])
            self.visit(value)
        self.write(')')

    def visit_Compare(self, node):
        self.write('(')
        self.visit(node.left)
        for op, right in zip(node.ops, node.comparators):
            self.write(' %s ' % CMPOP_SYMBOLS[type(op)])
            self.visit(right)
        self.write(')')

    def visit_UnaryOp(self, node):
        self.write('(')
        op = UNARYOP_SYMBOLS[type(node.op)]
        self.write(op)
        if op == 'not':
            self.write(' ')
        self.visit(node.operand)
        self.write(')')

    def visit_Subscript(self, node):
        self.visit(node.value)
        self.write('[')
        self.visit(node.slice)
        self.write(']')

    def visit_Slice(self, node):
        if node.lower is not None:
            self.visit(node.lower)
        self.write(':')
        if node.upper is not None:
            self.visit(node.upper)
        if node.step is not None:
            self.write(':')
            if not (isinstance(node.step, Name) and node.step.id == 'None'):
                self.visit(node.step)

    def visit_ExtSlice(self, node):
        for idx, item in enumerate(node.dims):
            if idx:
                self.write(', ')
            self.visit(item)

    def visit_Yield(self, node):
        self.write('yield')
        # a bare ``yield`` has no value node to visit
        if node.value is not None:
            self.write(' ')
            self.visit(node.value)

    def visit_Lambda(self, node):
        self.write('lambda ')
        self.signature(node.args)
        self.write(': ')
        self.visit(node.body)

    def visit_Ellipsis(self, node):
        self.write('Ellipsis')

    def generator_visit(left, right):
        def visit(self, node):
            self.write(left)
            self.visit(node.elt)
            for comprehension in node.generators:
                self.visit(comprehension)
            self.write(right)
        return visit

    visit_ListComp = generator_visit('[', ']')
    visit_GeneratorExp = generator_visit('(', ')')
    visit_SetComp = generator_visit('{', '}')
    del generator_visit

    def visit_DictComp(self, node):
        self.write('{')
        self.visit(node.key)
        self.write(': ')
        self.visit(node.value)
        for comprehension in node.generators:
            self.visit(comprehension)
        self.write('}')

    def visit_IfExp(self, node):
        self.visit(node.body)
        self.write(' if ')
        self.visit(node.test)
        self.write(' else ')
        self.visit(node.orelse)

    def visit_Starred(self, node):
        self.write('*')
        self.visit(node.value)

    def visit_Repr(self, node):
        # XXX: python 2.6 only
        self.write('`')
        self.visit(node.value)
        self.write('`')

    # Helper Nodes

    def visit_alias(self, node):
        self.write(node.name)
        if node.asname is not None:
            self.write(' as ' + node.asname)

    def visit_comprehension(self, node):
        self.write(' for ')
        self.visit(node.target)
        self.write(' in ')
        self.visit(node.iter)
        if node.ifs:
            for if_ in node.ifs:
                self.write(' if ')
                self.visit(if_)

    def visit_excepthandler(self, node):
        self.newline()
        self.write('except')
        if node.type is not None:
            self.write(' ')
            self.visit(node.type)
            if node.name is not None:
                self.write(' as ')
                if isinstance(node.name, AST):
                    self.visit(node.name)
                else:
                    # the bound name may be a plain string rather than a node
                    self.write(node.name)
        self.write(':')
        self.body(node.body)

    # handler nodes whose class is spelled ``ExceptHandler`` dispatch here
    visit_ExceptHandler = visit_excepthandler
|
|
@ -0,0 +1,178 @@
|
|||
# mako/ast.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""utilities for analyzing expressions and blocks of Python
|
||||
code, as well as generating Python from AST nodes"""
|
||||
|
||||
from mako import exceptions, pyparser, compat
|
||||
import re
|
||||
|
||||
class PythonCode(object):
    """Information about a chunk of Python code: which identifiers it
    declares and which it references without declaring first."""

    def __init__(self, code, **exception_kwargs):
        self.code = code

        # every identifier the code assigns to at some point
        self.declared_identifiers = set()

        # every identifier referenced before (or without) being assigned;
        # a name may appear in both sets
        self.undeclared_identifiers = set()

        # The AST is walked rather than inspecting code.co_varnames /
        # code.co_names because:
        #   - a name can be recognised as "undeclared" even when it is
        #     declared later in the same block of code
        #   - the AST is less likely to change behavior between Python
        #     versions (co_names changed slightly in Python 2.5)
        if isinstance(code, compat.string_types):
            tree = pyparser.parse(code.lstrip(), "exec", **exception_kwargs)
        else:
            # not a string: used as the tree to visit as-is
            tree = code

        finder = pyparser.FindIdentifiers(self, **exception_kwargs)
        finder.visit(tree)
|
||||
|
||||
class ArgumentList(object):
    """parses a fragment of code as a comma-separated list of expressions"""

    def __init__(self, code, **exception_kwargs):
        """Parse `code` and hand the tree to ``pyparser.FindTuple``.

        :param code: a source string, or an object that is visited as-is
         when it is not a string.
        :param \\**exception_kwargs: forwarded to the parser and visitor.
        """
        self.codeargs = []
        self.args = []
        self.declared_identifiers = set()
        self.undeclared_identifiers = set()
        if isinstance(code, compat.string_types):
            # re.search is required for the trailing-comma test: re.match
            # only anchors at the start of the string, so "a, b," was never
            # recognised and received a second, syntax-breaking comma.
            if re.match(r"\S", code) and not re.search(r",\s*$", code):
                # if there's text and no trailing comma, ensure it's parsed
                # as a tuple by adding a trailing comma
                code += ","
            expr = pyparser.parse(code, "exec", **exception_kwargs)
        else:
            expr = code

        f = pyparser.FindTuple(self, PythonCode, **exception_kwargs)
        f.visit(expr)
|
||||
|
||||
class PythonFragment(PythonCode):
    """extends PythonCode to provide identifier lookups in partial control
    statements

    e.g.
        for x in 5:
        elif y==9:
        except (MyException, e):
    etc.
    """

    def __init__(self, code, **exception_kwargs):
        """Complete the partial statement `code` so it can be parsed.

        Raises ``exceptions.CompileException`` when `code` is not of the
        form ``keyword [expr]:`` or uses an unsupported keyword.
        """
        # Match and slice the *same* string: the offsets from the match
        # were previously computed on the stripped text but applied to the
        # unstripped one, truncating at the wrong position whenever the
        # fragment had leading whitespace.
        stripped = code.strip()
        m = re.match(r'^(\w+)(?:\s+(.*?))?:\s*(#|$)', stripped, re.S)
        if not m:
            raise exceptions.CompileException(
                "Fragment '%s' is not a partial control statement" %
                code, **exception_kwargs)
        if m.group(3):
            # drop a trailing comment so a body can be appended after ':'
            code = stripped[:m.start(3)]
        else:
            code = stripped
        keyword = m.group(1)
        if keyword in ('for', 'if', 'while', 'with'):
            code = code + "pass"
        elif keyword == 'try':
            code = code + "pass\nexcept:pass"
        elif keyword in ('elif', 'else'):
            code = "if False:pass\n" + code + "pass"
        elif keyword == 'except':
            code = "try:pass\n" + code + "pass"
        else:
            raise exceptions.CompileException(
                "Unsupported control keyword: '%s'" %
                keyword, **exception_kwargs)
        super(PythonFragment, self).__init__(code, **exception_kwargs)
|
||||
|
||||
|
||||
class FunctionDecl(object):
    """A parsed function declaration."""

    def __init__(self, code, allow_kwargs=True, **exception_kwargs):
        self.code = code
        tree = pyparser.parse(code, "exec", **exception_kwargs)

        # the visitor is handed this object as its first argument
        pyparser.ParseFunc(self, **exception_kwargs).visit(tree)

        if not hasattr(self, 'funcname'):
            raise exceptions.CompileException(
                "Code '%s' is not a function declaration" % code,
                **exception_kwargs)
        if self.kwargs and not allow_kwargs:
            raise exceptions.CompileException(
                "'**%s' keyword argument not allowed here" %
                self.kwargnames[-1], **exception_kwargs)

    def get_argument_expressions(self, as_call=False):
        """Return this declaration's arguments as a list of printable
        strings.

        With the default ``as_call=False`` the strings suit a ``def``
        statement (defaults included).  With ``as_call=True`` they suit a
        call that passes same-named locals to the function.
        """
        pieces = []

        # Work back-to-front: defaults and the * / ** collectors sit at the
        # end of their respective lists.
        positional = self.argnames[::-1]
        keyword_only = self.kwargnames[::-1]
        positional_defaults = self.defaults[::-1]
        keyword_defaults = self.kwdefaults[::-1]

        # Named arguments
        if self.kwargs:
            pieces.append("**" + keyword_only.pop(0))

        for name in keyword_only:
            if as_call:
                # keyword-only arguments must be passed by name, so a call
                # spells them ``foo=foo``
                entry = "%s=%s" % (name, name)
            elif not keyword_defaults:
                entry = name
            else:
                default = keyword_defaults.pop(0)
                if default is None:
                    # the AST always supplies a slot per keyword-only
                    # argument, e.g. ``def foo(*, a=1, b, c=3)``
                    entry = name
                else:
                    entry = "%s=%s" % (
                        name, pyparser.ExpressionGenerator(default).value())
            pieces.append(entry)

        # Positional arguments
        if self.varargs:
            pieces.append("*" + positional.pop(0))

        for name in positional:
            if as_call or not positional_defaults:
                pieces.append(name)
                continue
            default = positional_defaults.pop(0)
            pieces.append("%s=%s" % (
                name, pyparser.ExpressionGenerator(default).value()))

        return pieces[::-1]

    @property
    def allargnames(self):
        """Positional names followed by keyword names, as one tuple."""
        return tuple(self.argnames) + tuple(self.kwargnames)
|
||||
|
||||
class FunctionArgs(FunctionDecl):
    """Only the argument list of a function declaration."""

    def __init__(self, code, **kwargs):
        # wrap the bare argument list in a throwaway ``def`` so the
        # FunctionDecl machinery can parse it
        wrapped = "def ANON(%s):pass" % code
        super(FunctionArgs, self).__init__(wrapped, **kwargs)
|
|
@ -0,0 +1,238 @@
|
|||
# mako/cache.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
from mako import compat, util
|
||||
|
||||
# loader used by Cache._load_impl to resolve a cache implementation by name
_cache_plugins = util.PluginLoader("mako.cache")

# public hook for registering additional cache implementations
register_plugin = _cache_plugins.register

# the Beaker-backed implementation is registered out of the box
_cache_plugins.register(
    "beaker", "mako.ext.beaker_cache", "BeakerCacheImpl")
|
||||
|
||||
|
||||
class Cache(object):
    """Represents a data content cache made available to the module
    space of a specific :class:`.Template` object.

    .. versionadded:: 0.6
       :class:`.Cache` by itself is mostly a
       container for a :class:`.CacheImpl` object, which implements
       a fixed API to provide caching services; specific subclasses exist to
       implement different
       caching strategies.   Mako includes a backend that works with
       the Beaker caching system.   Beaker itself then supports
       a number of backends (i.e. file, memory, memcached, etc.)

    The construction of a :class:`.Cache` is part of the mechanics
    of a :class:`.Template`, and programmatic access to this
    cache is typically via the :attr:`.Template.cache` attribute.

    """

    impl = None
    """Provide the :class:`.CacheImpl` in use by this :class:`.Cache`.

    This accessor allows a :class:`.CacheImpl` with additional
    methods beyond that of :class:`.Cache` to be used programmatically.

    """

    id = None
    """Return the 'id' that identifies this cache.

    This is a value that should be globally unique to the
    :class:`.Template` associated with this cache, and can
    be used by a caching system to name a local container
    for data specific to this template.

    """

    starttime = None
    """Epochal time value for when the owning :class:`.Template` was
    first compiled.

    A cache implementation may wish to invalidate data earlier than
    this timestamp; this has the effect of the cache for a specific
    :class:`.Template` starting clean any time the :class:`.Template`
    is recompiled, such as when the original template file changed on
    the filesystem.

    """

    def __init__(self, template, *args):
        # check for a stale template calling the
        # constructor
        if isinstance(template, compat.string_types) and args:
            return
        self.template = template
        self.id = template.module.__name__
        self.starttime = template.module._modified_time
        # per-def cache keyword arguments, keyed by def name
        self._def_regions = {}
        self.impl = self._load_impl(self.template.cache_impl)

    def _load_impl(self, name):
        """Instantiate the cache implementation registered as `name`."""
        return _cache_plugins.load(name)(self)

    def get_or_create(self, key, creation_function, **kw):
        """Retrieve a value from the cache, using the given creation function
        to generate a new value."""

        return self._ctx_get_or_create(key, creation_function, None, **kw)

    def _ctx_get_or_create(self, key, creation_function, context, **kw):
        """Retrieve a value from the cache, using the given creation function
        to generate a new value."""

        # caching disabled on the template: always compute fresh
        if not self.template.cache_enabled:
            return creation_function()

        return self.impl.get_or_create(
            key,
            creation_function,
            **self._get_cache_kw(kw, context))

    def set(self, key, value, **kw):
        r"""Place a value in the cache.

        :param key: the value's key.
        :param value: the value.
        :param \**kw: cache configuration arguments.

        """

        self.impl.set(key, value, **self._get_cache_kw(kw, None))

    put = set
    """A synonym for :meth:`.Cache.set`.

    This is here for backwards compatibility.

    """

    def get(self, key, **kw):
        r"""Retrieve a value from the cache.

        :param key: the value's key.
        :param \**kw: cache configuration arguments.  The
         backend is configured using these arguments upon first request.
         Subsequent requests that use the same series of configuration
         values will use that same backend.

        """
        return self.impl.get(key, **self._get_cache_kw(kw, None))

    def invalidate(self, key, **kw):
        r"""Invalidate a value in the cache.

        :param key: the value's key.
        :param \**kw: cache configuration arguments.  The
         backend is configured using these arguments upon first request.
         Subsequent requests that use the same series of configuration
         values will use that same backend.

        """
        self.impl.invalidate(key, **self._get_cache_kw(kw, None))

    def invalidate_body(self):
        """Invalidate the cached content of the "body" method for this
        template.

        """
        self.invalidate('render_body', __M_defname='render_body')

    def invalidate_def(self, name):
        """Invalidate the cached content of a particular ``<%def>`` within this
        template.

        """

        self.invalidate('render_%s' % name, __M_defname='render_%s' % name)

    def invalidate_closure(self, name):
        """Invalidate a nested ``<%def>`` within this template.

        Caching of nested defs is a blunt tool as there is no
        management of scope -- nested defs that use cache tags
        need to have names unique of all other nested defs in the
        template, else their content will be overwritten by
        each other.

        """

        self.invalidate(name, __M_defname=name)

    def _get_cache_kw(self, kw, context):
        """Build the keyword arguments handed to the implementation.

        Template-level ``cache_args`` are overlaid with `kw`.  When `kw`
        carries ``__M_defname`` the merged result is remembered per def
        name and reused on later calls for that name.
        """
        defname = kw.pop('__M_defname', None)
        if not defname:
            tmpl_kw = self.template.cache_args.copy()
            tmpl_kw.update(kw)
        elif defname in self._def_regions:
            tmpl_kw = self._def_regions[defname]
        else:
            tmpl_kw = self.template.cache_args.copy()
            tmpl_kw.update(kw)
            self._def_regions[defname] = tmpl_kw
        if context and self.impl.pass_context:
            # copy so the stored per-def dict never gains a 'context' key
            tmpl_kw = tmpl_kw.copy()
            tmpl_kw.setdefault('context', context)
        return tmpl_kw
|
||||
|
||||
|
||||
class CacheImpl(object):
    """Provide a cache implementation for use by :class:`.Cache`."""

    def __init__(self, cache):
        self.cache = cache

    pass_context = False
    """If ``True``, the :class:`.Context` will be passed to
    :meth:`get_or_create <.CacheImpl.get_or_create>` as the name ``'context'``.
    """

    def get_or_create(self, key, creation_function, **kw):
        r"""Retrieve a value from the cache, using the given creation function
        to generate a new value.

        This function *must* return a value, either from
        the cache, or via the given creation function.
        If the creation function is called, the newly
        created value should be populated into the cache
        under the given key before being returned.

        :param key: the value's key.
        :param creation_function: function that when called generates
         a new value.
        :param \**kw: cache configuration arguments.

        """
        raise NotImplementedError()

    def set(self, key, value, **kw):
        r"""Place a value in the cache.

        :param key: the value's key.
        :param value: the value.
        :param \**kw: cache configuration arguments.

        """
        raise NotImplementedError()

    def get(self, key, **kw):
        r"""Retrieve a value from the cache.

        :param key: the value's key.
        :param \**kw: cache configuration arguments.

        """
        raise NotImplementedError()

    def invalidate(self, key, **kw):
        r"""Invalidate a value in the cache.

        :param key: the value's key.
        :param \**kw: cache configuration arguments.

        """
        raise NotImplementedError()
|
|
@ -0,0 +1,62 @@
|
|||
# mako/cmd.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
from argparse import ArgumentParser
|
||||
from os.path import isfile, dirname
|
||||
import sys
|
||||
from mako.template import Template
|
||||
from mako.lookup import TemplateLookup
|
||||
from mako import exceptions
|
||||
|
||||
def varsplit(var):
    """Split a ``name=value`` string at the first ``=``.

    A string without ``=`` yields ``(var, "")``.
    """
    if "=" in var:
        return var.split("=", 1)
    return (var, "")
|
||||
|
||||
def _exit():
    """Write Mako's text error report to stderr and exit with status 1."""
    report = exceptions.text_error_template().render()
    sys.stderr.write(report)
    sys.exit(1)
|
||||
|
||||
def cmdline(argv=None):
    """Render a template given on the command line (or stdin) to stdout.

    :param argv: argument list handed to the parser; ``None`` lets
     argparse fall back to its default.
    """
    # The first positional parameter of ArgumentParser is ``prog``; the
    # former optparse-style "usage: %prog [FILENAME]" string ended up as
    # the program name in help output.  Let argparse build the usage line.
    parser = ArgumentParser(
        description="Render a Mako template and print the result.")
    parser.add_argument(
        "--var", default=[], action="append",
        help="variable (can be used multiple times, use name=value)")
    parser.add_argument(
        "--template-dir", default=[], action="append",
        help="Directory to use for template lookup (multiple "
        "directories may be provided). If not given then if the "
        "template is read from stdin, the value defaults to be "
        "the current directory, otherwise it defaults to be the "
        "parent directory of the file provided.")
    parser.add_argument('input', nargs='?', default='-')

    options = parser.parse_args(argv)
    if options.input == '-':
        # template source comes from stdin
        lookup_dirs = options.template_dir or ["."]
        lookup = TemplateLookup(lookup_dirs)
        try:
            template = Template(sys.stdin.read(), lookup=lookup)
        except Exception:
            _exit()
    else:
        filename = options.input
        if not isfile(filename):
            raise SystemExit("error: can't find %s" % filename)
        lookup_dirs = options.template_dir or [dirname(filename)]
        lookup = TemplateLookup(lookup_dirs)
        try:
            template = Template(filename=filename, lookup=lookup)
        except Exception:
            _exit()

    kw = dict([varsplit(var) for var in options.var])
    try:
        print(template.render(**kw))
    except Exception:
        # ``except Exception`` (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not turned into a template error report
        _exit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cmdline()
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,174 @@
|
|||
import sys
|
||||
import time
|
||||
|
||||
# interpreter version flags
py3k = sys.version_info >= (3, 0)
py33 = sys.version_info >= (3, 3)
py2k = not py3k
py26 = sys.version_info >= (2, 6)

# platform / implementation flags
jython = sys.platform[:4] == 'java'
win32 = sys.platform[:3] == 'win'
pypy = hasattr(sys, 'pypy_version_info')
|
||||
|
||||
# Version-dependent aliases: the same names are bound in both branches so
# the rest of the package can import them without checking the version.
if py3k:
    from io import StringIO
    import builtins as compat_builtins
    from urllib.parse import quote_plus, unquote_plus
    from html.entities import codepoint2name, name2codepoint
    string_types = str,
    binary_type = bytes
    text_type = str

    from io import BytesIO as byte_buffer

    def u(s):
        """Return `s` unchanged."""
        return s

    def b(s):
        """Encode `s` to bytes using latin-1."""
        return s.encode("latin-1")

    def octal(lit):
        """Return the integer value of the octal digit string `lit`."""
        # int() with an explicit base replaces eval("0o" + lit): same
        # result for digit strings, without evaluating arbitrary code
        return int(lit, 8)

else:
    import __builtin__ as compat_builtins
    try:
        from cStringIO import StringIO
    except ImportError:
        # only a missing module should trigger the fallback
        from StringIO import StringIO

    byte_buffer = StringIO

    from urllib import quote_plus, unquote_plus
    from htmlentitydefs import codepoint2name, name2codepoint
    string_types = basestring,
    binary_type = str
    text_type = unicode

    def u(s):
        """Decode `s` as UTF-8 into a unicode object."""
        return unicode(s, "utf-8")

    def b(s):
        """Return `s` unchanged."""
        return s

    def octal(lit):
        """Return the integer value of the octal digit string `lit`."""
        return int(lit, 8)
|
||||
|
||||
|
||||
if py33:
    from importlib import machinery

    def load_module(module_id, path):
        """Load the source file at `path` as module `module_id`."""
        loader = machinery.SourceFileLoader(module_id, path)
        return loader.load_module()
else:
    import imp

    def load_module(module_id, path):
        """Load the source file at `path` as module `module_id`."""
        # the file object is closed on the way out, success or not
        with open(path, 'rb') as fp:
            return imp.load_source(module_id, path, fp)
|
||||
|
||||
|
||||
if py3k:
    def reraise(tp, value, tb=None, cause=None):
        """Re-raise ``value``, attaching ``tb`` and optionally ``cause``."""
        if cause is not None:
            value.__cause__ = cause
        if value.__traceback__ is tb:
            raise value
        raise value.with_traceback(tb)
else:
    # The three-argument raise is a syntax error on Python 3, so the
    # Python 2 variant is defined from a string.
    exec("def reraise(tp, value, tb=None, cause=None):\n"
         " raise tp, value, tb\n")
|
||||
|
||||
|
||||
def exception_as():
    """Return the exception instance currently being handled."""
    _, value, _ = sys.exc_info()
    return value
|
||||
|
||||
try:
|
||||
import threading
|
||||
if py3k:
|
||||
import _thread as thread
|
||||
else:
|
||||
import thread
|
||||
except ImportError:
|
||||
import dummy_threading as threading
|
||||
if py3k:
|
||||
import _dummy_thread as thread
|
||||
else:
|
||||
import dummy_thread as thread
|
||||
|
||||
# Timer used for timestamps.
if win32 or jython:
    # time.clock was removed in Python 3.8; prefer its replacement
    # time.perf_counter when available and only fall back to time.clock
    # on interpreters that predate it.
    time_func = getattr(time, 'perf_counter', None) or time.clock
else:
    time_func = time.time
|
||||
|
||||
try:
    from functools import partial
except ImportError:
    # Only a missing functools should trigger the fallback; a bare
    # ``except`` would also swallow KeyboardInterrupt / SystemExit.
    def partial(func, *args, **keywords):
        """Return a callable invoking ``func`` with pre-bound arguments.

        Positional arguments given at call time are appended after
        ``args``; call-time keywords override ``keywords``.
        """
        def newfunc(*fargs, **fkeywords):
            newkeywords = keywords.copy()
            newkeywords.update(fkeywords)
            return func(*(args + fargs), **newkeywords)
        return newfunc
|
||||
|
||||
|
||||
all = all
|
||||
import json
|
||||
|
||||
def exception_name(exc):
    """Return the class name of the exception object ``exc``."""
    exc_class = exc.__class__
    return exc_class.__name__
|
||||
|
||||
try:
    from inspect import CO_VARKEYWORDS, CO_VARARGS

    def inspect_func_args(fn):
        """Return ``(args, varargs, varkw, defaults)`` for function ``fn``.

        ``args`` lists the positional parameter names, ``varargs`` and
        ``varkw`` are the names of the ``*`` and ``**`` parameters (or
        ``None``), and ``defaults`` is the function's defaults tuple.
        """
        if py3k:
            co = fn.__code__
        else:
            co = fn.func_code

        nargs = co.co_argcount
        names = co.co_varnames
        args = list(names[:nargs])

        # In co_varnames, keyword-only parameters sit between the
        # positional ones and *args/**kw; skip them so the star
        # parameters are read from the right slots.
        nargs += getattr(co, 'co_kwonlyargcount', 0)

        varargs = None
        if co.co_flags & CO_VARARGS:
            varargs = names[nargs]
            nargs = nargs + 1
        varkw = None
        if co.co_flags & CO_VARKEYWORDS:
            varkw = names[nargs]

        if py3k:
            return args, varargs, varkw, fn.__defaults__
        else:
            return args, varargs, varkw, fn.func_defaults
except ImportError:
    import inspect

    def inspect_func_args(fn):
        """Return the argument specification of ``fn`` via ``inspect``."""
        return inspect.getargspec(fn)
|
||||
|
||||
if py3k:
    def callable(fn):
        """Return True when ``fn`` exposes a ``__call__`` attribute."""
        has_call = hasattr(fn, '__call__')
        return has_call
else:
    # Python 2 ships a builtin; re-export it under the same name.
    callable = callable
|
||||
|
||||
|
||||
################################################
|
||||
# cross-compatible metaclass implementation
|
||||
# Copyright (c) 2010-2012 Benjamin Peterson
|
||||
def with_metaclass(meta, base=object):
    """Create a base class with a metaclass."""
    class_name = "%sBase" % meta.__name__
    bases = (base,)
    return meta(class_name, bases, {})
|
||||
################################################
|
||||
|
||||
|
||||
def arg_stringname(func_arg):
    """Gets the string name of a kwarg or vararg

    In Python3.4 a function's args are
    of _ast.arg type not _ast.name
    """
    if not hasattr(func_arg, 'arg'):
        # plain value: use its string form
        return str(func_arg)
    return func_arg.arg
|
|
@ -0,0 +1,373 @@
|
|||
# mako/exceptions.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""exception classes"""
|
||||
|
||||
import traceback
|
||||
import sys
|
||||
from mako import util, compat
|
||||
|
||||
class MakoException(Exception):
|
||||
pass
|
||||
|
||||
class RuntimeException(MakoException):
|
||||
pass
|
||||
|
||||
def _format_filepos(lineno, pos, filename):
|
||||
if filename is None:
|
||||
return " at line: %d char: %d" % (lineno, pos)
|
||||
else:
|
||||
return " in file '%s' at line: %d char: %d" % (filename, lineno, pos)
|
||||
|
||||
|
||||
class CompileException(MakoException):
|
||||
def __init__(self, message, source, lineno, pos, filename):
|
||||
MakoException.__init__(self,
|
||||
message + _format_filepos(lineno, pos, filename))
|
||||
self.lineno = lineno
|
||||
self.pos = pos
|
||||
self.filename = filename
|
||||
self.source = source
|
||||
|
||||
class SyntaxException(MakoException):
|
||||
def __init__(self, message, source, lineno, pos, filename):
|
||||
MakoException.__init__(self,
|
||||
message + _format_filepos(lineno, pos, filename))
|
||||
self.lineno = lineno
|
||||
self.pos = pos
|
||||
self.filename = filename
|
||||
self.source = source
|
||||
|
||||
class UnsupportedError(MakoException):
|
||||
"""raised when a retired feature is used."""
|
||||
|
||||
class NameConflictError(MakoException):
|
||||
"""raised when a reserved word is used inappropriately"""
|
||||
|
||||
class TemplateLookupException(MakoException):
|
||||
pass
|
||||
|
||||
class TopLevelLookupException(TemplateLookupException):
|
||||
pass
|
||||
|
||||
class RichTraceback(object):
    """Pull the current exception from the ``sys`` traceback and extracts
    Mako-specific template information.

    See the usage examples in :ref:`handling_exceptions`.

    """
    def __init__(self, error=None, traceback=None):
        self.source, self.lineno = "", 0

        # Fall back to the exception currently being handled for
        # whichever of the two arguments was not supplied.
        if error is None or traceback is None:
            t, value, tback = sys.exc_info()

        if error is None:
            error = value or t

        if traceback is None:
            traceback = tback

        self.error = error
        self.records = self._init(traceback)

        # Compile/syntax errors carry their own source and line number,
        # which take precedence over what _init() derived.
        if isinstance(self.error, (CompileException, SyntaxException)):
            self.source = self.error.source
            self.lineno = self.error.lineno
            self._has_source = True

        self._init_message()

    @property
    def errorname(self):
        """Class name of the wrapped error."""
        return compat.exception_name(self.error)

    def _init_message(self):
        """Find a unicode representation of self.error"""
        try:
            self.message = compat.text_type(self.error)
        except UnicodeError:
            try:
                self.message = str(self.error)
            except UnicodeEncodeError:
                # Fallback to args as neither unicode nor
                # str(Exception(u'\xe6')) work in Python < 2.6
                self.message = self.error.args[0]
        if not isinstance(self.message, compat.text_type):
            self.message = compat.text_type(self.message, 'ascii', 'replace')

    def _get_reformatted_records(self, records):
        """Yield 4-tuples, substituting template data where a record has it."""
        for rec in records:
            if rec[6] is not None:
                yield (rec[4], rec[5], rec[2], rec[6])
            else:
                yield tuple(rec[0:4])

    @property
    def traceback(self):
        """Return a list of 4-tuple traceback records (i.e. normal python
        format) with template-corresponding lines remapped to the originating
        template.

        """
        return list(self._get_reformatted_records(self.records))

    @property
    def reverse_records(self):
        """Iterator over ``self.records`` in reverse order."""
        return reversed(self.records)

    @property
    def reverse_traceback(self):
        """Return the same data as traceback, except in reverse order.
        """

        return list(self._get_reformatted_records(self.reverse_records))

    def _init(self, trcback):
        """format a traceback from sys.exc_info() into 7-item tuples,
        containing the regular four traceback tuple items, plus the original
        template filename, the line number adjusted relative to the template
        source, and code line from that line number of the template."""

        import mako.template
        # filename -> (line_map, template_lines, template_filename,
        #              template_source).  All four values are cached so a
        # cache hit never reuses the filename/source of whichever
        # template happened to be processed last.
        mods = {}
        rawrecords = traceback.extract_tb(trcback)
        new_trcback = []
        for filename, lineno, function, line in rawrecords:
            if not line:
                line = ''
            try:
                (line_map, template_lines,
                 template_filename, template_source) = mods[filename]
            except KeyError:
                try:
                    info = mako.template._get_module_info(filename)
                    module_source = info.code
                    template_source = info.source
                    template_filename = info.template_filename or filename
                except KeyError:
                    # A normal .py file (not a Template)
                    if not compat.py3k:
                        try:
                            fp = open(filename, 'rb')
                            try:
                                encoding = util.parse_encoding(fp)
                            finally:
                                fp.close()
                        except IOError:
                            encoding = None
                        if encoding:
                            line = line.decode(encoding)
                        else:
                            line = line.decode('ascii', 'replace')
                    new_trcback.append((filename, lineno, function, line,
                                        None, None, None, None))
                    continue

                source_map = mako.template.ModuleInfo.\
                    get_module_source_metadata(
                        module_source, full_line_map=True)
                line_map = source_map['full_line_map']

                # split() already returns a list; a comprehension using
                # ``line`` as its variable would overwrite the traceback
                # line on Python 2.
                template_lines = template_source.split("\n")
                mods[filename] = (line_map, template_lines,
                                  template_filename, template_source)

            template_ln = line_map[lineno - 1]

            if template_ln <= len(template_lines):
                template_line = template_lines[template_ln - 1]
            else:
                template_line = None
            new_trcback.append((filename, lineno, function,
                                line, template_filename, template_ln,
                                template_line, template_source))
        if not self.source:
            # NOTE(review): this scan stops before index 0, so the
            # outermost record is never considered -- confirm intent.
            for l in range(len(new_trcback) - 1, 0, -1):
                if new_trcback[l][5]:
                    self.source = new_trcback[l][7]
                    self.lineno = new_trcback[l][5]
                    break
            else:
                if new_trcback:
                    try:
                        # A normal .py file (not a Template)
                        fp = open(new_trcback[-1][0], 'rb')
                        try:
                            encoding = util.parse_encoding(fp)
                            fp.seek(0)
                            self.source = fp.read()
                        finally:
                            fp.close()
                        if not encoding and compat.py3k:
                            # Python 3 source defaults to UTF-8; without
                            # this the source would stay a bytes object.
                            encoding = 'utf-8'
                        if encoding:
                            self.source = self.source.decode(encoding)
                    except IOError:
                        self.source = ''
                    self.lineno = new_trcback[-1][1]
        return new_trcback
|
||||
|
||||
|
||||
def text_error_template(lookup=None):
    """Provides a template that renders a stack trace in a similar format to
    the Python interpreter, substituting source template filenames, line
    numbers and code for that of the originating source template, as
    applicable.

    """
    import mako.template
    # The indentation inside the template is significant: the "File" and
    # source lines are indented as in an interpreter traceback, and the
    # embedded Python blocks are indented as ordinary Python.
    return mako.template.Template(r"""
<%page args="error=None, traceback=None"/>
<%!
    from mako.exceptions import RichTraceback
%>\
<%
    tback = RichTraceback(error=error, traceback=traceback)
%>\
Traceback (most recent call last):
% for (filename, lineno, function, line) in tback.traceback:
  File "${filename}", line ${lineno}, in ${function or '?'}
    ${line | trim}
% endfor
${tback.errorname}: ${tback.message}
""")
|
||||
|
||||
|
||||
def _install_pygments():
    """Bind the module-level highlighter names to the pygments plugin."""
    global syntax_highlight, pygments_html_formatter
    from mako.ext.pygmentplugin import (
        syntax_highlight,
        pygments_html_formatter,
    )


def _install_fallback():
    """Bind the module-level highlighter names to a plain HTML escaper."""
    global syntax_highlight, pygments_html_formatter
    from mako.filters import html_escape
    pygments_html_formatter = None

    def syntax_highlight(filename='', language=None):
        return html_escape


def _install_highlighting():
    """Install the pygments highlighter, or the fallback if it is missing."""
    try:
        _install_pygments()
    except ImportError:
        _install_fallback()


# Choose the highlighter once, when this module is imported.
_install_highlighting()
|
||||
|
||||
def html_error_template():
    """Provides a template that renders a stack trace in an HTML format,
    providing an excerpt of code as well as substituting source template
    filenames, line numbers and code for that of the originating source
    template, as applicable.

    The template's default ``encoding_errors`` value is
    ``'htmlentityreplace'``. The template has two options. With the
    ``full`` option disabled, only a section of an HTML document is
    returned. With the ``css`` option disabled, the default stylesheet
    won't be included.

    """
    import mako.template
    # The Python inside the <% %> blocks below must keep its relative
    # indentation (if/else bodies), otherwise the template cannot compile.
    return mako.template.Template(r"""
<%!
    from mako.exceptions import RichTraceback, syntax_highlight,\
            pygments_html_formatter
%>
<%page args="full=True, css=True, error=None, traceback=None"/>
% if full:
<html>
<head>
    <title>Mako Runtime Error</title>
% endif
% if css:
    <style>
        body { font-family:verdana; margin:10px 30px 10px 30px;}
        .stacktrace { margin:5px 5px 5px 5px; }
        .highlight { padding:0px 10px 0px 10px; background-color:#9F9FDF; }
        .nonhighlight { padding:0px; background-color:#DFDFDF; }
        .sample { padding:10px; margin:10px 10px 10px 10px;
                  font-family:monospace; }
        .sampleline { padding:0px 10px 0px 10px; }
        .sourceline { margin:5px 5px 10px 5px; font-family:monospace;}
        .location { font-size:80%; }
        .highlight { white-space:pre; }
        .sampleline { white-space:pre; }

    % if pygments_html_formatter:
        ${pygments_html_formatter.get_style_defs()}
        .linenos { min-width: 2.5em; text-align: right; }
        pre { margin: 0; }
        .syntax-highlighted { padding: 0 10px; }
        .syntax-highlightedtable { border-spacing: 1px; }
        .nonhighlight { border-top: 1px solid #DFDFDF;
                        border-bottom: 1px solid #DFDFDF; }
        .stacktrace .nonhighlight { margin: 5px 15px 10px; }
        .sourceline { margin: 0 0; font-family:monospace; }
        .code { background-color: #F8F8F8; width: 100%; }
        .error .code { background-color: #FFBDBD; }
        .error .syntax-highlighted { background-color: #FFBDBD; }
    % endif

    </style>
% endif
% if full:
</head>
<body>
% endif

<h2>Error !</h2>
<%
    tback = RichTraceback(error=error, traceback=traceback)
    src = tback.source
    line = tback.lineno
    if src:
        lines = src.split('\n')
    else:
        lines = None
%>
<h3>${tback.errorname}: ${tback.message|h}</h3>

% if lines:
    <div class="sample">
    <div class="nonhighlight">
% for index in range(max(0, line-4),min(len(lines), line+5)):
    <%
       if pygments_html_formatter:
           pygments_html_formatter.linenostart = index + 1
    %>
    % if index + 1 == line:
    <%
       if pygments_html_formatter:
           old_cssclass = pygments_html_formatter.cssclass
           pygments_html_formatter.cssclass = 'error ' + old_cssclass
    %>
        ${lines[index] | syntax_highlight(language='mako')}
    <%
       if pygments_html_formatter:
           pygments_html_formatter.cssclass = old_cssclass
    %>
    % else:
        ${lines[index] | syntax_highlight(language='mako')}
    % endif
% endfor
    </div>
    </div>
% endif

<div class="stacktrace">
% for (filename, lineno, function, line) in tback.reverse_traceback:
    <div class="location">${filename}, line ${lineno}:</div>
    <div class="nonhighlight">
    <%
       if pygments_html_formatter:
           pygments_html_formatter.linenostart = lineno
    %>
        <div class="sourceline">${line | syntax_highlight(filename)}</div>
    </div>
% endfor
</div>

% if full:
</body>
</html>
% endif
""", output_encoding=sys.getdefaultencoding(),
        encoding_errors='htmlentityreplace')
|
|
@ -0,0 +1,201 @@
|
|||
# mako/filters.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
|
||||
import re
|
||||
import codecs
|
||||
|
||||
from mako.compat import quote_plus, unquote_plus, codepoint2name, \
|
||||
name2codepoint
|
||||
|
||||
from mako import compat
|
||||
|
||||
# Maps each XML-special character to the entity that replaces it.
# The values must be the escaped forms; mapping a character to itself
# would make xml_escape() a no-op.
xml_escapes = {
    '&': '&amp;',
    '>': '&gt;',
    '<': '&lt;',
    '"': '&#34;',   # also &quot; in html-only
    "'": '&#39;'    # also &apos; in html-only
}
|
||||
|
||||
# XXX: &quot; is valid in HTML and XML
#      &apos; is not valid HTML, but is valid XML
|
||||
|
||||
def legacy_html_escape(s):
    """legacy HTML escape for non-unicode mode.

    Replaces ``&``, ``>``, ``<``, ``"`` and ``'`` in ``s`` with their
    entity / numeric references and returns the result.
    """
    # '&' must be handled first so the ampersands introduced by the
    # later replacements are not escaped a second time.
    s = s.replace("&", "&amp;")
    s = s.replace(">", "&gt;")
    s = s.replace("<", "&lt;")
    s = s.replace('"', "&#34;")
    s = s.replace("'", "&#39;")
    return s
|
||||
|
||||
|
||||
try:
|
||||
import markupsafe
|
||||
html_escape = markupsafe.escape
|
||||
except ImportError:
|
||||
html_escape = legacy_html_escape
|
||||
|
||||
def xml_escape(string):
    """Replace each of ``& < " ' >`` in ``string`` via ``xml_escapes``."""
    def _substitute(found):
        return xml_escapes[found.group()]

    return re.sub(r'([&<"\'>])', _substitute, string)
|
||||
|
||||
def url_escape(string):
    """Encode ``string`` as UTF-8 and quote the octets with ``quote_plus``."""
    octets = string.encode("utf8")
    return quote_plus(octets)
|
||||
|
||||
def legacy_url_escape(string):
    """Quote ``string`` with ``quote_plus`` without encoding it first."""
    quoted = quote_plus(string)
    return quoted
|
||||
|
||||
def url_unescape(string):
    """Reverse ``quote_plus`` quoting and return the resulting text.

    When ``unquote_plus`` yields a byte string holding non-ASCII octets,
    those octets are decoded as UTF-8.  A result that is already text is
    returned unchanged; text objects have no ``decode`` method on
    Python 3, so attempting to decode them would raise.
    """
    text = unquote_plus(string)
    if isinstance(text, bytes) and not is_ascii_str(text):
        text = text.decode("utf8")
    return text
|
||||
|
||||
def trim(string):
    """Return ``string`` with leading and trailing whitespace removed."""
    stripped = string.strip()
    return stripped
|
||||
|
||||
|
||||
class Decode(object):
    """Attribute access yields a decoder for the encoding of that name."""

    def __getattr__(self, key):
        def decode(x):
            # text passes through untouched
            if isinstance(x, compat.text_type):
                return x
            # byte strings are decoded with the requested encoding
            if isinstance(x, compat.binary_type):
                return compat.text_type(x, encoding=key)
            # anything else is converted with str() and tried again
            return decode(str(x))
        return decode


decode = Decode()
|
||||
|
||||
|
||||
# Matches strings made up solely of code points 0x00-0x7f.
_ASCII_re = re.compile(r'\A[\x00-\x7f]*\Z')


def is_ascii_str(text):
    """Return a truthy match when ``text`` is a ``str`` of ASCII only.

    Returns ``False`` for objects that are not ``str`` and ``None`` for
    a ``str`` that contains other characters.
    """
    if not isinstance(text, str):
        return False
    return _ASCII_re.match(text)
|
||||
|
||||
################################################################
|
||||
|
||||
class XMLEntityEscaper(object):
    """Converts between characters and XML/HTML character references.

    :param codepoint2name: mapping of code point -> entity name.
    :param name2codepoint: mapping of entity name -> code point.
    """

    def __init__(self, codepoint2name, name2codepoint):
        self.codepoint2entity = dict([(c, compat.text_type('&%s;' % n))
                                      for c, n in codepoint2name.items()])
        self.name2codepoint = name2codepoint

    def escape_entities(self, text):
        """Replace characters with their character entity references.

        Only characters corresponding to a named entity are replaced.
        """
        return compat.text_type(text).translate(self.codepoint2entity)

    def __escape(self, m):
        codepoint = ord(m.group())
        try:
            return self.codepoint2entity[codepoint]
        except (KeyError, IndexError):
            # no named entity: emit a hexadecimal character reference
            return '&#x%X;' % codepoint

    __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')

    def escape(self, text):
        """Replace characters with their character references.

        Replace characters by their named entity references.
        Non-ASCII characters, if they do not have a named entity reference,
        are replaced by numerical character references.

        The return value is guaranteed to be ASCII.
        """
        return self.__escapable.sub(self.__escape, compat.text_type(text)
                                    ).encode('ascii')

    # XXX: This regexp will not match all valid XML entity names__.
    # (It punts on details involving involving CombiningChars and Extenders.)
    #
    # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
    #
    # Hex digits are matched in either case: escape() above emits
    # uppercase digits (``%X``), which must round-trip through unescape().
    __characterrefs = re.compile(r'''& (?:
                                          \#(\d+)
                                          | \#x([\da-fA-F]+)
                                          | ( (?!\d) [:\w] [-.:\w]+ )
                                          ) ;''',
                                 re.X | re.UNICODE)

    def __unescape(self, m):
        dval, hval, name = m.groups()
        if dval:
            codepoint = int(dval)
        elif hval:
            codepoint = int(hval, 16)
        else:
            codepoint = self.name2codepoint.get(name, 0xfffd)
            # U+FFFD = "REPLACEMENT CHARACTER"
        if codepoint < 128:
            return chr(codepoint)
        try:
            # Python 2: chr() only accepts values below 256
            return unichr(codepoint)
        except NameError:
            # Python 3: chr() covers the whole Unicode range
            return chr(codepoint)

    def unescape(self, text):
        """Unescape character references.

        All character references (both entity references and numerical
        character references) are unescaped.
        """
        return self.__characterrefs.sub(self.__unescape, text)
|
||||
|
||||
|
||||
_html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint)
|
||||
|
||||
html_entities_escape = _html_entities_escaper.escape_entities
|
||||
html_entities_unescape = _html_entities_escaper.unescape
|
||||
|
||||
|
||||
def htmlentityreplace_errors(ex):
    """An encoding error handler.

    This python `codecs`_ error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references.

    >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
    'The cost was &euro;12.'
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start:ex.end]
        text = _html_entities_escaper.escape(bad_text)
        if isinstance(text, compat.binary_type):
            # escape() returns ASCII bytes; decode them explicitly,
            # because calling the Python 3 text type on a bytes object
            # would produce its repr ("b'...'") instead of its content.
            text = text.decode('ascii')
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error('htmlentityreplace', htmlentityreplace_errors)
|
||||
|
||||
|
||||
# TODO: options to make this dynamic per-compilation will be added in a later
|
||||
# release
|
||||
DEFAULT_ESCAPES = {
|
||||
'x': 'filters.xml_escape',
|
||||
'h': 'filters.html_escape',
|
||||
'u': 'filters.url_escape',
|
||||
'trim': 'filters.trim',
|
||||
'entity': 'filters.html_entities_escape',
|
||||
'unicode': 'unicode',
|
||||
'decode': 'decode',
|
||||
'str': 'str',
|
||||
'n': 'n'
|
||||
}
|
||||
|
||||
if compat.py3k:
|
||||
DEFAULT_ESCAPES.update({
|
||||
'unicode': 'str'
|
||||
})
|
||||
|
||||
NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy()
|
||||
NON_UNICODE_ESCAPES['h'] = 'filters.legacy_html_escape'
|
||||
NON_UNICODE_ESCAPES['u'] = 'filters.legacy_url_escape'
|
||||
|
|
@ -0,0 +1,441 @@
|
|||
# mako/lexer.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""provides the Lexer class for parsing template strings into parse trees."""
|
||||
|
||||
import re
|
||||
import codecs
|
||||
from mako import parsetree, exceptions, compat
|
||||
from mako.pygen import adjust_whitespace
|
||||
|
||||
_regexp_cache = {}
|
||||
|
||||
class Lexer(object):
|
||||
def __init__(self, text, filename=None,
             disable_unicode=False,
             input_encoding=None, preprocessor=None):
    """Set up lexer state for the template source ``text``.

    ``preprocessor`` may be ``None``, a single object, or an iterable;
    it is stored on the instance as an iterable in every case.
    """
    self.text = text
    self.filename = filename
    self.template = parsetree.TemplateNode(self.filename)

    # position bookkeeping
    self.matched_lineno = 1
    self.matched_charpos = 0
    self.lineno = 1
    self.match_position = 0

    # stacks of currently open constructs
    self.tag = []
    self.control_line = []
    self.ternary_stack = []

    self.disable_unicode = disable_unicode
    self.encoding = input_encoding

    if compat.py3k and disable_unicode:
        raise exceptions.UnsupportedError(
            "Mako for Python 3 does not "
            "support disabling Unicode")

    if preprocessor is None:
        self.preprocessor = []
    elif hasattr(preprocessor, '__iter__'):
        self.preprocessor = preprocessor
    else:
        self.preprocessor = [preprocessor]
|
||||
|
||||
@property
|
||||
def exception_kwargs(self):
|
||||
return {'source': self.text,
|
||||
'lineno': self.matched_lineno,
|
||||
'pos': self.matched_charpos,
|
||||
'filename': self.filename}
|
||||
|
||||
def match(self, regexp, flags=None):
    """compile the given regexp, cache the reg, and call match_reg()."""
    cache_key = (regexp, flags)
    reg = _regexp_cache.get(cache_key)
    if reg is None:
        # not cached yet: compile with flags only when some were given
        if flags:
            reg = re.compile(regexp, flags)
        else:
            reg = re.compile(regexp)
        _regexp_cache[cache_key] = reg

    return self.match_reg(reg)
|
||||
|
||||
def match_reg(self, reg):
|
||||
"""match the given regular expression object to the current text
|
||||
position.
|
||||
|
||||
if a match occurs, update the current text and line position.
|
||||
|
||||
"""
|
||||
|
||||
mp = self.match_position
|
||||
|
||||
match = reg.match(self.text, self.match_position)
|
||||
if match:
|
||||
(start, end) = match.span()
|
||||
if end == start:
|
||||
self.match_position = end + 1
|
||||
else:
|
||||
self.match_position = end
|
||||
self.matched_lineno = self.lineno
|
||||
lines = re.findall(r"\n", self.text[mp:self.match_position])
|
||||
cp = mp - 1
|
||||
while (cp >= 0 and cp < self.textlength and self.text[cp] != '\n'):
|
||||
cp -= 1
|
||||
self.matched_charpos = mp - cp
|
||||
self.lineno += len(lines)
|
||||
#print "MATCHED:", match.group(0), "LINE START:",
|
||||
# self.matched_lineno, "LINE END:", self.lineno
|
||||
#print "MATCH:", regexp, "\n", self.text[mp : mp + 15], \
|
||||
# (match and "TRUE" or "FALSE")
|
||||
return match
|
||||
|
||||
def parse_until_text(self, *text):
    """Consume text up to one of the ``text`` patterns.

    Returns ``(consumed_text, terminator)``; raises SyntaxException
    when none of the patterns can be found.
    """
    startpos = self.match_position
    text_re = r'|'.join(text)
    brace_level = 0
    while True:
        # skip a comment running to the end of the line
        if self.match(r'#.*\n'):
            continue
        # skip a quoted string
        if self.match(r'(\"\"\"|\'\'\'|\"|\')((?<!\\)\\\1|.)*?\1',
                      re.S):
            continue
        found = self.match(r'(%s)' % text_re)
        if found:
            terminator = found.group(1)
            if terminator == '}' and brace_level > 0:
                # closes a brace opened inside the consumed text
                brace_level -= 1
                continue
            stop = self.match_position - len(terminator)
            return self.text[startpos:stop], terminator
        chunk = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
        if chunk:
            brace_level += chunk.group(1).count('{')
            brace_level -= chunk.group(1).count('}')
            continue
        raise exceptions.SyntaxException(
            "Expected: %s" %
            ','.join(text),
            **self.exception_kwargs)
|
||||
|
||||
def append_node(self, nodecls, *args, **kwargs):
    """Create a ``nodecls`` node and attach it to the parse tree.

    The current source, line, position and filename are supplied to the
    node constructor; tag and control-line stacks are updated according
    to the kind of node created.
    """
    kwargs.setdefault('source', self.text)
    kwargs.setdefault('lineno', self.matched_lineno)
    kwargs.setdefault('pos', self.matched_charpos)
    kwargs['filename'] = self.filename
    node = nodecls(*args, **kwargs)

    # attach to the innermost open tag, or to the template itself
    if self.tag:
        self.tag[-1].nodes.append(node)
    else:
        self.template.nodes.append(node)

    # build a set of child nodes for the control line
    # (used for loop variable detection)
    # also build a set of child nodes on ternary control lines
    # (used for determining if a pass needs to be auto-inserted
    if self.control_line:
        control_frame = self.control_line[-1]
        control_frame.nodes.append(node)
        is_ternary_line = (isinstance(node, parsetree.ControlLine) and
                           control_frame.is_ternary(node.keyword))
        if not is_ternary_line:
            if self.ternary_stack and self.ternary_stack[-1]:
                self.ternary_stack[-1][-1].nodes.append(node)

    if isinstance(node, parsetree.Tag):
        if self.tag:
            node.parent = self.tag[-1]
        self.tag.append(node)
    elif isinstance(node, parsetree.ControlLine):
        if node.isend:
            self.control_line.pop()
            self.ternary_stack.pop()
        elif node.is_primary:
            self.control_line.append(node)
            self.ternary_stack.append([])
        elif self.control_line:
            if self.control_line[-1].is_ternary(node.keyword):
                self.ternary_stack[-1].append(node)
            else:
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'" %
                    (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs)
|
||||
|
||||
# Magic encoding comment expected at the start of the template.
_coding_re = re.compile(r'#.*coding[:=]\s*([-\w.]+).*\r?\n')

def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
    """given string/unicode or bytes/string, determine encoding
       from magic encoding comment, return body as unicode
       or raw if decode_raw=False

    """
    # already text: only the encoding name has to be worked out
    if isinstance(text, compat.text_type):
        m = self._coding_re.match(text)
        encoding = m and m.group(1) or known_encoding or 'ascii'
        return encoding, text

    has_bom = text.startswith(codecs.BOM_UTF8)
    if has_bom:
        text = text[len(codecs.BOM_UTF8):]
        parsed_encoding = 'utf-8'
        m = self._coding_re.match(text.decode('utf-8', 'ignore'))
        if m is not None and m.group(1) != 'utf-8':
            raise exceptions.CompileException(
                "Found utf-8 BOM in file, with conflicting "
                "magic encoding comment of '%s'" % m.group(1),
                text.decode('utf-8', 'ignore'),
                0, 0, filename)
    else:
        m = self._coding_re.match(text.decode('utf-8', 'ignore'))
        if m:
            parsed_encoding = m.group(1)
        else:
            parsed_encoding = known_encoding or 'ascii'

    if not decode_raw:
        return parsed_encoding, text

    try:
        text = text.decode(parsed_encoding)
    except UnicodeDecodeError:
        raise exceptions.CompileException(
            "Unicode decode operation of encoding '%s' failed" %
            parsed_encoding,
            text.decode('utf-8', 'ignore'),
            0, 0, filename)

    return parsed_encoding, text
|
||||
|
||||
def parse(self):
    """Lex ``self.text`` and return the populated template node.

    Raises SyntaxException for a tag or control keyword left open at the
    end of the input, and CompileException if no matcher can make
    progress.
    """
    self.encoding, self.text = self.decode_raw_stream(
        self.text,
        not self.disable_unicode,
        self.encoding,
        self.filename,)

    for preproc in self.preprocessor:
        self.text = preproc(self.text)

    # push the match marker past the
    # encoding comment.
    self.match_reg(self._coding_re)

    self.textlength = len(self.text)

    while (True):
        if self.match_position > self.textlength:
            break

        if self.match_end():
            break
        if self.match_expression():
            continue
        if self.match_control_line():
            continue
        if self.match_comment():
            continue
        if self.match_tag_start():
            continue
        if self.match_tag_end():
            continue
        if self.match_python_block():
            continue
        if self.match_text():
            continue

        if self.match_position > self.textlength:
            break
        # CompileException needs the source/position arguments as well;
        # passing only the message would raise TypeError instead.
        raise exceptions.CompileException(
            "assertion failed", **self.exception_kwargs)

    if len(self.tag):
        raise exceptions.SyntaxException("Unclosed tag: <%%%s>" %
                                         self.tag[-1].keyword,
                                         **self.exception_kwargs)
    if len(self.control_line):
        raise exceptions.SyntaxException(
            "Unterminated control keyword: '%s'" %
            self.control_line[-1].keyword,
            self.text,
            self.control_line[-1].lineno,
            self.control_line[-1].pos, self.filename)
    return self.template
|
||||
|
||||
def match_tag_start(self):
    """Try to match an opening ``<%keyword ...>`` tag at the position.

    Returns False when no tag starts here.  For a ``text`` tag the body
    is consumed as literal text and the result of match_tag_end() is
    returned; otherwise returns True.
    """
    match = self.match(r'''
        \<%     # opening tag

        ([\w\.\:]+)   # keyword

        ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*)  # attrname, = \
                                           #        sign, string expression

        \s*     # more whitespace

        (/)?>   # closing

        ''',

                       re.I | re.S | re.X)

    if not match:
        return False

    keyword, attr, isend = match.groups()
    self.keyword = keyword
    attributes = {}
    if attr:
        pairs = re.findall(
            r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr)
        for key, val1, val2 in pairs:
            text = val1 or val2
            text = text.replace('\r\n', '\n')
            attributes[key] = text
    self.append_node(parsetree.Tag, keyword, attributes)
    if isend:
        # self-closing tag: nothing remains open
        self.tag.pop()
    elif keyword == 'text':
        match = self.match(r'(.*?)(?=\</%text>)', re.S)
        if not match:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" %
                self.tag[-1].keyword,
                **self.exception_kwargs)
        self.append_node(parsetree.Text, match.group(1))
        return self.match_tag_end()
    return True
|
||||
|
||||
def match_tag_end(self):
    """Try to match a closing ``</%keyword>`` tag.

    Returns ``False`` when no closing tag matches.  Otherwise the closing
    keyword must equal the keyword of the innermost open tag
    (``self.tag[-1]``); that tag is popped and ``True`` is returned.

    Raises ``exceptions.SyntaxException`` when there is no open tag, or
    when the closing keyword differs from the innermost open tag.
    """
    closing = self.match(r'\</%[\t ]*(.+?)[\t ]*>')
    if not closing:
        return False

    closed_keyword = closing.group(1)

    if len(self.tag) == 0:
        raise exceptions.SyntaxException(
            "Closing tag without opening tag: </%%%s>" % closed_keyword,
            **self.exception_kwargs)

    open_keyword = self.tag[-1].keyword
    if open_keyword != closed_keyword:
        raise exceptions.SyntaxException(
            "Closing tag </%%%s> does not match tag: <%%%s>" %
            (closed_keyword, open_keyword),
            **self.exception_kwargs)

    self.tag.pop()
    return True
|
||||
|
||||
def match_end(self):
    """Check for the end of the input.

    Returns ``False`` when ``\\Z`` does not match.  When it does, returns
    the matched text if that text is non-empty, and ``True`` otherwise.
    """
    at_end = self.match(r'\Z', re.S)
    if not at_end:
        return False
    # fall back to True when the matched text is empty
    return at_end.group() or True
|
||||
|
||||
def match_text(self):
    """Try to match a run of plain template text.

    The pattern lazily captures text (group 1) up to the first of: the
    start of a line holding a ``%`` control line or ``##`` comment, a
    ``${`` expression, a ``<%`` / ``</%`` / ``<&`` / ``</&`` construct, a
    backslash-escaped newline, or the end of the string.  Only the
    escaped newline is consumed; the other terminators are lookaheads.

    A non-empty capture is appended as a ``parsetree.Text`` node.

    Returns ``True`` when the pattern matched (even if the captured text
    was empty) and ``False`` otherwise.
    """
    match = self.match(r"""
        (.*?) # anything, followed by:
        (
        (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
        # comment preceded by a
        # consumed newline and whitespace
        |
        (?=\${) # an expression
        |
        (?=</?[%&]) # a substitution or block or call start or end
        # - don't consume
        |
        (\\\r?\n) # an escaped newline - throw away
        |
        \Z # end of string
        )""", re.X | re.S)

    if match:
        text = match.group(1)
        # an empty capture produces no node but still counts as a match
        if text:
            self.append_node(parsetree.Text, text)
        return True
    else:
        return False
|
||||
|
||||
def match_python_block(self):
    """Try to match a ``<% ... %>`` or ``<%! ... %>`` Python block.

    On a match, the text up to the terminating ``%>`` is read with
    ``self.parse_until_text``, passed through ``adjust_whitespace`` and
    appended as a ``parsetree.Code`` node.  The node's second argument is
    ``True`` for the ``<%!`` form.

    Returns ``True`` when a block was matched, ``False`` otherwise.
    """
    opener = self.match(r"<%(!)?")
    if not opener:
        return False

    # remember where the block started before scanning ahead
    start_line = self.matched_lineno
    start_pos = self.matched_charpos

    source, _terminator = self.parse_until_text(r'%>')

    # the trailing newline helps
    # compiler.parse() not complain about indentation
    source = adjust_whitespace(source) + "\n"

    is_module_level = opener.group(1) == '!'
    self.append_node(
        parsetree.Code,
        source,
        is_module_level,
        lineno=start_line,
        pos=start_pos)
    return True
|
||||
|
||||
def match_expression(self):
    """Try to match a ``${expression}`` or ``${expression | filters}``.

    The expression text is read up to the first ``|`` or ``}``.  If a
    ``|`` ended it, the filter list is read up to the closing ``}``;
    otherwise the filter list is the empty string.  A
    ``parsetree.Expression`` node is appended with the expression text
    (CRLF normalized to LF) and the stripped filter text.

    Returns ``True`` when an expression was matched, ``False`` otherwise.
    """
    opener = self.match(r"\${")
    if not opener:
        return False

    # remember where the expression started before scanning ahead
    start_line = self.matched_lineno
    start_pos = self.matched_charpos

    expr_text, terminator = self.parse_until_text(r'\|', r'}')
    if terminator == '|':
        filter_text, terminator = self.parse_until_text(r'}')
    else:
        filter_text = ""

    expr_text = expr_text.replace('\r\n', '\n')
    self.append_node(
        parsetree.Expression,
        expr_text,
        filter_text.strip(),
        lineno=start_line,
        pos=start_pos)
    return True
|
||||
|
||||
def match_control_line(self):
    """Try to match a ``% keyword ...`` control line or ``## comment``.

    The line must start (after optional tabs/spaces) with ``%`` not
    followed by another ``%``, or with ``##``.  The rest of the line is
    captured, and a backslash directly before the line break continues
    the capture onto the next line.

    * ``%`` lines are split into an optional ``end`` prefix and a keyword.
      An ``end...`` line must match the keyword of the innermost open
      control line (``self.control_line[-1]``).  A
      ``parsetree.ControlLine`` node is appended.
    * ``##`` lines are appended as ``parsetree.Comment`` nodes.

    Returns ``True`` when a line was matched, ``False`` otherwise.

    Raises ``exceptions.SyntaxException`` for a ``%`` line with no
    keyword, an ``end`` line with nothing open, or an ``end`` line whose
    keyword differs from the innermost open control line.
    """
    # The continuation alternative is backslash + optional CR + LF, the
    # same form match_text uses for escaped newlines.  The previous
    # pattern (``\\r?\n``) meant backslash + optional letter "r" + LF, so
    # CRLF continuations were missed and a literal ``\r`` before a
    # newline was treated as one.
    match = self.match(
        r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
        r"(?:\r?\n|\Z)", re.M)
    if match:
        operator = match.group(1)
        text = match.group(2)
        if operator == '%':
            m2 = re.match(r'(end)?(\w+)\s*(.*)', text)
            if not m2:
                raise exceptions.SyntaxException(
                    "Invalid control line: '%s'" %
                    text,
                    **self.exception_kwargs)
            isend, keyword = m2.group(1, 2)
            isend = (isend is not None)

            if isend:
                # a closing line must pair with the innermost open one
                if not len(self.control_line):
                    raise exceptions.SyntaxException(
                        "No starting keyword '%s' for '%s'" %
                        (keyword, text),
                        **self.exception_kwargs)
                elif self.control_line[-1].keyword != keyword:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' doesn't match keyword '%s'" %
                        (text, self.control_line[-1].keyword),
                        **self.exception_kwargs)
            self.append_node(parsetree.ControlLine, keyword, isend, text)
        else:
            self.append_node(parsetree.Comment, text)
        return True
    else:
        return False
|
||||
|
||||
def match_comment(self):
    """Match the multiline ``<%doc> ... </%doc>`` form of a comment.

    The text between the tags is appended as a ``parsetree.Comment``
    node.  Returns ``True`` when matched, ``False`` otherwise.
    """
    doc_block = self.match(r"<%doc>(.*?)</%doc>", re.S)
    if not doc_block:
        return False
    self.append_node(parsetree.Comment, doc_block.group(1))
    return True
|
||||
|
|
@ -0,0 +1,359 @@
|
|||
# mako/lookup.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
import os, stat, posixpath, re
|
||||
from mako import exceptions, util
|
||||
from mako.template import Template
|
||||
|
||||
try:
|
||||
import threading
|
||||
except:
|
||||
import dummy_threading as threading
|
||||
|
||||
class TemplateCollection(object):
    """A set of :class:`.Template` objects addressable by URI.

    Template tags that refer to other templates -- ``<%include>``,
    ``<%namespace>`` and ``<%inherit>`` -- carry a ``file`` attribute
    holding a string URI.  That URI is handed to the
    :class:`.TemplateCollection` of the referring :class:`.Template`
    for resolution.

    This class is abstract; the usual concrete implementation is
    :class:`.TemplateLookup`.

    """

    def has_template(self, uri):
        """Tell whether ``uri`` can be resolved to a :class:`.Template`.

        :param uri: String URI of the template to be resolved.

        Returns ``True`` if :meth:`get_template` succeeds and ``False`` if
        it raises :class:`.TemplateLookupException`.

        """
        try:
            self.get_template(uri)
        except exceptions.TemplateLookupException:
            return False
        else:
            return True

    def get_template(self, uri, relativeto=None):
        """Return the :class:`.Template` for ``uri``.

        This base implementation always raises
        :class:`.NotImplementedError`.  Implementations should raise
        :class:`.TemplateLookupException` when ``uri`` cannot be
        resolved.

        :param uri: String URI of the template to be resolved.
        :param relativeto: if present, the given ``uri`` is assumed to
         be relative to this URI.

        """
        raise NotImplementedError()

    def filename_to_uri(self, uri, filename):
        """Convert ``filename`` to a URI relative to this
        :class:`.TemplateCollection`.

        This base implementation returns ``uri`` unchanged.

        """
        return uri

    def adjust_uri(self, uri, filename):
        """Adjust ``uri`` based on the calling ``filename``.

        When called from the runtime, ``filename`` is taken directly
        from the ``filename`` attribute of the calling template.  A
        custom subclass can therefore place any string identifier it
        likes in the ``filename`` of the :class:`.Template` objects it
        builds and will receive it back here.

        This base implementation returns ``uri`` unchanged.

        """
        return uri
|
||||
|
||||
class TemplateLookup(TemplateCollection):
    """A :class:`.TemplateCollection` that finds template source files on
    the local filesystem.

    The main argument is ``directories``, the list of directories to
    search:

    .. sourcecode:: python

        lookup = TemplateLookup(["/path/to/templates"])
        some_template = lookup.get_template("/index.html")

    :class:`.Template` objects can also be registered programmatically
    with :meth:`.put_string` or :meth:`.put_template`:

    .. sourcecode:: python

        lookup = TemplateLookup()
        lookup.put_string("base.html", '''
            <html><body>${self.next()}</body></html>
        ''')
        lookup.put_string("hello.html", '''
            <%include file='base.html'/>

            Hello, world !
        ''')


    :param directories: A list of directory names which will be
     searched for a particular template URI.  The URI is appended
     to each directory and the filesystem checked.

    :param collection_size: Approximate size of the collection used
     to store templates.  At the default of ``-1`` the size is
     unbounded and a plain dictionary maps URI strings to
     :class:`.Template` instances.  Otherwise a least-recently-used
     cache keeps the collection at approximately the given size.

    :param filesystem_checks: When ``True`` (the default), each call to
     :meth:`.TemplateLookup.get_template()` compares the file's last
     modified time against the time the cached :class:`.Template` was
     created, and regenerates the template when the source is newer.
     Set this to ``False`` for a very minor performance increase.

    :param modulename_callable: A callable which, when present, is
     passed the path of the source file and the requested URI, and
     returns the full path of the generated Python module file.  This
     allows alternate schemes for module location.  With the default
     of ``None``, the built-in scheme based on ``module_directory``
     plus ``uri`` is used.

    All other keyword parameters available for :class:`.Template` are
    mirrored here and are passed on to each new :class:`.Template`
    this lookup creates.

    """

    def __init__(self,
                        directories=None,
                        module_directory=None,
                        filesystem_checks=True,
                        collection_size=-1,
                        format_exceptions=False,
                        error_handler=None,
                        disable_unicode=False,
                        bytestring_passthrough=False,
                        output_encoding=None,
                        encoding_errors='strict',

                        cache_args=None,
                        cache_impl='beaker',
                        cache_enabled=True,
                        cache_type=None,
                        cache_dir=None,
                        cache_url=None,

                        modulename_callable=None,
                        module_writer=None,
                        default_filters=None,
                        buffer_filters=(),
                        strict_undefined=False,
                        imports=None,
                        future_imports=None,
                        enable_loop=True,
                        input_encoding=None,
                        preprocessor=None,
                        lexer_cls=None):

        self.directories = [
            posixpath.normpath(entry)
            for entry in util.to_list(directories, ())
        ]
        self.module_directory = module_directory
        self.modulename_callable = modulename_callable
        self.filesystem_checks = filesystem_checks
        self.collection_size = collection_size

        if cache_args is None:
            cache_args = {}
        # transfer deprecated cache_* args
        for legacy_key, legacy_value in (
                ('dir', cache_dir),
                ('url', cache_url),
                ('type', cache_type)):
            if legacy_value:
                cache_args.setdefault(legacy_key, legacy_value)

        # keyword arguments forwarded to every Template this lookup builds
        self.template_args = dict(
            format_exceptions=format_exceptions,
            error_handler=error_handler,
            disable_unicode=disable_unicode,
            bytestring_passthrough=bytestring_passthrough,
            output_encoding=output_encoding,
            cache_impl=cache_impl,
            encoding_errors=encoding_errors,
            input_encoding=input_encoding,
            module_directory=module_directory,
            module_writer=module_writer,
            cache_args=cache_args,
            cache_enabled=cache_enabled,
            default_filters=default_filters,
            buffer_filters=buffer_filters,
            strict_undefined=strict_undefined,
            imports=imports,
            future_imports=future_imports,
            enable_loop=enable_loop,
            preprocessor=preprocessor,
            lexer_cls=lexer_cls,
        )

        if collection_size == -1:
            # unbounded: plain dictionaries
            self._collection = {}
            self._uri_cache = {}
        else:
            self._collection = util.LRUCache(collection_size)
            self._uri_cache = util.LRUCache(collection_size)
        self._mutex = threading.Lock()

    def get_template(self, uri):
        """Return the :class:`.Template` registered or found for ``uri``.

        A cached template is returned if present (after a freshness check
        when ``filesystem_checks`` is on).  Otherwise each configured
        directory is tried in order and the first existing file is loaded.

        Raises ``exceptions.TopLevelLookupException`` when no directory
        contains the file.

        .. note:: The ``relativeto`` argument is not supported here at
           the moment.

        """

        try:
            cached = self._collection[uri]
            if self.filesystem_checks:
                return self._check(uri, cached)
            return cached
        except KeyError:
            # strip leading slashes so the URI joins below each directory
            relative_uri = re.sub(r'^\/+', '', uri)
            for directory in self.directories:
                candidate = posixpath.normpath(
                    posixpath.join(directory, relative_uri))
                if os.path.isfile(candidate):
                    return self._load(candidate, uri)
            raise exceptions.TopLevelLookupException(
                "Cant locate template for uri %r" % uri)

    def adjust_uri(self, uri, relativeto):
        """Adjust the given ``uri`` based on the given relative URI.

        A URI starting with ``/`` is returned as is.  Otherwise it is
        joined to the directory part of ``relativeto``, or prefixed with
        ``/`` when ``relativeto`` is ``None``.  Results are memoized in
        the URI cache under ``(uri, relativeto)``.
        """

        cache_key = (uri, relativeto)
        if cache_key in self._uri_cache:
            return self._uri_cache[cache_key]

        if uri[0] == '/':
            adjusted = uri
        elif relativeto is not None:
            adjusted = posixpath.join(posixpath.dirname(relativeto), uri)
        else:
            adjusted = '/' + uri

        self._uri_cache[cache_key] = adjusted
        return adjusted

    def filename_to_uri(self, filename):
        """Convert the given ``filename`` to a URI relative to
        this :class:`.TemplateCollection`.

        The result is memoized in the URI cache under ``filename``.
        """

        try:
            return self._uri_cache[filename]
        except KeyError:
            relative = self._relativeize(filename)
            self._uri_cache[filename] = relative
            return relative

    def _relativeize(self, filename):
        """Return the part of ``filename`` that follows the first
        configured directory it starts with, or ``None`` if it starts
        with none of them.

        """

        normalized = posixpath.normpath(filename)
        for directory in self.directories:
            prefix_len = len(directory)
            if normalized[0:prefix_len] == directory:
                return normalized[prefix_len:]
        return None

    def _load(self, filename, uri):
        """Build the :class:`.Template` for ``filename``, store it in the
        collection under ``uri`` and return it, all while holding the
        lookup's mutex."""
        with self._mutex:
            try:
                # try returning from collection one
                # more time in case concurrent thread already loaded
                return self._collection[uri]
            except KeyError:
                pass
            try:
                if self.modulename_callable is None:
                    module_filename = None
                else:
                    module_filename = self.modulename_callable(filename, uri)
                template = Template(
                    uri=uri,
                    filename=posixpath.normpath(filename),
                    lookup=self,
                    module_filename=module_filename,
                    **self.template_args)
                self._collection[uri] = template
                return template
            except:
                # if compilation fails etc, ensure
                # template is removed from collection,
                # re-raise
                self._collection.pop(uri, None)
                raise

    def _check(self, uri, template):
        """Return ``template`` if it is still current, reloading it when
        its source file has a newer modification time.

        Templates without a ``filename`` are returned unchanged.  Raises
        ``exceptions.TemplateLookupException`` (after dropping the cached
        entry) when an ``OSError`` occurs.
        """
        if template.filename is None:
            return template

        try:
            source_mtime = os.stat(template.filename)[stat.ST_MTIME]
            if template.module._modified_time < source_mtime:
                self._collection.pop(uri, None)
                return self._load(template.filename, uri)
            return template
        except OSError:
            self._collection.pop(uri, None)
            raise exceptions.TemplateLookupException(
                "Cant locate template for uri %r" % uri)

    def put_string(self, uri, text):
        """Build a :class:`.Template` from the string ``text`` and store
        it in this :class:`.TemplateLookup` under ``uri``.

        """
        self._collection[uri] = Template(
            text,
            lookup=self,
            uri=uri,
            **self.template_args)

    def put_template(self, uri, template):
        """Store the given :class:`.Template` object in this
        :class:`.TemplateLookup` under ``uri``.

        """
        self._collection[uri] = template
|
||||
|
|
@ -0,0 +1,594 @@
|
|||
# mako/parsetree.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""defines the parse tree components for Mako templates."""
|
||||
|
||||
from mako import exceptions, ast, util, filters, compat
|
||||
import re
|
||||
|
||||
class Node(object):
    """Common base for every node in the parse tree.

    Stores the source text and the location (line, position, filename)
    the node came from.
    """

    def __init__(self, source, lineno, pos, filename):
        self.source, self.lineno = source, lineno
        self.pos, self.filename = pos, filename

    @property
    def exception_kwargs(self):
        """Location details as keyword arguments for Mako exceptions."""
        return dict(
            source=self.source,
            lineno=self.lineno,
            pos=self.pos,
            filename=self.filename)

    def get_children(self):
        """Return the child nodes; the base node has none."""
        return []

    def accept_visitor(self, visitor):
        """Dispatch to ``visitor.visit<ClassName>(self)`` if the visitor
        defines it; otherwise pass the visitor on to each child."""

        def _descend(node):
            for child in node.get_children():
                child.accept_visitor(visitor)

        handler_name = "visit" + self.__class__.__name__
        handler = getattr(visitor, handler_name, _descend)
        handler(self)
|
||||
|
||||
class TemplateNode(Node):
    """Container node holding the overall collection of nodes of a
    template, plus its page attributes."""

    def __init__(self, filename):
        super(TemplateNode, self).__init__(
            source='', lineno=0, pos=0, filename=filename)
        self.nodes = []
        self.page_attributes = {}

    def get_children(self):
        """Return the nodes held by this container."""
        return self.nodes

    def __repr__(self):
        attrs_repr = util.sorted_dict_repr(self.page_attributes)
        return "TemplateNode(%s, %r)" % (attrs_repr, self.nodes)
|
||||
|
||||
class ControlLine(Node):
    """A line-oriented Python control statement or its ``end`` marker.

    e.g.::

        % if foo:
            (markup)
        % endif

    """

    has_loop_context = False

    def __init__(self, keyword, isend, text, **kwargs):
        super(ControlLine, self).__init__(**kwargs)
        self.text = text
        self.keyword = keyword
        self.isend = isend
        self.is_primary = keyword in ('for', 'if', 'while', 'try', 'with')
        self.nodes = []
        if not self.isend:
            # opening lines are parsed so their identifiers are known
            fragment = ast.PythonFragment(text, **self.exception_kwargs)
            self._declared_identifiers = fragment.declared_identifiers
            self._undeclared_identifiers = fragment.undeclared_identifiers
        else:
            # "end..." lines carry no Python code
            self._declared_identifiers = []
            self._undeclared_identifiers = []

    def get_children(self):
        """Return the nodes nested under this control line."""
        return self.nodes

    def declared_identifiers(self):
        """Identifiers declared by this line (empty for ``end`` lines)."""
        return self._declared_identifiers

    def undeclared_identifiers(self):
        """Identifiers this line uses without declaring them (empty for
        ``end`` lines)."""
        return self._undeclared_identifiers

    def is_ternary(self, keyword):
        """return true if the given keyword is a ternary keyword
        for this ControlLine"""

        followers_by_keyword = {
            'if': set(['else', 'elif']),
            'try': set(['except', 'finally']),
            'for': set(['else']),
        }
        allowed = followers_by_keyword.get(self.keyword, [])
        return keyword in allowed

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "ControlLine(%r, %r, %r, %r)" % (
            self.keyword, self.text, self.isend, location)
|
||||
|
||||
class Text(Node):
    """A run of plain text in the template."""

    def __init__(self, content, **kwargs):
        super(Text, self).__init__(**kwargs)
        self.content = content

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "Text(%r, %r)" % (self.content, location)
|
||||
|
||||
class Code(Node):
    """A block of Python code, either inline or module level.

    e.g.::

        inline:
        <%
            x = 12
        %>

        module level:
        <%!
            import logger
        %>

    """

    def __init__(self, text, ismodule, **kwargs):
        super(Code, self).__init__(**kwargs)
        self.text = text
        self.ismodule = ismodule
        # parsed form of the block, used for identifier queries below
        self.code = ast.PythonCode(text, **self.exception_kwargs)

    def declared_identifiers(self):
        """Identifiers declared by the code block."""
        return self.code.declared_identifiers

    def undeclared_identifiers(self):
        """Identifiers the code block uses without declaring them."""
        return self.code.undeclared_identifiers

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "Code(%r, %r, %r)" % (self.text, self.ismodule, location)
|
||||
|
||||
class Comment(Node):
    """A template comment.

        # this is a comment

    """

    def __init__(self, text, **kwargs):
        super(Comment, self).__init__(**kwargs)
        self.text = text

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "Comment(%r, %r)" % (self.text, location)
|
||||
|
||||
class Expression(Node):
    """An inline expression, optionally followed by filters.

        ${x+y}

    """

    def __init__(self, text, escapes, **kwargs):
        super(Expression, self).__init__(**kwargs)
        self.text = text
        self.escapes = escapes
        self.escapes_code = ast.ArgumentList(escapes, **self.exception_kwargs)
        self.code = ast.PythonCode(text, **self.exception_kwargs)

    def declared_identifiers(self):
        """An expression declares nothing."""
        return []

    def undeclared_identifiers(self):
        """Identifiers needed by the expression and its filters.

        Names of the default escapes are not counted for the filter
        list, and identifiers the expression declares itself are
        removed from the result.
        """
        # TODO: make the "filter" shortcut list configurable at parse/gen time
        default_escape_names = set(filters.DEFAULT_ESCAPES.keys())
        from_filters = self.escapes_code.undeclared_identifiers.difference(
            default_escape_names)
        combined = self.code.undeclared_identifiers.union(from_filters)
        return combined.difference(self.code.declared_identifiers)

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "Expression(%r, %r, %r)" % (
            self.text, self.escapes_code.args, location)
|
||||
|
||||
class _TagMeta(type):
|
||||
"""metaclass to allow Tag to produce a subclass according to
|
||||
its keyword"""
|
||||
|
||||
_classmap = {}
|
||||
|
||||
def __init__(cls, clsname, bases, dict):
|
||||
if getattr(cls, '__keyword__', None) is not None:
|
||||
cls._classmap[cls.__keyword__] = cls
|
||||
super(_TagMeta, cls).__init__(clsname, bases, dict)
|
||||
|
||||
def __call__(cls, keyword, attributes, **kwargs):
|
||||
if ":" in keyword:
|
||||
ns, defname = keyword.split(':')
|
||||
return type.__call__(CallNamespaceTag, ns, defname,
|
||||
attributes, **kwargs)
|
||||
|
||||
try:
|
||||
cls = _TagMeta._classmap[keyword]
|
||||
except KeyError:
|
||||
raise exceptions.CompileException(
|
||||
"No such tag: '%s'" % keyword,
|
||||
source=kwargs['source'],
|
||||
lineno=kwargs['lineno'],
|
||||
pos=kwargs['pos'],
|
||||
filename=kwargs['filename']
|
||||
)
|
||||
return type.__call__(cls, keyword, attributes, **kwargs)
|
||||
|
||||
class Tag(compat.with_metaclass(_TagMeta, Node)):
    """Abstract base class for tags.

        <%sometag/>

        <%someothertag>
            stuff
        </%someothertag>

    """
    __keyword__ = None

    def __init__(self, keyword, attributes, expressions,
                        nonexpressions, required, **kwargs):
        """Construct a new Tag instance.

        This constructor is not called directly; it is only called by
        subclasses.

        :param keyword: the tag keyword

        :param attributes: raw dictionary of attribute key/value pairs

        :param expressions: a set of identifiers that are legal attributes,
         which can also contain embedded expressions

        :param nonexpressions: a set of identifiers that are legal
         attributes, which cannot contain embedded expressions

        :param required: attribute names that must be present; a
         ``CompileException`` is raised if any is missing

        :param kwargs:
         other arguments passed to the Node superclass (lineno, pos)

        """
        super(Tag, self).__init__(**kwargs)
        self.keyword = keyword
        self.attributes = attributes
        self._parse_attributes(expressions, nonexpressions)

        absent = [name for name in required
                  if name not in self.parsed_attributes]
        if len(absent):
            raise exceptions.CompileException(
                "Missing attribute(s): %s" %
                ",".join([repr(name) for name in absent]),
                **self.exception_kwargs)

        self.parent = None
        self.nodes = []

    def is_root(self):
        """Return ``True`` when this tag has no parent."""
        return self.parent is None

    def get_children(self):
        """Return the nodes nested inside this tag."""
        return self.nodes

    def _parse_attributes(self, expressions, nonexpressions):
        """Validate ``self.attributes`` and build ``self.parsed_attributes``.

        Attributes listed in ``expressions`` may embed ``${...}``; they
        become a Python source string concatenating the literal pieces
        and the parenthesized expressions.  Attributes listed in
        ``nonexpressions`` are stored as the ``repr`` of their value and
        must not embed ``${...}``.  Any other attribute raises
        ``CompileException``.
        """
        split_embedded = re.compile(r'(\${.+?})', re.S)
        whole_embedded = re.compile(r'^\${(.+?)}$', re.S)

        collected = set()
        self.parsed_attributes = {}

        for key in self.attributes:
            raw_value = self.attributes[key]

            if key in expressions:
                pieces = []
                for chunk in split_embedded.split(raw_value):
                    embedded = whole_embedded.match(chunk)
                    if embedded:
                        code = ast.PythonCode(
                            embedded.group(1).rstrip(),
                            **self.exception_kwargs)
                        # "declared_identifiers" is not subtracted here
                        # (elsewhere that is done so that variables
                        # declared by list comprehensions aren't
                        # counted); no condition requiring it has been
                        # found yet.
                        collected = collected.union(
                            code.undeclared_identifiers)
                        pieces.append('(%s)' % embedded.group(1))
                    elif chunk:
                        pieces.append(repr(chunk))
                self.parsed_attributes[key] = " + ".join(pieces) or repr('')

            elif key in nonexpressions:
                if re.search(r'\${.+?}', raw_value):
                    raise exceptions.CompileException(
                        "Attibute '%s' in tag '%s' does not allow embedded "
                        "expressions" % (key, self.keyword),
                        **self.exception_kwargs)
                self.parsed_attributes[key] = repr(raw_value)

            else:
                raise exceptions.CompileException(
                    "Invalid attribute for tag '%s': '%s'" %
                    (self.keyword, key),
                    **self.exception_kwargs)

        self.expression_undeclared_identifiers = collected

    def declared_identifiers(self):
        """A plain tag declares nothing."""
        return []

    def undeclared_identifiers(self):
        """Identifiers used by ``${...}`` expressions in the attributes."""
        return self.expression_undeclared_identifiers

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "%s(%r, %s, %r, %r)" % (
            self.__class__.__name__,
            self.keyword,
            util.sorted_dict_repr(self.attributes),
            location,
            self.nodes
        )
|
||||
|
||||
class IncludeTag(Tag):
    """The ``<%include>`` tag; ``file`` is required, ``import`` and
    ``args`` are optional, and all three may embed expressions."""

    __keyword__ = 'include'

    def __init__(self, keyword, attributes, **kwargs):
        expression_attrs = ('file', 'import', 'args')
        required_attrs = ('file',)
        super(IncludeTag, self).__init__(
            keyword, attributes,
            expression_attrs, (), required_attrs,
            **kwargs)
        # wrap the args in a dummy call so they parse as call arguments
        call_source = "__DUMMY(%s)" % attributes.get('args', '')
        self.page_args = ast.PythonCode(call_source, **self.exception_kwargs)

    def declared_identifiers(self):
        """An include declares nothing."""
        return []

    def undeclared_identifiers(self):
        """Identifiers needed by ``args`` (minus the dummy call name and
        anything ``args`` declares itself) plus those needed by the
        attribute expressions."""
        from_args = self.page_args.undeclared_identifiers
        from_args = from_args.difference(set(["__DUMMY"]))
        from_args = from_args.difference(self.page_args.declared_identifiers)
        from_attributes = super(IncludeTag, self).undeclared_identifiers()
        return from_args.union(from_attributes)
|
||||
|
||||
class NamespaceTag(Tag):
    """The ``<%namespace>`` tag.

    At least one of ``name`` / ``import`` must be given, and ``file`` and
    ``module`` are mutually exclusive.
    """

    __keyword__ = 'namespace'

    def __init__(self, keyword, attributes, **kwargs):
        expression_attrs = ('file',)
        plain_attrs = ('name', 'inheritable', 'import', 'module')
        super(NamespaceTag, self).__init__(
            keyword, attributes,
            expression_attrs, plain_attrs, (),
            **kwargs)

        # anonymous namespaces get a name derived from the object id
        generated_name = '__anon_%s' % hex(abs(id(self)))
        self.name = attributes.get('name', generated_name)

        if 'name' not in attributes and 'import' not in attributes:
            raise exceptions.CompileException(
                "'name' and/or 'import' attributes are required "
                "for <%namespace>",
                **self.exception_kwargs)

        if 'file' in attributes and 'module' in attributes:
            raise exceptions.CompileException(
                "<%namespace> may only have one of 'file' or 'module'",
                **self.exception_kwargs
            )

    def declared_identifiers(self):
        """A namespace tag declares nothing."""
        return []
|
||||
|
||||
class TextTag(Tag):
    """The ``<%text>`` tag; its only legal attribute is ``filter``,
    which may not embed ``${...}`` expressions."""

    __keyword__ = 'text'

    def __init__(self, keyword, attributes, **kwargs):
        # The legal-attribute collection must be a real one-element tuple.
        # A bare ('filter') is just the string 'filter', which made the
        # "key in nonexpressions" check a substring test, so misspelled
        # attributes such as "fil" or "ter" were silently accepted.
        super(TextTag, self).__init__(
            keyword,
            attributes, (),
            ('filter',), (), **kwargs)
        self.filter_args = ast.ArgumentList(
            attributes.get('filter', ''),
            **self.exception_kwargs)

    def undeclared_identifiers(self):
        """Identifiers needed by the ``filter`` list (names of the
        default escapes excluded) plus those needed by attribute
        expressions."""
        return self.filter_args.\
            undeclared_identifiers.\
            difference(filters.DEFAULT_ESCAPES.keys()).union(
                self.expression_undeclared_identifiers
            )
|
||||
|
||||
class DefTag(Tag):
    """The ``<%def>`` tag.

    ``name`` is required and must carry an argument signature, e.g.
    ``name="foo()"``.
    """

    __keyword__ = 'def'

    is_anonymous = False
    is_block = False

    def __init__(self, keyword, attributes, **kwargs):
        # every cache_* attribute present is allowed to embed expressions
        cache_attrs = [
            attr for attr in attributes if attr.startswith('cache_')]
        expression_attrs = ['buffered', 'cached'] + cache_attrs

        super(DefTag, self).__init__(
            keyword,
            attributes,
            expression_attrs,
            ('name', 'filter', 'decorator'),
            ('name',),
            **kwargs)

        signature = attributes['name']
        # a bare identifier means the parentheses were left out
        if re.match(r'^[\w_]+$', signature):
            raise exceptions.CompileException(
                "Missing parenthesis in %def",
                **self.exception_kwargs)

        self.function_decl = ast.FunctionDecl(
            "def " + signature + ":pass",
            **self.exception_kwargs)
        self.name = self.function_decl.funcname
        self.decorator = attributes.get('decorator', '')
        self.filter_args = ast.ArgumentList(
            attributes.get('filter', ''),
            **self.exception_kwargs)

    @property
    def funcname(self):
        """Function name parsed from the ``name`` signature."""
        return self.function_decl.funcname

    def get_argument_expressions(self, **kw):
        """Delegate to the parsed function declaration."""
        return self.function_decl.get_argument_expressions(**kw)

    def declared_identifiers(self):
        """All argument names of the def."""
        return self.function_decl.allargnames

    def undeclared_identifiers(self):
        """Identifiers needed by argument defaults, the filter list
        (names of the default escapes excluded) and attribute
        expressions, minus the def's own argument names."""
        from_defaults = set()
        for default_source in self.function_decl.defaults:
            default_code = ast.PythonCode(
                default_source, **self.exception_kwargs)
            from_defaults.update(default_code.undeclared_identifiers)

        from_filters = self.filter_args.undeclared_identifiers.difference(
            filters.DEFAULT_ESCAPES.keys())

        needed = from_defaults.union(from_filters).union(
            self.expression_undeclared_identifiers)
        return needed.difference(self.function_decl.allargnames)
|
||||
|
||||
class BlockTag(Tag):
    """The ``<%block>`` tag.

    ``name`` is optional but must be a plain identifier when given, and
    only named blocks may declare ``args``.
    """

    __keyword__ = 'block'

    is_block = True

    def __init__(self, keyword, attributes, **kwargs):
        # every cache_* attribute present is allowed to embed expressions
        cache_attrs = [
            attr for attr in attributes if attr.startswith('cache_')]
        expression_attrs = ['buffered', 'cached', 'args'] + cache_attrs

        super(BlockTag, self).__init__(
            keyword,
            attributes,
            expression_attrs,
            ('name', 'filter', 'decorator'),
            (),
            **kwargs)

        block_name = attributes.get('name')
        if block_name and not re.match(r'^[\w_]+$', block_name):
            raise exceptions.CompileException(
                "%block may not specify an argument signature",
                **self.exception_kwargs)
        if not block_name and attributes.get('args', None):
            raise exceptions.CompileException(
                "Only named %blocks may specify args",
                **self.exception_kwargs
            )

        self.body_decl = ast.FunctionArgs(
            attributes.get('args', ''),
            **self.exception_kwargs)
        self.name = block_name
        self.decorator = attributes.get('decorator', '')
        self.filter_args = ast.ArgumentList(
            attributes.get('filter', ''),
            **self.exception_kwargs)

    @property
    def is_anonymous(self):
        """``True`` when the block has no ``name`` attribute."""
        return self.name is None

    @property
    def funcname(self):
        """The block's name, or a name derived from its line number
        when the block has none."""
        return self.name or "__M_anon_%d" % (self.lineno, )

    def get_argument_expressions(self, **kw):
        """Delegate to the parsed ``args`` declaration."""
        return self.body_decl.get_argument_expressions(**kw)

    def declared_identifiers(self):
        """All argument names declared through ``args``."""
        return self.body_decl.allargnames

    def undeclared_identifiers(self):
        """Identifiers needed by the filter list (names of the default
        escapes excluded) plus those needed by attribute expressions."""
        from_filters = self.filter_args.undeclared_identifiers.difference(
            filters.DEFAULT_ESCAPES.keys())
        return from_filters.union(self.expression_undeclared_identifiers)
|
||||
|
||||
|
||||
|
||||
class CallTag(Tag):
    """Parse-tree node for a tag whose keyword is ``call``.

    The ``expr`` attribute is mandatory (``attributes['expr']`` is read
    unconditionally) and is parsed as Python code; the optional ``args``
    attribute declares the argument names made available to the body.
    """

    __keyword__ = 'call'

    def __init__(self, keyword, attributes, **kwargs):
        # The third positional argument is the collection of expression
        # attribute names (the same slot BlockTag and PageTag fill with
        # their ``expressions`` list).  It must be a one-element tuple:
        # the former ``('args')`` was just the string 'args', so any
        # membership test against it would also accept substrings such
        # as 'a' or 'arg'.
        # NOTE(review): Tag is not visible here; confirm it tests
        # ``key in expressions``.
        super(CallTag, self).__init__(keyword, attributes,
                                      ('args',), ('expr',), ('expr',),
                                      **kwargs)
        self.expression = attributes['expr']
        self.code = ast.PythonCode(self.expression, **self.exception_kwargs)
        self.body_decl = ast.FunctionArgs(attributes.get('args', ''),
                                          **self.exception_kwargs)

    def declared_identifiers(self):
        """Names declared by the call expression plus the body's argument
        names."""
        return self.code.declared_identifiers.union(
            self.body_decl.allargnames)

    def undeclared_identifiers(self):
        """Names the call expression uses but does not itself declare."""
        return self.code.undeclared_identifiers.\
            difference(self.code.declared_identifiers)
|
||||
|
||||
class CallNamespaceTag(Tag):
    """Parse-tree node for a ``namespace:defname`` style call tag.

    Every attribute other than ``args`` is turned into a keyword
    argument of a synthesized ``namespace.defname(...)`` call
    expression.
    """

    def __init__(self, namespace, defname, attributes, **kwargs):
        # all supplied attributes (plus 'args') are treated as
        # expression attributes
        expression_names = tuple(attributes.keys()) + ('args', )

        super(CallNamespaceTag, self).__init__(
            namespace + ":" + defname,
            attributes,
            expression_names,
            (),
            (),
            **kwargs)

        # build "k=v,k=v" from the parsed attributes, leaving out 'args'
        keyword_pairs = []
        for key, value in self.parsed_attributes.items():
            if key != 'args':
                keyword_pairs.append("%s=%s" % (key, value))

        self.expression = "%s.%s(%s)" % (
            namespace, defname, ",".join(keyword_pairs))
        self.code = ast.PythonCode(self.expression, **self.exception_kwargs)
        self.body_decl = ast.FunctionArgs(
            attributes.get('args', ''),
            **self.exception_kwargs)

    def declared_identifiers(self):
        """Names declared by the call expression plus the body's argument
        names."""
        code_names = self.code.declared_identifiers
        return code_names.union(self.body_decl.allargnames)

    def undeclared_identifiers(self):
        """Names the call expression uses but does not itself declare."""
        used = self.code.undeclared_identifiers
        return used.difference(self.code.declared_identifiers)
|
||||
|
||||
class InheritTag(Tag):
    """Parse-tree node for a tag whose keyword is ``inherit``.

    ``file`` is passed to the base class both as the sole expression
    attribute and in the final positional slot.
    """

    __keyword__ = 'inherit'

    def __init__(self, keyword, attributes, **kwargs):
        file_only = ('file',)
        super(InheritTag, self).__init__(
            keyword, attributes, file_only, (), file_only, **kwargs)
|
||||
|
||||
class PageTag(Tag):
    """Parse-tree node for a tag whose keyword is ``page``.

    Parses the optional ``args`` declaration and the optional
    ``expression_filter`` argument list.
    """

    __keyword__ = 'page'

    def __init__(self, keyword, attributes, **kwargs):
        # fixed expression attributes, plus anything named "cache_*"
        expression_names = [
            'cached', 'args', 'expression_filter', 'enable_loop']
        expression_names = expression_names + [
            attr for attr in attributes if attr.startswith('cache_')]

        super(PageTag, self).__init__(
            keyword,
            attributes,
            expression_names,
            (),
            (),
            **kwargs)

        self.body_decl = ast.FunctionArgs(
            attributes.get('args', ''), **self.exception_kwargs)
        self.filter_args = ast.ArgumentList(
            attributes.get('expression_filter', ''),
            **self.exception_kwargs)

    def declared_identifiers(self):
        """Names declared by the page: all of its argument names."""
        return self.body_decl.allargnames
|
||||
|
||||
|
|
@ -0,0 +1,299 @@
|
|||
# mako/pygen.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""utilities for generating and formatting literal Python code."""
|
||||
|
||||
import re
|
||||
from mako import exceptions
|
||||
|
||||
class PythonPrinter(object):
    """Write lines of Python source to a stream, tracking the indentation
    level implied by the lines written so far."""

    def __init__(self, stream):
        # destination for all generated text
        self.stream = stream

        # current indentation depth, counted in units of indentstring
        self.indent = 0

        # one entry per indentation increase, recording the compound
        # keyword that caused it (or None); consulted when deciding
        # whether a later line should unindent
        self.indent_detail = []

        # whitespace repeated ``indent`` times to prefix each line
        # NOTE(review): shown as a single space in this copy of the
        # file; confirm the width against the upstream source.
        self.indentstring = " "

        # line number of the next line to be written
        self.lineno = 1

        # lines of a pre-indented block, held until they can be
        # re-indented relative to the current level
        self.line_buffer = []

        self.in_indent_lines = False

        self._reset_multi_line_flags()

        # generated line number -> template source line number
        self.source_map = {}

    def _update_lineno(self, num):
        """Advance the generated-line counter by ``num``."""
        self.lineno = self.lineno + num

    def start_source(self, lineno):
        """Record template line ``lineno`` against the current generated
        line, unless that generated line is already mapped."""
        if self.lineno in self.source_map:
            return
        self.source_map[self.lineno] = lineno

    def write_blanks(self, num):
        """Write ``num`` newlines directly to the stream."""
        self.stream.write("\n" * num)
        self._update_lineno(num)

    def write_indented_block(self, block):
        """Buffer one or more lines of Python that already carry their
        own indentation.

        The block as a whole is re-based onto the current indent level
        when the buffer is next flushed.
        """
        self.in_indent_lines = False
        pieces = re.split(r'\r?\n', block)
        self.line_buffer.extend(pieces)
        self._update_lineno(len(pieces))

    def writelines(self, *lines):
        """Write each given line through :meth:`writeline`."""
        for each in lines:
            self.writeline(each)

    def writeline(self, line):
        """Write one line of Python at the current indent level.

        The indentation counter is adjusted from the content of the
        line: ``None``, blank and comment-only lines (and lines such as
        ``else:`` that continue a compound statement) step the level
        down before printing; a line ending in a colon may step it up
        afterwards.
        """
        if not self.in_indent_lines:
            self._flush_adjusted_lines()
            self.in_indent_lines = True

        hastext = not (
            line is None or
            re.match(r"^\s*#", line) or
            re.match(r"^\s*$", line)
        )

        # only a '#' in the very first column counts as a comment here
        is_comment = bool(line) and line[0] == '#'

        # step the indentation down first, if this line calls for it
        if not is_comment and (not hastext or self._is_unindentor(line)):
            if self.indent > 0:
                self.indent -= 1
                # an empty indent_detail stack means more closures were
                # written than openers; the module would not compile
                if not self.indent_detail:
                    raise exceptions.SyntaxException(
                        "Too many whitespace closures")
                self.indent_detail.pop()

        if line is None:
            return

        # emit the line itself
        self.stream.write(self._indent_line(line) + "\n")
        self._update_lineno(len(line.split("\n")))

        # a line may both decrease (above) and then increase (below)
        # the indentation level
        if not re.search(r":[ \t]*(?:#.*)?$", line):
            return

        compound = re.match(r"^\s*(if|try|elif|while|for|with)", line)
        if compound:
            # a compound keyword: remember it so later "unindentor"
            # keywords can be recognised
            self.indent += 1
            self.indent_detail.append(compound.group(1))
        elif re.match(r"^\s*(def|class|else|elif|except|finally)", line):
            # some other indenting keyword: nothing to pair up later;
            # anything else ending in a colon does not indent
            self.indent += 1
            self.indent_detail.append(None)

    def close(self):
        """close this printer, flushing any remaining lines."""
        self._flush_adjusted_lines()

    def _is_unindentor(self, line):
        """Return True if ``line`` is an 'unindentor' relative to the most
        recent indent event.

        That is the case when the last indent was caused by a compound
        keyword and this line starts with else / elif / except /
        finally.  No attempt is made to check that the two keywords
        actually belong together.
        """
        # nothing pushed yet, or the last indent was not caused by a
        # compound statement keyword
        if not self.indent_detail or self.indent_detail[-1] is None:
            return False

        found = re.match(r"^\s*(else|elif|except|finally).*\:", line)
        return bool(found)

    def _indent_line(self, line, stripspace=''):
        """Indent ``line`` to the current level.

        ``stripspace`` is a run of whitespace removed from the start of
        the line before the new indentation is applied.
        """
        prefix = self.indentstring * self.indent
        return re.sub(r"^%s" % stripspace, prefix, line)

    def _reset_multi_line_flags(self):
        """Clear the flags that mark a backslash-continued or
        triple-quoted section."""
        self.backslashed = False
        self.triplequoted = False

    def _in_multi_line(self, line):
        """Return True if ``line`` continues a multi-line construct begun
        earlier, via backslash or triple-quote.

        Only explicitly joined lines are detected, not implicit joins
        inside brackets or braces; the aim is merely to avoid altering
        whitespace inside a multi-line string literal.  The flags are
        updated for the following line as a side effect.
        """
        was_inside = (self.backslashed or self.triplequoted)

        self.backslashed = bool(re.search(r"\\$", line))

        # an odd number of triple-quote tokens toggles the state
        quote_count = len(re.findall(r"\"\"\"|\'\'\'", line))
        if quote_count % 2 != 0:
            self.triplequoted = not self.triplequoted

        return was_inside

    def _flush_adjusted_lines(self):
        """Write out the buffered block, re-based to the current indent.

        The leading whitespace of the first line that has real content
        is taken as the block's margin and stripped from each line.
        """
        margin = None
        self._reset_multi_line_flags()

        for buffered in self.line_buffer:
            if self._in_multi_line(buffered):
                # inside a continued line / string: leave untouched
                self.stream.write(buffered + "\n")
                continue

            buffered = buffered.expandtabs()
            if margin is None and re.search(r"^[ \t]*[^# \t]", buffered):
                margin = re.match(r"^([ \t]*)", buffered).group(1)
            self.stream.write(self._indent_line(buffered, margin) + "\n")

        self.line_buffer = []
        self._reset_multi_line_flags()
|
||||
|
||||
|
||||
def adjust_whitespace(text):
    """remove the left-whitespace margin of a block of Python code.

    The margin is the leading whitespace of the first line that holds
    real content; it is stripped from every line that is not a
    continuation of a backslash-joined line or a triple-quoted string.
    """

    # scanner state carried from one line to the next; 'quote' holds
    # the opening triple-quote token while inside such a string
    flags = {'backslash': False, 'quote': False}

    def consume(pattern, remainder):
        """Match ``pattern`` at the start of ``remainder``; return the
        match (or None) and whatever text is left over."""
        found = re.match(pattern, remainder)
        if found is None:
            return None, remainder
        return found, remainder[len(found.group(0)):]

    def in_multi_line(line):
        """Report whether ``line`` began inside a multi-line construct,
        updating the state for the next line."""
        was_inside = (flags['backslash'] or flags['quote'])

        flags['backslash'] = bool(re.search(r"\\$", line))

        while line:
            open_quote = flags['quote']
            if open_quote:
                # look for the matching closing quote, otherwise skip
                # ahead to it (or to the end of the line)
                hit, line = consume(r"%s" % open_quote, line)
                if hit:
                    flags['quote'] = False
                else:
                    hit, line = consume(
                        r".*?(?=%s|$)" % open_quote, line)
                continue

            # a comment ends the scan of this line
            hit, line = consume(r'#', line)
            if hit:
                return was_inside

            hit, line = consume(r"\"\"\"|\'\'\'", line)
            if hit:
                flags['quote'] = hit.group(0)
                continue

            # skip ordinary text up to the next token of interest
            hit, line = consume(r".*?(?=\"\"\"|\'\'\'|#|$)", line)

        return was_inside

    adjusted = []
    margin = None

    for raw in re.split(r'\r?\n', text):
        if in_multi_line(raw):
            adjusted.append(raw)
            continue

        raw = raw.expandtabs()
        if margin is None and re.search(r"^[ \t]*[^# \t]", raw):
            margin = re.match(r"^([ \t]*)", raw).group(1)
        adjusted.append(re.sub(r"^%s" % margin, '', raw))

    return "\n".join(adjusted)
|
|
@ -0,0 +1,232 @@
|
|||
# mako/pyparser.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""Handles parsing of Python code.
|
||||
|
||||
Parsing to AST is done via _ast on Python > 2.5, otherwise the compiler
|
||||
module is used.
|
||||
"""
|
||||
|
||||
from mako import exceptions, util, compat
|
||||
from mako.compat import arg_stringname
|
||||
import operator
|
||||
|
||||
# words that cannot be assigned to (notably smaller than the total
# keys in __builtins__); Python 3 additionally reserves 'print'
reserved = set(['True', 'False', 'None'])

if compat.py3k:
    reserved.add('print')

    # the "id" attribute on a function node
    arg_id = operator.attrgetter('arg')
else:
    # the "id" attribute on a function node
    arg_id = operator.attrgetter('id')
|
||||
|
||||
import _ast
|
||||
util.restore__ast(_ast)
|
||||
from mako import _ast_util
|
||||
|
||||
|
||||
def parse(code, mode='exec', **exception_kwargs):
    """Parse an expression into AST.

    Any exception raised while parsing is re-raised as
    ``exceptions.SyntaxException``; its message names the original
    exception class, its text, and the first 50 characters of ``code``.
    """
    try:
        tree = _ast_util.parse(code, '<unknown>', mode)
    except Exception:
        caught = compat.exception_as()
        message = "(%s) %s (%r)" % (
            caught.__class__.__name__,
            caught,
            code[0:50]
        )
        raise exceptions.SyntaxException(message, **exception_kwargs)
    return tree
|
||||
|
||||
|
||||
class FindIdentifiers(_ast_util.NodeVisitor):
    """AST visitor that records declared and undeclared identifiers on a
    listener object.

    Names bound at the top level go into
    ``listener.declared_identifiers``; names read without a known
    binding go into ``listener.undeclared_identifiers``.  Names bound
    inside a function body are tracked locally and never reported as
    declared.
    """

    def __init__(self, listener, **exception_kwargs):
        self.listener = listener
        self.exception_kwargs = exception_kwargs
        self.in_function = False
        self.in_assign_targets = False
        self.local_ident_stack = set()

    def _add_declared(self, name):
        """Record ``name`` as declared: on the listener at top level, or
        in the local set while inside a function."""
        if self.in_function:
            self.local_ident_stack.add(name)
        else:
            self.listener.declared_identifiers.add(name)

    def visit_ClassDef(self, node):
        self._add_declared(node.name)

    def visit_Assign(self, node):
        # visit the value before the targets, so that in a clause like
        # "x=x+5" the right-hand x is seen while still undeclared
        self.visit(node.value)

        previous = self.in_assign_targets
        self.in_assign_targets = True
        for target in node.targets:
            self.visit(target)
        self.in_assign_targets = previous

    if compat.py3k:

        # ExceptHandler is in Python 2, but this block only works in
        # Python 3 (and is required there)

        def visit_ExceptHandler(self, node):
            if node.name is not None:
                self._add_declared(node.name)
            if node.type is not None:
                self.visit(node.type)
            for stmt in node.body:
                self.visit(stmt)

    def visit_Lambda(self, node, *args):
        self._visit_function(node, True)

    def visit_FunctionDef(self, node):
        self._add_declared(node.name)
        self._visit_function(node, False)

    def _expand_tuples(self, args):
        """Yield each argument, replacing a tuple argument with its
        elements (one level only)."""
        for item in args:
            if not isinstance(item, _ast.Tuple):
                yield item
                continue
            for element in item.elts:
                yield element

    def _visit_function(self, node, islambda):
        """Visit a function or lambda body with function-local state.

        While inside, no further identifiers are logged as "declared",
        but "undeclared" ones still are.  The names in the function
        header are added to the local set so that they are not counted
        as undeclared.  The previous state is restored afterwards.
        """
        outer_in_function = self.in_function
        outer_locals = self.local_ident_stack

        self.in_function = True
        header_names = [
            arg_id(arg) for arg in self._expand_tuples(node.args.args)
        ]
        self.local_ident_stack = outer_locals.union(header_names)

        if islambda:
            # a lambda body is a single expression node
            self.visit(node.body)
        else:
            for stmt in node.body:
                self.visit(stmt)

        self.in_function = outer_in_function
        self.local_ident_stack = outer_locals

    def visit_For(self, node):
        # visit the iterable before the loop target
        self.visit(node.iter)
        self.visit(node.target)
        for stmt in node.body:
            self.visit(stmt)
        for stmt in node.orelse:
            self.visit(stmt)

    def visit_Name(self, node):
        ident = node.id
        if isinstance(node.ctx, _ast.Store):
            # a name being stored to is a declaration
            self._add_declared(ident)
            return

        already_known = (
            ident in reserved or
            ident in self.listener.declared_identifiers or
            ident in self.local_ident_stack
        )
        if not already_known:
            self.listener.undeclared_identifiers.add(ident)

    def visit_Import(self, node):
        for alias in node.names:
            if alias.asname is not None:
                bound = alias.asname
            else:
                # "import a.b.c" binds only the first component
                bound = alias.name.split('.')[0]
            self._add_declared(bound)

    def visit_ImportFrom(self, node):
        for alias in node.names:
            if alias.asname is not None:
                self._add_declared(alias.asname)
                continue
            if alias.name == '*':
                raise exceptions.CompileException(
                    "'import *' is not supported, since all identifier "
                    "names must be explicitly declared. Please use the "
                    "form 'from <modulename> import <name1>, <name2>, "
                    "...' instead.", **self.exception_kwargs)
            self._add_declared(alias.name)
|
||||
|
||||
|
||||
class FindTuple(_ast_util.NodeVisitor):
    """AST visitor that breaks a tuple into its elements and reports
    each one to a listener.

    For every element the listener receives a code object built by
    ``code_factory``, the element's regenerated source text, and the
    element's declared / undeclared identifiers merged into its own
    sets.
    """

    def __init__(self, listener, code_factory, **exception_kwargs):
        self.listener = listener
        self.code_factory = code_factory
        self.exception_kwargs = exception_kwargs

    def visit_Tuple(self, node):
        sink = self.listener
        for element in node.elts:
            parsed = self.code_factory(element, **self.exception_kwargs)
            sink.codeargs.append(parsed)
            sink.args.append(ExpressionGenerator(element).value())
            # union() builds new sets, so the listener's attributes are
            # rebound rather than mutated in place
            sink.declared_identifiers = sink.declared_identifiers.union(
                parsed.declared_identifiers)
            sink.undeclared_identifiers = sink.undeclared_identifiers.union(
                parsed.undeclared_identifiers)
|
||||
|
||||
|
||||
class ParseFunc(_ast_util.NodeVisitor):
    """AST visitor that copies a function definition's signature onto a
    listener object."""

    def __init__(self, listener, **exception_kwargs):
        self.listener = listener
        self.exception_kwargs = exception_kwargs

    def visit_FunctionDef(self, node):
        """Set funcname, argnames, defaults, kwargnames, kwdefaults,
        varargs and kwargs on the listener from ``node``."""
        signature = node.args
        target = self.listener

        target.funcname = node.name

        # positional names, followed by the *args name if present
        positional = [arg_id(arg) for arg in signature.args]
        if signature.vararg:
            positional.append(arg_stringname(signature.vararg))

        if compat.py2k:
            # kw-only args don't exist in Python 2
            keyword_names = []
            keyword_defaults = []
        else:
            keyword_names = [arg_id(arg) for arg in signature.kwonlyargs]
            keyword_defaults = signature.kw_defaults

        # the **kwargs name, if present, goes last
        if signature.kwarg:
            keyword_names.append(arg_stringname(signature.kwarg))

        target.argnames = positional
        target.defaults = signature.defaults  # ast nodes
        target.kwargnames = keyword_names
        target.kwdefaults = keyword_defaults
        target.varargs = signature.vararg
        target.kwargs = signature.kwarg
|
||||
|
||||
class ExpressionGenerator(object):
    """Regenerate source text from an AST node."""

    def __init__(self, astnode):
        # run the source generator over the node right away; its
        # accumulated output is read back by value()
        source_gen = _ast_util.SourceGenerator(' ' * 4)
        source_gen.visit(astnode)
        self.generator = source_gen

    def value(self):
        """Return the generated pieces joined into a single string."""
        pieces = self.generator.result
        return ''.join(pieces)
|
|
@ -0,0 +1,878 @@
|
|||
# mako/runtime.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""provides runtime services for templates, including Context,
|
||||
Namespace, and various helper functions."""
|
||||
|
||||
from mako import exceptions, util, compat
|
||||
from mako.compat import compat_builtins
|
||||
import sys
|
||||
|
||||
|
||||
class Context(object):
    """Provides runtime namespace, output buffer, and various
    callstacks for templates.

    See :ref:`runtime_toplevel` for detail on the usage of
    :class:`.Context`.

    """

    def __init__(self, buffer, **data):
        self._buffer_stack = [buffer]

        # _data is the live namespace; _kwargs is a snapshot of the
        # arguments as originally passed in
        self._kwargs = data.copy()
        self._data = data

        self._with_template = None
        self._outputting_as_unicode = None
        self.namespaces = {}

        # "capture" function which proxies to the
        # generic "capture" function
        data['capture'] = compat.partial(capture, self)

        # "caller" stack used by def calls with content
        callers = CallerStack()
        data['caller'] = callers
        self.caller_stack = callers

    def _set_with_template(self, t):
        """Associate template ``t`` with this context.

        Raises ``exceptions.NameConflictError`` if any of the
        template's reserved names is present in the context data.
        """
        self._with_template = t
        clashes = t.reserved_names.intersection(self._data)
        if not clashes:
            return
        raise exceptions.NameConflictError(
            "Reserved words passed to render(): %s" %
            ", ".join(clashes))

    @property
    def lookup(self):
        """Return the :class:`.TemplateLookup` associated
        with this :class:`.Context`.

        """
        return self._with_template.lookup

    @property
    def kwargs(self):
        """Return the dictionary of top level keyword arguments associated
        with this :class:`.Context`.

        This dictionary only includes the top-level arguments passed to
        :meth:`.Template.render`.  It does not include names produced
        within the template execution such as local variable names or
        special names such as ``self``, ``next``, etc.

        Its main use is the case where a :class:`.Template` accepts
        arguments via its ``<%page>`` tag, normally passed via
        :meth:`.Template.render`, but the template is being called in an
        inheritance context using the ``body()`` method.
        :attr:`.Context.kwargs` can then be used to propagate these
        arguments to the inheriting template::

            ${next.body(**context.kwargs)}

        A fresh copy is returned on every access.
        """
        return self._kwargs.copy()

    def push_caller(self, caller):
        """Push a ``caller`` callable onto the callstack for
        this :class:`.Context`."""
        self.caller_stack.append(caller)

    def pop_caller(self):
        """Pop the most recent ``caller`` callable from the callstack
        for this :class:`.Context`."""
        del self.caller_stack[-1]

    def keys(self):
        """Return a list of all names established in this
        :class:`.Context`."""
        return list(self._data.keys())

    def __getitem__(self, key):
        # context data takes precedence; otherwise fall back to the
        # builtins module's namespace
        if key not in self._data:
            return compat_builtins.__dict__[key]
        return self._data[key]

    def _push_writer(self):
        """push a capturing buffer onto this Context and return
        the new writer function."""
        capture_buf = util.FastEncodingBuffer()
        self._buffer_stack.append(capture_buf)
        return capture_buf.write

    def _pop_buffer_and_writer(self):
        """pop the most recent capturing buffer from this Context
        and return it together with the current writer after the pop.

        """
        popped = self._buffer_stack.pop()
        current_writer = self._buffer_stack[-1].write
        return popped, current_writer

    def _push_buffer(self):
        """push a capturing buffer onto this Context."""
        self._push_writer()

    def _pop_buffer(self):
        """pop the most recent capturing buffer from this Context."""
        return self._buffer_stack.pop()

    def get(self, key, default=None):
        """Return a value from this :class:`.Context`, falling back to
        the builtins namespace and then to ``default``."""
        fallback = compat_builtins.__dict__.get(key, default)
        return self._data.get(key, fallback)

    def write(self, string):
        """Write a string to this :class:`.Context` object's
        underlying output buffer."""
        top = self._buffer_stack[-1]
        top.write(string)

    def writer(self):
        """Return the current writer function."""
        return self._buffer_stack[-1].write

    def _copy(self):
        """Return a new Context sharing everything with this one except
        ``_data``, which is shallow-copied."""
        # bypass __init__: all state is transferred explicitly
        clone = Context.__new__(Context)
        clone._data = self._data.copy()
        clone._buffer_stack = self._buffer_stack
        clone._kwargs = self._kwargs
        clone._with_template = self._with_template
        clone._outputting_as_unicode = self._outputting_as_unicode
        clone.namespaces = self.namespaces
        clone.caller_stack = self.caller_stack
        return clone

    def _locals(self, d):
        """Create a new :class:`.Context` with a copy of this
        :class:`.Context`'s current state,
        updated with the given dictionary.

        The :attr:`.Context.kwargs` collection remains
        unaffected.  If ``d`` is empty, this same context is returned.

        """
        if not d:
            return self
        derived = self._copy()
        derived._data.update(d)
        return derived

    def _clean_inheritance_tokens(self):
        """create a new copy of this :class:`.Context`. with
        tokens related to inheritance state removed."""
        clone = self._copy()
        for token in ('self', 'parent', 'next'):
            clone._data.pop(token, None)
        return clone
|
||||
|
||||
class CallerStack(list):
    """A list of caller frames.

    Truth-testing and attribute access are forwarded to the topmost
    frame.  ``nextcaller`` holds a frame that the next ``_push_frame``
    call will push.
    """

    def __init__(self):
        self.nextcaller = None

    def __nonzero__(self):
        # Python 2 spelling of __bool__
        return self.__bool__()

    def __bool__(self):
        # true only when the stack is non-empty and its top frame is
        # itself truthy
        return bool(len(self) and self._get_caller())

    def _get_caller(self):
        # this method can be removed once
        # codegen MAGIC_NUMBER moves past 7
        return self[-1]

    def __getattr__(self, key):
        # attributes not found on the stack itself are looked up on
        # the topmost frame
        top = self._get_caller()
        return getattr(top, key)

    def _push_frame(self):
        """Push the pending ``nextcaller`` (or None), clear it, and
        return what was pushed."""
        pushed = self.nextcaller or None
        self.append(pushed)
        self.nextcaller = None
        return pushed

    def _pop_frame(self):
        """Pop the top frame and keep it as the pending ``nextcaller``."""
        self.nextcaller = self.pop()
|
||||
|
||||
|
||||
class Undefined(object):
    """Represents an undefined value in a template.

    All template modules have a constant value
    ``UNDEFINED`` present which is an instance of this
    object.

    The value is always false, and converting it to a string raises
    ``NameError``.
    """

    def __str__(self):
        raise NameError("Undefined")

    def __bool__(self):
        return False

    def __nonzero__(self):
        # Python 2 spelling of __bool__
        return self.__bool__()


# the shared instance of Undefined
UNDEFINED = Undefined()
|
||||
|
||||
class LoopStack(object):
    """a stack for LoopContexts that implements the context manager
    protocol to automatically pop off the top of the stack on context
    exit
    """

    def __init__(self):
        self.stack = []

    @property
    def _top(self):
        # the innermost loop context, or this stack itself when no
        # loop is active
        if not self.stack:
            return self
        return self.stack[-1]

    def _push(self, iterable):
        """Wrap ``iterable`` in a new LoopContext, link it to the
        current innermost one as its parent, and push it."""
        fresh = LoopContext(iterable)
        if self.stack:
            fresh.parent = self.stack[-1]
        self.stack.append(fresh)

    def _pop(self):
        return self.stack.pop()

    def _enter(self, iterable):
        """Push a loop context for ``iterable`` and return it."""
        self._push(iterable)
        return self._top

    def _exit(self):
        """Pop the innermost loop context and return the new top."""
        self._pop()
        return self._top

    def __getattr__(self, key):
        # reached only for attributes that do not exist on the stack
        # object itself
        raise exceptions.RuntimeException("No loop context is established")

    def __iter__(self):
        return iter(self._top)
|
||||
|
||||
|
||||
class LoopContext(object):
    """A magic loop variable.
    Automatically accessible in any ``% for`` block.

    See the section :ref:`loop_context` for usage
    notes.

    :attr:`parent` -> :class:`.LoopContext` or ``None``
        The parent loop, if one exists.
    :attr:`index` -> `int`
        The 0-based iteration count.
    :attr:`reverse_index` -> `int`
        The number of iterations remaining.
    :attr:`first` -> `bool`
        ``True`` on the first iteration, ``False`` otherwise.
    :attr:`last` -> `bool`
        ``True`` on the last iteration, ``False`` otherwise.
    :attr:`even` -> `bool`
        ``True`` when ``index`` is even.
    :attr:`odd` -> `bool`
        ``True`` when ``index`` is odd.
    """

    def __init__(self, iterable):
        self._iterable = iterable
        self.parent = None
        self.index = 0

    def __iter__(self):
        # index is advanced only after the consumer has finished with
        # the item and asks for the next one
        for item in self._iterable:
            yield item
            self.index += 1

    @util.memoized_instancemethod
    def __len__(self):
        return len(self._iterable)

    @property
    def reverse_index(self):
        remaining = len(self) - self.index
        return remaining - 1

    @property
    def first(self):
        return self.index == 0

    @property
    def last(self):
        final_index = len(self) - 1
        return self.index == final_index

    @property
    def odd(self):
        return bool(self.index % 2)

    @property
    def even(self):
        return not self.odd

    def cycle(self, *values):
        """Cycle through values as the loop progresses.

        Returns the value at position ``index`` modulo the number of
        values; raises ``ValueError`` when called without any values.
        """
        if not values:
            raise ValueError("You must provide values to cycle through")
        position = self.index % len(values)
        return values[position]
|
||||
|
||||
|
||||
class _NSAttr(object):
|
||||
def __init__(self, parent):
|
||||
self.__parent = parent
|
||||
def __getattr__(self, key):
|
||||
ns = self.__parent
|
||||
while ns:
|
||||
if hasattr(ns.module, key):
|
||||
return getattr(ns.module, key)
|
||||
else:
|
||||
ns = ns.inherits
|
||||
raise AttributeError(key)
|
||||
|
||||
class Namespace(object):
|
||||
"""Provides access to collections of rendering methods, which
|
||||
can be local, from other templates, or from imported modules.
|
||||
|
||||
To access a particular rendering method referenced by a
|
||||
:class:`.Namespace`, use plain attribute access:
|
||||
|
||||
.. sourcecode:: mako
|
||||
|
||||
${some_namespace.foo(x, y, z)}
|
||||
|
||||
:class:`.Namespace` also contains several built-in attributes
|
||||
described here.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, name, context,
             callables=None, inherits=None,
             populate_self=True, calling_uri=None):
    """Store the name, context and inherited namespace.

    When ``callables`` is given, it is indexed by each callable's
    ``__name__`` into the instance's ``callables`` dict.
    ``populate_self`` and ``calling_uri`` are accepted but not used by
    this constructor.
    """
    self.name = name
    self.context = context
    self.inherits = inherits
    if callables is None:
        return
    self.callables = dict((fn.__name__, fn) for fn in callables)
|
||||
|
||||
callables = ()
|
||||
|
||||
module = None
|
||||
"""The Python module referenced by this :class:`.Namespace`.
|
||||
|
||||
If the namespace references a :class:`.Template`, then
|
||||
this module is the equivalent of ``template.module``,
|
||||
i.e. the generated module for the template.
|
||||
|
||||
"""
|
||||
|
||||
template = None
|
||||
"""The :class:`.Template` object referenced by this
|
||||
:class:`.Namespace`, if any.
|
||||
|
||||
"""
|
||||
|
||||
context = None
|
||||
"""The :class:`.Context` object for this :class:`.Namespace`.
|
||||
|
||||
Namespaces are often created with copies of contexts that
|
||||
contain slightly different data, particularly in inheritance
|
||||
scenarios. Using the :class:`.Context` off of a :class:`.Namespace` one
|
||||
can traverse an entire chain of templates that inherit from
|
||||
one-another.
|
||||
|
||||
"""
|
||||
|
||||
filename = None
|
||||
"""The path of the filesystem file used for this
|
||||
:class:`.Namespace`'s module or template.
|
||||
|
||||
If this is a pure module-based
|
||||
:class:`.Namespace`, this evaluates to ``module.__file__``. If a
|
||||
template-based namespace, it evaluates to the original
|
||||
template file location.
|
||||
|
||||
"""
|
||||
|
||||
uri = None
|
||||
"""The URI for this :class:`.Namespace`'s template.
|
||||
|
||||
I.e. whatever was sent to :meth:`.TemplateLookup.get_template()`.
|
||||
|
||||
This is the equivalent of :attr:`.Template.uri`.
|
||||
|
||||
"""
|
||||
|
||||
_templateuri = None
|
||||
|
||||
@util.memoized_property
|
||||
def attr(self):
|
||||
"""Access module level attributes by name.
|
||||
|
||||
This accessor allows templates to supply "scalar"
|
||||
attributes which are particularly handy in inheritance
|
||||
relationships.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:ref:`inheritance_attr`
|
||||
|
||||
:ref:`namespace_attr_for_includes`
|
||||
|
||||
"""
|
||||
return _NSAttr(self)
|
||||
|
||||
def get_namespace(self, uri):
|
||||
"""Return a :class:`.Namespace` corresponding to the given ``uri``.
|
||||
|
||||
If the given ``uri`` is a relative URI (i.e. it does not
|
||||
contain a leading slash ``/``), the ``uri`` is adjusted to
|
||||
be relative to the ``uri`` of the namespace itself. This
|
||||
method is therefore mostly useful off of the built-in
|
||||
``local`` namespace, described in :ref:`namespace_local`.
|
||||
|
||||
In
|
||||
most cases, a template wouldn't need this function, and
|
||||
should instead use the ``<%namespace>`` tag to load
|
||||
namespaces. However, since all ``<%namespace>`` tags are
|
||||
evaluated before the body of a template ever runs,
|
||||
this method can be used to locate namespaces using
|
||||
expressions that were generated within the body code of
|
||||
the template, or to conditionally use a particular
|
||||
namespace.
|
||||
|
||||
"""
|
||||
key = (self, uri)
|
||||
if key in self.context.namespaces:
|
||||
return self.context.namespaces[key]
|
||||
else:
|
||||
ns = TemplateNamespace(uri, self.context._copy(),
|
||||
templateuri=uri,
|
||||
calling_uri=self._templateuri)
|
||||
self.context.namespaces[key] = ns
|
||||
return ns
|
||||
|
||||
def get_template(self, uri):
|
||||
"""Return a :class:`.Template` from the given ``uri``.
|
||||
|
||||
The ``uri`` resolution is relative to the ``uri`` of this
|
||||
:class:`.Namespace` object's :class:`.Template`.
|
||||
|
||||
"""
|
||||
return _lookup_template(self.context, uri, self._templateuri)
|
||||
|
||||
def get_cached(self, key, **kwargs):
|
||||
"""Return a value from the :class:`.Cache` referenced by this
|
||||
:class:`.Namespace` object's :class:`.Template`.
|
||||
|
||||
The advantage to this method versus direct access to the
|
||||
:class:`.Cache` is that the configuration parameters
|
||||
declared in ``<%page>`` take effect here, thereby calling
|
||||
up the same configured backend as that configured
|
||||
by ``<%page>``.
|
||||
|
||||
"""
|
||||
|
||||
return self.cache.get(key, **kwargs)
|
||||
|
||||
@property
|
||||
def cache(self):
|
||||
"""Return the :class:`.Cache` object referenced
|
||||
by this :class:`.Namespace` object's
|
||||
:class:`.Template`.
|
||||
|
||||
"""
|
||||
return self.template.cache
|
||||
|
||||
def include_file(self, uri, **kwargs):
|
||||
"""Include a file at the given ``uri``."""
|
||||
|
||||
_include_file(self.context, uri, self._templateuri, **kwargs)
|
||||
|
||||
def _populate(self, d, l):
|
||||
for ident in l:
|
||||
if ident == '*':
|
||||
for (k, v) in self._get_star():
|
||||
d[k] = v
|
||||
else:
|
||||
d[ident] = getattr(self, ident)
|
||||
|
||||
def _get_star(self):
|
||||
if self.callables:
|
||||
for key in self.callables:
|
||||
yield (key, self.callables[key])
|
||||
|
||||
def __getattr__(self, key):
|
||||
if key in self.callables:
|
||||
val = self.callables[key]
|
||||
elif self.inherits:
|
||||
val = getattr(self.inherits, key)
|
||||
else:
|
||||
raise AttributeError(
|
||||
"Namespace '%s' has no member '%s'" %
|
||||
(self.name, key))
|
||||
setattr(self, key, val)
|
||||
return val
|
||||
|
||||
class TemplateNamespace(Namespace):
    """A :class:`.Namespace` specific to a :class:`.Template` instance."""

    def __init__(self, name, context, template=None, templateuri=None,
                 callables=None, inherits=None,
                 populate_self=True, calling_uri=None):
        self.name = name
        self.context = context
        self.inherits = inherits
        if callables is not None:
            # Index the supplied callables by their function name.
            self.callables = dict((fn.__name__, fn) for fn in callables)

        # A template URI takes precedence over an explicit template object.
        if templateuri is not None:
            self.template = _lookup_template(
                context, templateuri, calling_uri)
            self._templateuri = self.template.module._template_uri
        elif template is not None:
            self.template = template
            self._templateuri = template.module._template_uri
        else:
            raise TypeError("'template' argument is required.")

        if populate_self:
            # Called for its effect on ``context``; the returned pair is
            # not used here.
            lclcallable, lclcontext = _populate_self_namespace(
                context, self.template, self_ns=self)

    @property
    def module(self):
        """The Python module referenced by this :class:`.Namespace`.

        If the namespace references a :class:`.Template`, then
        this module is the equivalent of ``template.module``,
        i.e. the generated module for the template.

        """
        return self.template.module

    @property
    def filename(self):
        """The path of the filesystem file used for this
        :class:`.Namespace`'s module or template.
        """
        return self.template.filename

    @property
    def uri(self):
        """The URI for this :class:`.Namespace`'s template.

        I.e. whatever was sent to :meth:`.TemplateLookup.get_template()`.

        This is the equivalent of :attr:`.Template.uri`.

        """
        return self.template.uri

    def _get_star(self):
        """Yield ``(name, callable)`` pairs: the explicit callables first,
        then every name listed in the template module's ``_exports``, each
        bound to this namespace's context.
        """
        if self.callables:
            for name in self.callables:
                yield (name, self.callables[name])

        for exported in self.template.module._exports:
            render_fn = self.template._get_def_callable(exported)
            yield (exported, compat.partial(render_fn, self.context))

    def __getattr__(self, key):
        # Resolution order: explicit callables, then defs of the template
        # (bound to the context), then the inherited namespace. The result
        # is stored on the instance for subsequent lookups.
        if key in self.callables:
            found = self.callables[key]
        elif self.template.has_def(key):
            render_fn = self.template._get_def_callable(key)
            found = compat.partial(render_fn, self.context)
        elif self.inherits:
            found = getattr(self.inherits, key)
        else:
            raise AttributeError(
                "Namespace '%s' has no member '%s'" %
                (self.name, key))
        setattr(self, key, found)
        return found
|
||||
|
||||
class ModuleNamespace(Namespace):
    """A :class:`.Namespace` specific to a Python module instance."""

    def __init__(self, name, context, module,
                 callables=None, inherits=None,
                 populate_self=True, calling_uri=None):
        self.name = name
        self.context = context
        self.inherits = inherits
        if callables is not None:
            # Index the supplied callables by their function name.
            self.callables = dict((fn.__name__, fn) for fn in callables)

        # __import__("a.b.c") returns the top-level package "a", so walk
        # the remaining dotted components to reach the requested module.
        resolved = __import__(module)
        for part in module.split('.')[1:]:
            resolved = getattr(resolved, part)
        self.module = resolved

    @property
    def filename(self):
        """The path of the filesystem file used for this
        :class:`.Namespace`'s module or template.
        """
        return self.module.__file__

    def _get_star(self):
        """Yield ``(name, callable)`` pairs: the explicit callables first,
        then every callable attribute of the module whose name does not
        start with an underscore, each bound to this namespace's context.
        """
        if self.callables:
            for name in self.callables:
                yield (name, self.callables[name])

        for attr_name in dir(self.module):
            if attr_name[0] == '_':
                continue
            candidate = getattr(self.module, attr_name)
            if compat.callable(candidate):
                yield attr_name, compat.partial(candidate, self.context)

    def __getattr__(self, key):
        # Resolution order: explicit callables, then attributes of the
        # module (bound to the context), then the inherited namespace. The
        # result is stored on the instance for subsequent lookups.
        if key in self.callables:
            found = self.callables[key]
        elif hasattr(self.module, key):
            module_fn = getattr(self.module, key)
            found = compat.partial(module_fn, self.context)
        elif self.inherits:
            found = getattr(self.inherits, key)
        else:
            raise AttributeError(
                "Namespace '%s' has no member '%s'" %
                (self.name, key))
        setattr(self, key, found)
        return found
|
||||
|
||||
def supports_caller(func):
    """Apply a caller_stack compatibility decorator to a plain
    Python function.

    The returned wrapper pushes a frame onto ``context.caller_stack``
    before invoking ``func`` and pops it afterwards, even when ``func``
    raises.

    See the example in :ref:`namespaces_python_modules`.

    """

    def wrap_stackframe(context, *call_args, **call_kwargs):
        context.caller_stack._push_frame()
        try:
            result = func(context, *call_args, **call_kwargs)
        finally:
            context.caller_stack._pop_frame()
        return result

    return wrap_stackframe
|
||||
|
||||
def capture(context, callable_, *args, **kwargs):
    """Execute the given template def, capturing the output into
    a buffer.

    A new buffer is pushed onto ``context`` for the duration of the call
    and popped again afterwards, also when ``callable_`` raises. The text
    collected in that buffer is returned.

    See the example in :ref:`namespaces_python_modules`.

    """

    is_callable = compat.callable(callable_)
    if not is_callable:
        raise exceptions.RuntimeException(
            "capture() function expects a callable as "
            "its argument (i.e. capture(func, *args, **kwargs))"
        )

    context._push_buffer()
    try:
        callable_(*args, **kwargs)
    finally:
        captured = context._pop_buffer()
    return captured.getvalue()
|
||||
|
||||
def _decorate_toplevel(fn):
|
||||
def decorate_render(render_fn):
|
||||
def go(context, *args, **kw):
|
||||
def y(*args, **kw):
|
||||
return render_fn(context, *args, **kw)
|
||||
try:
|
||||
y.__name__ = render_fn.__name__[7:]
|
||||
except TypeError:
|
||||
# < Python 2.4
|
||||
pass
|
||||
return fn(y)(context, *args, **kw)
|
||||
return go
|
||||
return decorate_render
|
||||
|
||||
def _decorate_inline(context, fn):
|
||||
def decorate_render(render_fn):
|
||||
dec = fn(render_fn)
|
||||
def go(*args, **kw):
|
||||
return dec(context, *args, **kw)
|
||||
return go
|
||||
return decorate_render
|
||||
|
||||
def _include_file(context, uri, calling_uri, **kwargs):
    """locate the template from the given uri and include it in
    the current output."""

    included = _lookup_template(context, uri, calling_uri)
    # Render against a context whose inheritance tokens were cleaned.
    clean_context = context._clean_inheritance_tokens()
    (callable_, ctx) = _populate_self_namespace(clean_context, included)
    # Arguments are taken from the explicit kwargs, falling back to the
    # data of the including context.
    call_kwargs = _kwargs_for_include(callable_, context._data, **kwargs)
    callable_(ctx, **call_kwargs)
|
||||
|
||||
def _inherit_from(context, uri, calling_uri):
|
||||
"""called by the _inherit method in template modules to set
|
||||
up the inheritance chain at the start of a template's
|
||||
execution."""
|
||||
|
||||
if uri is None:
|
||||
return None
|
||||
template = _lookup_template(context, uri, calling_uri)
|
||||
self_ns = context['self']
|
||||
ih = self_ns
|
||||
while ih.inherits is not None:
|
||||
ih = ih.inherits
|
||||
lclcontext = context._locals({'next': ih})
|
||||
ih.inherits = TemplateNamespace("self:%s" % template.uri,
|
||||
lclcontext,
|
||||
template=template,
|
||||
populate_self=False)
|
||||
context._data['parent'] = lclcontext._data['local'] = ih.inherits
|
||||
callable_ = getattr(template.module, '_mako_inherit', None)
|
||||
if callable_ is not None:
|
||||
ret = callable_(template, lclcontext)
|
||||
if ret:
|
||||
return ret
|
||||
|
||||
gen_ns = getattr(template.module, '_mako_generate_namespaces', None)
|
||||
if gen_ns is not None:
|
||||
gen_ns(context)
|
||||
return (template.callable_, lclcontext)
|
||||
|
||||
def _lookup_template(context, uri, relativeto):
|
||||
lookup = context._with_template.lookup
|
||||
if lookup is None:
|
||||
raise exceptions.TemplateLookupException(
|
||||
"Template '%s' has no TemplateLookup associated" %
|
||||
context._with_template.uri)
|
||||
uri = lookup.adjust_uri(uri, relativeto)
|
||||
try:
|
||||
return lookup.get_template(uri)
|
||||
except exceptions.TopLevelLookupException:
|
||||
raise exceptions.TemplateLookupException(str(compat.exception_as()))
|
||||
|
||||
def _populate_self_namespace(context, template, self_ns=None):
|
||||
if self_ns is None:
|
||||
self_ns = TemplateNamespace('self:%s' % template.uri,
|
||||
context, template=template,
|
||||
populate_self=False)
|
||||
context._data['self'] = context._data['local'] = self_ns
|
||||
if hasattr(template.module, '_mako_inherit'):
|
||||
ret = template.module._mako_inherit(template, context)
|
||||
if ret:
|
||||
return ret
|
||||
return (template.callable_, context)
|
||||
|
||||
def _render(template, callable_, args, data, as_unicode=False):
    """create a Context and return the string
    output of the given template and template callable."""

    # Choose the output buffer: unicode output wins, then the raw
    # passthrough buffer, otherwise an encoding buffer configured from
    # the template.
    if as_unicode:
        buf = util.FastEncodingBuffer(as_unicode=True)
    elif template.bytestring_passthrough:
        buf = compat.StringIO()
    else:
        buf = util.FastEncodingBuffer(
            as_unicode=as_unicode,
            encoding=template.output_encoding,
            errors=template.encoding_errors)

    context = Context(buf, **data)
    context._outputting_as_unicode = as_unicode
    context._set_with_template(template)

    render_kwargs = _kwargs_for_callable(callable_, data)
    _render_context(template, callable_, context, *args, **render_kwargs)
    return context._pop_buffer().getvalue()
|
||||
|
||||
def _kwargs_for_callable(callable_, data):
    """Select the entries of ``data`` to pass to ``callable_`` as keyword
    arguments, based on the callable's argument spec.
    """
    argspec = compat.inspect_func_args(callable_)

    # for normal pages, **pageargs is usually present
    if argspec[2]:
        return data

    # for rendering defs from the top level, figure out the args
    candidates = argspec[0] + [
        extra for extra in argspec[1:3] if extra is not None]
    selected = {}
    for name in candidates:
        if name == 'context':
            continue
        if name in data and name not in selected:
            selected[name] = data[name]
    return selected
|
||||
|
||||
def _kwargs_for_include(callable_, data, **kwargs):
    """Complete ``kwargs`` with entries of ``data`` that match argument
    names of ``callable_``; explicitly passed keyword arguments win.
    """
    argspec = compat.inspect_func_args(callable_)
    candidates = argspec[0] + [
        extra for extra in argspec[1:3] if extra is not None]
    for name in candidates:
        if name == 'context':
            continue
        if name in data and name not in kwargs:
            kwargs[name] = data[name]
    return kwargs
|
||||
|
||||
def _render_context(tmpl, callable_, context, *args, **kwargs):
    """Populate the 'self' namespace for ``tmpl`` in ``context`` and execute
    the appropriate render callable with the given arguments.
    """
    import mako.template as template

    # create polymorphic 'self' namespace for this
    # template with possibly updated context
    if isinstance(tmpl, template.DefTemplate):
        # a def: set up 'self' from the parent template, then call the
        # actual rendering method specified
        (inherit, lclcontext) = _populate_self_namespace(
            context, tmpl.parent)
        _exec_template(callable_, context, args=args, kwargs=kwargs)
    else:
        # if main render method, call from the base of the inheritance stack
        (inherit, lclcontext) = _populate_self_namespace(context, tmpl)
        _exec_template(inherit, lclcontext, args=args, kwargs=kwargs)
|
||||
|
||||
def _exec_template(callable_, context, args=None, kwargs=None):
|
||||
"""execute a rendering callable given the callable, a
|
||||
Context, and optional explicit arguments
|
||||
|
||||
the contextual Template will be located if it exists, and
|
||||
the error handling options specified on that Template will
|
||||
be interpreted here.
|
||||
"""
|
||||
template = context._with_template
|
||||
if template is not None and \
|
||||
(template.format_exceptions or template.error_handler):
|
||||
try:
|
||||
callable_(context, *args, **kwargs)
|
||||
except Exception:
|
||||
_render_error(template, context, compat.exception_as())
|
||||
except:
|
||||
e = sys.exc_info()[0]
|
||||
_render_error(template, context, e)
|
||||
else:
|
||||
callable_(context, *args, **kwargs)
|
||||
|
||||
def _render_error(template, context, error):
|
||||
if template.error_handler:
|
||||
result = template.error_handler(context, error)
|
||||
if not result:
|
||||
compat.reraise(*sys.exc_info())
|
||||
else:
|
||||
error_template = exceptions.html_error_template()
|
||||
if context._outputting_as_unicode:
|
||||
context._buffer_stack[:] = [
|
||||
util.FastEncodingBuffer(as_unicode=True)]
|
||||
else:
|
||||
context._buffer_stack[:] = [util.FastEncodingBuffer(
|
||||
error_template.output_encoding,
|
||||
error_template.encoding_errors)]
|
||||
|
||||
context._set_with_template(error_template)
|
||||
error_template.render_context(context, error=error)
|
|
@ -0,0 +1,705 @@
|
|||
# mako/template.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
"""Provides the Template class, a facade for parsing, generating and executing
|
||||
template strings, as well as template runtime operations."""
|
||||
|
||||
from mako.lexer import Lexer
|
||||
from mako import runtime, util, exceptions, codegen, cache, compat
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import stat
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import weakref
|
||||
|
||||
|
||||
class Template(object):
|
||||
"""Represents a compiled template.
|
||||
|
||||
:class:`.Template` includes a reference to the original
|
||||
template source (via the :attr:`.source` attribute)
|
||||
as well as the source code of the
|
||||
generated Python module (i.e. the :attr:`.code` attribute),
|
||||
as well as a reference to an actual Python module.
|
||||
|
||||
:class:`.Template` is constructed using either a literal string
|
||||
representing the template text, or a filename representing a filesystem
|
||||
path to a source file.
|
||||
|
||||
:param text: textual template source. This argument is mutually
|
||||
exclusive versus the ``filename`` parameter.
|
||||
|
||||
:param filename: filename of the source template. This argument is
|
||||
mutually exclusive versus the ``text`` parameter.
|
||||
|
||||
:param buffer_filters: string list of filters to be applied
|
||||
to the output of ``%def``\ s which are buffered, cached, or otherwise
|
||||
filtered, after all filters
|
||||
defined with the ``%def`` itself have been applied. Allows the
|
||||
creation of default expression filters that let the output
|
||||
of return-valued ``%def``\ s "opt out" of that filtering via
|
||||
passing special attributes or objects.
|
||||
|
||||
:param bytestring_passthrough: When ``True``, and ``output_encoding`` is
|
||||
set to ``None``, and :meth:`.Template.render` is used to render,
|
||||
the `StringIO` or `cStringIO` buffer will be used instead of the
|
||||
default "fast" buffer. This allows raw bytestrings in the
|
||||
output stream, such as in expressions, to pass straight
|
||||
through to the buffer. This flag is forced
|
||||
to ``True`` if ``disable_unicode`` is also configured.
|
||||
|
||||
.. versionadded:: 0.4
|
||||
Added to provide the same behavior as that of the previous series.
|
||||
|
||||
:param cache_args: Dictionary of cache configuration arguments that
|
||||
will be passed to the :class:`.CacheImpl`. See :ref:`caching_toplevel`.
|
||||
|
||||
:param cache_dir:
|
||||
|
||||
.. deprecated:: 0.6
|
||||
Use the ``'dir'`` argument in the ``cache_args`` dictionary.
|
||||
See :ref:`caching_toplevel`.
|
||||
|
||||
:param cache_enabled: Boolean flag which enables caching of this
|
||||
template. See :ref:`caching_toplevel`.
|
||||
|
||||
:param cache_impl: String name of a :class:`.CacheImpl` caching
|
||||
implementation to use. Defaults to ``'beaker'``.
|
||||
|
||||
:param cache_type:
|
||||
|
||||
.. deprecated:: 0.6
|
||||
Use the ``'type'`` argument in the ``cache_args`` dictionary.
|
||||
See :ref:`caching_toplevel`.
|
||||
|
||||
:param cache_url:
|
||||
|
||||
.. deprecated:: 0.6
|
||||
Use the ``'url'`` argument in the ``cache_args`` dictionary.
|
||||
See :ref:`caching_toplevel`.
|
||||
|
||||
:param default_filters: List of string filter names that will
|
||||
be applied to all expressions. See :ref:`filtering_default_filters`.
|
||||
|
||||
:param disable_unicode: Disables all awareness of Python Unicode
|
||||
objects. See :ref:`unicode_disabled`.
|
||||
|
||||
:param enable_loop: When ``True``, enable the ``loop`` context variable.
|
||||
This can be set to ``False`` to support templates that may
|
||||
be making usage of the name "``loop``". Individual templates can
|
||||
re-enable the "loop" context by placing the directive
|
||||
``enable_loop="True"`` inside the ``<%page>`` tag -- see
|
||||
:ref:`migrating_loop`.
|
||||
|
||||
:param encoding_errors: Error parameter passed to ``encode()`` when
|
||||
string encoding is performed. See :ref:`usage_unicode`.
|
||||
|
||||
:param error_handler: Python callable which is called whenever
|
||||
compile or runtime exceptions occur. The callable is passed
|
||||
the current context as well as the exception. If the
|
||||
callable returns ``True``, the exception is considered to
|
||||
be handled, else it is re-raised after the function
|
||||
completes. Is used to provide custom error-rendering
|
||||
functions.
|
||||
|
||||
:param format_exceptions: if ``True``, exceptions which occur during
|
||||
the render phase of this template will be caught and
|
||||
formatted into an HTML error page, which then becomes the
|
||||
rendered result of the :meth:`.render` call. Otherwise,
|
||||
runtime exceptions are propagated outwards.
|
||||
|
||||
:param imports: String list of Python statements, typically individual
|
||||
"import" lines, which will be placed into the module level
|
||||
preamble of all generated Python modules. See the example
|
||||
in :ref:`filtering_default_filters`.
|
||||
|
||||
:param future_imports: String list of names to import from `__future__`.
|
||||
These will be concatenated into a comma-separated string and inserted
|
||||
into the beginning of the template, e.g. ``futures_imports=['FOO',
|
||||
'BAR']`` results in ``from __future__ import FOO, BAR``. If you're
|
||||
interested in using features like the new division operator, you must
|
||||
use future_imports to convey that to the renderer, as otherwise the
|
||||
import will not appear as the first executed statement in the generated
|
||||
code and will therefore not have the desired effect.
|
||||
|
||||
:param input_encoding: Encoding of the template's source code. Can
|
||||
be used in lieu of the coding comment. See
|
||||
:ref:`usage_unicode` as well as :ref:`unicode_toplevel` for
|
||||
details on source encoding.
|
||||
|
||||
:param lookup: a :class:`.TemplateLookup` instance that will be used
|
||||
for all file lookups via the ``<%namespace>``,
|
||||
``<%include>``, and ``<%inherit>`` tags. See
|
||||
:ref:`usage_templatelookup`.
|
||||
|
||||
:param module_directory: Filesystem location where generated
|
||||
Python module files will be placed.
|
||||
|
||||
:param module_filename: Overrides the filename of the generated
|
||||
Python module file. For advanced usage only.
|
||||
|
||||
:param module_writer: A callable which overrides how the Python
|
||||
module is written entirely. The callable is passed the
|
||||
encoded source content of the module and the destination
|
||||
path to be written to. The default behavior of module writing
|
||||
uses a tempfile in conjunction with a file move in order
|
||||
to make the operation atomic. So a user-defined module
|
||||
writing function that mimics the default behavior would be:
|
||||
|
||||
.. sourcecode:: python
|
||||
|
||||
import tempfile
|
||||
import os
|
||||
import shutil
|
||||
|
||||
def module_writer(source, outputpath):
|
||||
(dest, name) = \\
|
||||
tempfile.mkstemp(
|
||||
dir=os.path.dirname(outputpath)
|
||||
)
|
||||
|
||||
os.write(dest, source)
|
||||
os.close(dest)
|
||||
shutil.move(name, outputpath)
|
||||
|
||||
from mako.template import Template
|
||||
mytemplate = Template(
|
||||
filename="index.html",
|
||||
module_directory="/path/to/modules",
|
||||
module_writer=module_writer
|
||||
)
|
||||
|
||||
The function is provided for unusual configurations where
|
||||
certain platform-specific permissions or other special
|
||||
steps are needed.
|
||||
|
||||
:param output_encoding: The encoding to use when :meth:`.render`
|
||||
is called.
|
||||
See :ref:`usage_unicode` as well as :ref:`unicode_toplevel`.
|
||||
|
||||
:param preprocessor: Python callable which will be passed
|
||||
the full template source before it is parsed. The return
|
||||
result of the callable will be used as the template source
|
||||
code.
|
||||
|
||||
:param lexer_cls: A :class:`.Lexer` class used to parse
|
||||
the template. The :class:`.Lexer` class is used by
|
||||
default.
|
||||
|
||||
.. versionadded:: 0.7.4
|
||||
|
||||
:param strict_undefined: Replaces the automatic usage of
|
||||
``UNDEFINED`` for any undeclared variables not located in
|
||||
the :class:`.Context` with an immediate raise of
|
||||
``NameError``. The advantage is immediate reporting of
|
||||
missing variables which include the name.
|
||||
|
||||
.. versionadded:: 0.3.6
|
||||
|
||||
:param uri: string URI or other identifier for this template.
|
||||
If not provided, the ``uri`` is generated from the filesystem
|
||||
path, or from the in-memory identity of a non-file-based
|
||||
template. The primary usage of the ``uri`` is to provide a key
|
||||
within :class:`.TemplateLookup`, as well as to generate the
|
||||
file path of the generated Python module file, if
|
||||
``module_directory`` is specified.
|
||||
|
||||
"""
|
||||
|
||||
lexer_cls = Lexer
|
||||
|
||||
def __init__(self,
|
||||
text=None,
|
||||
filename=None,
|
||||
uri=None,
|
||||
format_exceptions=False,
|
||||
error_handler=None,
|
||||
lookup=None,
|
||||
output_encoding=None,
|
||||
encoding_errors='strict',
|
||||
module_directory=None,
|
||||
cache_args=None,
|
||||
cache_impl='beaker',
|
||||
cache_enabled=True,
|
||||
cache_type=None,
|
||||
cache_dir=None,
|
||||
cache_url=None,
|
||||
module_filename=None,
|
||||
input_encoding=None,
|
||||
disable_unicode=False,
|
||||
module_writer=None,
|
||||
bytestring_passthrough=False,
|
||||
default_filters=None,
|
||||
buffer_filters=(),
|
||||
strict_undefined=False,
|
||||
imports=None,
|
||||
future_imports=None,
|
||||
enable_loop=True,
|
||||
preprocessor=None,
|
||||
lexer_cls=None):
|
||||
if uri:
|
||||
self.module_id = re.sub(r'\W', "_", uri)
|
||||
self.uri = uri
|
||||
elif filename:
|
||||
self.module_id = re.sub(r'\W', "_", filename)
|
||||
drive, path = os.path.splitdrive(filename)
|
||||
path = os.path.normpath(path).replace(os.path.sep, "/")
|
||||
self.uri = path
|
||||
else:
|
||||
self.module_id = "memory:" + hex(id(self))
|
||||
self.uri = self.module_id
|
||||
|
||||
u_norm = self.uri
|
||||
if u_norm.startswith("/"):
|
||||
u_norm = u_norm[1:]
|
||||
u_norm = os.path.normpath(u_norm)
|
||||
if u_norm.startswith(".."):
|
||||
raise exceptions.TemplateLookupException(
|
||||
"Template uri \"%s\" is invalid - "
|
||||
"it cannot be relative outside "
|
||||
"of the root path." % self.uri)
|
||||
|
||||
self.input_encoding = input_encoding
|
||||
self.output_encoding = output_encoding
|
||||
self.encoding_errors = encoding_errors
|
||||
self.disable_unicode = disable_unicode
|
||||
self.bytestring_passthrough = bytestring_passthrough or disable_unicode
|
||||
self.enable_loop = enable_loop
|
||||
self.strict_undefined = strict_undefined
|
||||
self.module_writer = module_writer
|
||||
|
||||
if compat.py3k and disable_unicode:
|
||||
raise exceptions.UnsupportedError(
|
||||
"Mako for Python 3 does not "
|
||||
"support disabling Unicode")
|
||||
elif output_encoding and disable_unicode:
|
||||
raise exceptions.UnsupportedError(
|
||||
"output_encoding must be set to "
|
||||
"None when disable_unicode is used.")
|
||||
if default_filters is None:
|
||||
if compat.py3k or self.disable_unicode:
|
||||
self.default_filters = ['str']
|
||||
else:
|
||||
self.default_filters = ['unicode']
|
||||
else:
|
||||
self.default_filters = default_filters
|
||||
self.buffer_filters = buffer_filters
|
||||
|
||||
self.imports = imports
|
||||
self.future_imports = future_imports
|
||||
self.preprocessor = preprocessor
|
||||
|
||||
if lexer_cls is not None:
|
||||
self.lexer_cls = lexer_cls
|
||||
|
||||
# if plain text, compile code in memory only
|
||||
if text is not None:
|
||||
(code, module) = _compile_text(self, text, filename)
|
||||
self._code = code
|
||||
self._source = text
|
||||
ModuleInfo(module, None, self, filename, code, text)
|
||||
elif filename is not None:
|
||||
# if template filename and a module directory, load
|
||||
# a filesystem-based module file, generating if needed
|
||||
if module_filename is not None:
|
||||
path = module_filename
|
||||
elif module_directory is not None:
|
||||
path = os.path.abspath(
|
||||
os.path.join(
|
||||
os.path.normpath(module_directory),
|
||||
u_norm + ".py"
|
||||
)
|
||||
)
|
||||
else:
|
||||
path = None
|
||||
module = self._compile_from_file(path, filename)
|
||||
else:
|
||||
raise exceptions.RuntimeException(
|
||||
"Template requires text or filename")
|
||||
|
||||
self.module = module
|
||||
self.filename = filename
|
||||
self.callable_ = self.module.render_body
|
||||
self.format_exceptions = format_exceptions
|
||||
self.error_handler = error_handler
|
||||
self.lookup = lookup
|
||||
|
||||
self.module_directory = module_directory
|
||||
|
||||
self._setup_cache_args(
|
||||
cache_impl, cache_enabled, cache_args,
|
||||
cache_type, cache_dir, cache_url
|
||||
)
|
||||
|
||||
|
||||
@util.memoized_property
|
||||
def reserved_names(self):
|
||||
if self.enable_loop:
|
||||
return codegen.RESERVED_NAMES
|
||||
else:
|
||||
return codegen.RESERVED_NAMES.difference(['loop'])
|
||||
|
||||
def _setup_cache_args(self,
|
||||
cache_impl, cache_enabled, cache_args,
|
||||
cache_type, cache_dir, cache_url):
|
||||
self.cache_impl = cache_impl
|
||||
self.cache_enabled = cache_enabled
|
||||
if cache_args:
|
||||
self.cache_args = cache_args
|
||||
else:
|
||||
self.cache_args = {}
|
||||
|
||||
# transfer deprecated cache_* args
|
||||
if cache_type:
|
||||
self.cache_args['type'] = cache_type
|
||||
if cache_dir:
|
||||
self.cache_args['dir'] = cache_dir
|
||||
if cache_url:
|
||||
self.cache_args['url'] = cache_url
|
||||
|
||||
def _compile_from_file(self, path, filename):
|
||||
if path is not None:
|
||||
util.verify_directory(os.path.dirname(path))
|
||||
filemtime = os.stat(filename)[stat.ST_MTIME]
|
||||
if not os.path.exists(path) or \
|
||||
os.stat(path)[stat.ST_MTIME] < filemtime:
|
||||
data = util.read_file(filename)
|
||||
_compile_module_file(
|
||||
self,
|
||||
data,
|
||||
filename,
|
||||
path,
|
||||
self.module_writer)
|
||||
module = compat.load_module(self.module_id, path)
|
||||
del sys.modules[self.module_id]
|
||||
if module._magic_number != codegen.MAGIC_NUMBER:
|
||||
data = util.read_file(filename)
|
||||
_compile_module_file(
|
||||
self,
|
||||
data,
|
||||
filename,
|
||||
path,
|
||||
self.module_writer)
|
||||
module = compat.load_module(self.module_id, path)
|
||||
del sys.modules[self.module_id]
|
||||
ModuleInfo(module, path, self, filename, None, None)
|
||||
else:
|
||||
# template filename and no module directory, compile code
|
||||
# in memory
|
||||
data = util.read_file(filename)
|
||||
code, module = _compile_text(
|
||||
self,
|
||||
data,
|
||||
filename)
|
||||
self._source = None
|
||||
self._code = code
|
||||
ModuleInfo(module, None, self, filename, code, None)
|
||||
return module
|
||||
|
||||
@property
|
||||
def source(self):
|
||||
"""Return the template source code for this :class:`.Template`."""
|
||||
|
||||
return _get_module_info_from_callable(self.callable_).source
|
||||
|
||||
@property
|
||||
def code(self):
|
||||
"""Return the module source code for this :class:`.Template`."""
|
||||
|
||||
return _get_module_info_from_callable(self.callable_).code
|
||||
|
||||
@util.memoized_property
|
||||
def cache(self):
|
||||
return cache.Cache(self)
|
||||
|
||||
@property
|
||||
def cache_dir(self):
|
||||
return self.cache_args['dir']
|
||||
@property
def cache_url(self):
    """The ``'url'`` entry of ``self.cache_args`` (``KeyError`` if unset)."""
    settings = self.cache_args
    return settings['url']
|
||||
@property
def cache_type(self):
    """The ``'type'`` entry of ``self.cache_args`` (``KeyError`` if unset)."""
    settings = self.cache_args
    return settings['type']
|
||||
|
||||
def render(self, *args, **data):
    """Render this template and return the output as a string.

    The output is encoded when the template specifies an output
    encoding; otherwise it is raw (raw output uses `cStringIO` and
    can't handle multibyte characters).  A :class:`.Context` is created
    from the given data, and arguments explicitly declared by the
    template's internal rendering method are also taken from
    ``*args`` / ``**data``.

    """
    body = self.callable_
    return runtime._render(self, body, args, data)
|
||||
|
||||
def render_unicode(self, *args, **data):
    """Render this template and return the output as a unicode object."""
    body = self.callable_
    return runtime._render(self, body, args, data, as_unicode=True)
|
||||
|
||||
def render_context(self, context, *args, **kwargs):
    """Render this :class:`.Template` into an existing context.

    Output goes to the context's buffer; nothing is returned.  If the
    context has no ``_with_template`` yet, this template is attached
    to it first.

    """
    bound_template = getattr(context, '_with_template', None)
    if bound_template is None:
        context._set_with_template(self)
    runtime._render_context(self, self.callable_, context, *args, **kwargs)
|
||||
|
||||
def has_def(self, name):
    """Tell whether the template module defines ``render_<name>``."""
    render_attr = "render_%s" % name
    return hasattr(self.module, render_attr)
|
||||
|
||||
def get_def(self, name):
    """Return the def called ``name`` as a :class:`.DefTemplate`.

    Raises ``AttributeError`` when the module has no ``render_<name>``.
    """
    render_fn = getattr(self.module, "render_%s" % name)
    return DefTemplate(self, render_fn)
|
||||
|
||||
def _get_def_callable(self, name):
|
||||
return getattr(self.module, "render_%s" % name)
|
||||
|
||||
@property
def last_modified(self):
    """The ``_modified_time`` value recorded on the template module."""
    module = self.module
    return module._modified_time
|
||||
|
||||
class ModuleTemplate(Template):
    """A :class:`.Template` built around an already existing Python module.

    The module is expected to be generated template code, e.g.::

        t = Template("this is a template")
        f = open("mymodule.py", "w")
        f.write(t.code)
        f.close()

        import mymodule

        t = ModuleTemplate(mymodule)
        print(t.render())

    """

    def __init__(self, module,
                 module_filename=None,
                 template=None,
                 template_filename=None,
                 module_source=None,
                 template_source=None,
                 output_encoding=None,
                 encoding_errors='strict',
                 disable_unicode=False,
                 bytestring_passthrough=False,
                 format_exceptions=False,
                 error_handler=None,
                 lookup=None,
                 cache_args=None,
                 cache_impl='beaker',
                 cache_enabled=True,
                 cache_type=None,
                 cache_dir=None,
                 cache_url=None,
                 ):
        # identity and source encoding come from the module itself
        template_uri = module._template_uri
        self.module_id = re.sub(r'\W', "_", template_uri)
        self.uri = template_uri
        self.input_encoding = module._source_encoding

        # output handling as requested by the caller
        self.output_encoding = output_encoding
        self.encoding_errors = encoding_errors
        self.disable_unicode = disable_unicode
        # disabling unicode implies bytestring passthrough
        self.bytestring_passthrough = bytestring_passthrough or disable_unicode
        self.enable_loop = module._enable_loop

        py3k = compat.py3k
        if disable_unicode:
            if py3k:
                raise exceptions.UnsupportedError(
                    "Mako for Python 3 does not "
                    "support disabling Unicode")
            if output_encoding:
                raise exceptions.UnsupportedError(
                    "output_encoding must be set to "
                    "None when disable_unicode is used.")

        self.module = module
        self.filename = template_filename
        # register module <-> template bookkeeping for reverse lookups
        ModuleInfo(
            module, module_filename, self,
            template_filename, module_source, template_source)

        self.callable_ = self.module.render_body
        self.format_exceptions = format_exceptions
        self.error_handler = error_handler
        self.lookup = lookup
        self._setup_cache_args(
            cache_impl, cache_enabled, cache_args,
            cache_type, cache_dir, cache_url)
|
||||
|
||||
class DefTemplate(Template):
    """A :class:`.Template` standing for one callable def of a parent
    template."""

    # settings copied verbatim from the parent template, in this order
    _INHERITED = (
        'output_encoding',
        'module',
        'encoding_errors',
        'format_exceptions',
        'error_handler',
        'enable_loop',
        'lookup',
        'bytestring_passthrough',
    )

    def __init__(self, parent, callable_):
        self.parent = parent
        self.callable_ = callable_
        for attr in self._INHERITED:
            setattr(self, attr, getattr(parent, attr))

    def get_def(self, name):
        """Look the def up on the parent template."""
        owner = self.parent
        return owner.get_def(name)
|
||||
|
||||
class ModuleInfo(object):
    """Bookkeeping record for a template module loaded in memory.

    Instances register themselves in ``_modules`` under the module's
    ``__name__`` (and under the module filename when one is given), so
    template source and module source can be found again from a module
    identifier.

    """
    # weak values: an entry goes away once nothing else references it
    _modules = weakref.WeakValueDictionary()

    def __init__(self,
                 module,
                 module_filename,
                 template,
                 template_filename,
                 module_source,
                 template_source):
        self.module = module
        self.module_filename = module_filename
        self.template_filename = template_filename
        self.module_source = module_source
        self.template_source = template_source

        registry = self._modules
        registry[module.__name__] = self
        # the template holds the strong reference that keeps us alive
        template._mmarker = self
        if module_filename:
            registry[module_filename] = self

    @classmethod
    def get_module_source_metadata(cls, module_source, full_line_map=False):
        """Parse the JSON metadata block embedded in ``module_source``.

        ``line_map`` keys and values are converted to ``int``.  With
        ``full_line_map`` true, a ``full_line_map`` list is added whose
        entry ``i`` is the template line for module line ``i + 1``,
        for module lines ``1 .. max(line_map) - 1``.
        """
        found = re.search(
            r"__M_BEGIN_METADATA(.+?)__M_END_METADATA",
            module_source, re.S)
        source_map = compat.json.loads(found.group(1))
        source_map['line_map'] = dict(
            (int(mod_ln), int(tmpl_ln))
            for mod_ln, tmpl_ln in source_map['line_map'].items())

        if full_line_map:
            expanded = source_map['full_line_map'] = []
            line_map = source_map['line_map']

            current = 1
            for mod_line in range(1, max(line_map)):
                # lines without their own entry inherit the previous one
                current = line_map.get(mod_line, current)
                expanded.append(current)
        return source_map

    @property
    def code(self):
        """Module source: the stored text, else read from the module file."""
        if self.module_source is None:
            return util.read_python_file(self.module_filename)
        return self.module_source

    @property
    def source(self):
        """Template source, decoded with the module's source encoding
        when one is set and the data is not already text."""
        if self.template_source is None:
            data = util.read_file(self.template_filename)
            if self.module._source_encoding:
                return data.decode(self.module._source_encoding)
            return data

        if self.module._source_encoding and \
                not isinstance(self.template_source, compat.text_type):
            return self.template_source.decode(
                self.module._source_encoding)
        return self.template_source
|
||||
|
||||
def _compile(template, text, filename, generate_magic_comment):
|
||||
lexer = template.lexer_cls(text,
|
||||
filename,
|
||||
disable_unicode=template.disable_unicode,
|
||||
input_encoding=template.input_encoding,
|
||||
preprocessor=template.preprocessor)
|
||||
node = lexer.parse()
|
||||
source = codegen.compile(node,
|
||||
template.uri,
|
||||
filename,
|
||||
default_filters=template.default_filters,
|
||||
buffer_filters=template.buffer_filters,
|
||||
imports=template.imports,
|
||||
future_imports=template.future_imports,
|
||||
source_encoding=lexer.encoding,
|
||||
generate_magic_comment=generate_magic_comment,
|
||||
disable_unicode=template.disable_unicode,
|
||||
strict_undefined=template.strict_undefined,
|
||||
enable_loop=template.enable_loop,
|
||||
reserved_names=template.reserved_names)
|
||||
return source, lexer
|
||||
|
||||
def _compile_text(template, text, filename):
|
||||
identifier = template.module_id
|
||||
source, lexer = _compile(template, text, filename,
|
||||
generate_magic_comment=template.disable_unicode)
|
||||
|
||||
cid = identifier
|
||||
if not compat.py3k and isinstance(cid, compat.text_type):
|
||||
cid = cid.encode()
|
||||
module = types.ModuleType(cid)
|
||||
code = compile(source, cid, 'exec')
|
||||
|
||||
# this exec() works for 2.4->3.3.
|
||||
exec(code, module.__dict__, module.__dict__)
|
||||
return (source, module)
|
||||
|
||||
def _compile_module_file(template, text, filename, outputpath, module_writer):
|
||||
source, lexer = _compile(template, text, filename,
|
||||
generate_magic_comment=True)
|
||||
|
||||
if isinstance(source, compat.text_type):
|
||||
source = source.encode(lexer.encoding or 'ascii')
|
||||
|
||||
if module_writer:
|
||||
module_writer(source, outputpath)
|
||||
else:
|
||||
# make tempfiles in the same location as the ultimate
|
||||
# location. this ensures they're on the same filesystem,
|
||||
# avoiding synchronization issues.
|
||||
(dest, name) = tempfile.mkstemp(dir=os.path.dirname(outputpath))
|
||||
|
||||
os.write(dest, source)
|
||||
os.close(dest)
|
||||
shutil.move(name, outputpath)
|
||||
|
||||
def _get_module_info_from_callable(callable_):
|
||||
if compat.py3k:
|
||||
return _get_module_info(callable_.__globals__['__name__'])
|
||||
else:
|
||||
return _get_module_info(callable_.func_globals['__name__'])
|
||||
|
||||
def _get_module_info(filename):
|
||||
return ModuleInfo._modules[filename]
|
||||
|
|
@ -0,0 +1,360 @@
|
|||
# mako/util.py
|
||||
# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
|
||||
#
|
||||
# This module is part of Mako and is released under
|
||||
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
import re
|
||||
import collections
|
||||
import codecs
|
||||
import os
|
||||
from mako import compat
|
||||
import operator
|
||||
|
||||
def update_wrapper(decorated, fn):
    """Mark ``decorated`` as wrapping ``fn`` and return ``decorated``.

    Sets ``__wrapped__`` to ``fn`` and copies ``fn.__name__``.
    """
    setattr(decorated, '__wrapped__', fn)
    setattr(decorated, '__name__', fn.__name__)
    return decorated
|
||||
|
||||
|
||||
class PluginLoader(object):
    """Resolves plugin implementations by name for one entry point group.

    Implementations are either registered explicitly through
    :meth:`register` or discovered via ``pkg_resources`` entry points.
    """

    def __init__(self, group):
        self.group = group
        # name -> zero-argument callable returning the implementation
        self.impls = {}

    def load(self, name):
        """Return the implementation called ``name``.

        Known names are served from ``self.impls``; otherwise the first
        matching entry point in ``self.group`` is remembered and loaded.
        Raises ``mako.exceptions.RuntimeException`` if none matches.
        """
        try:
            loader = self.impls[name]
        except KeyError:
            pass
        else:
            return loader()

        import pkg_resources
        for impl in pkg_resources.iter_entry_points(self.group, name):
            self.impls[name] = impl.load
            return impl.load()

        from mako import exceptions
        raise exceptions.RuntimeException(
            "Can't load plugin %s %s" %
            (self.group, name))

    def register(self, name, modulepath, objname):
        """Register ``objname`` from module ``modulepath`` under ``name``.

        The import is deferred until the plugin is first loaded.
        """
        def load():
            # __import__ returns the top-level package; walk down to
            # the requested submodule before fetching the object
            target = __import__(modulepath)
            for part in modulepath.split(".")[1:]:
                target = getattr(target, part)
            return getattr(target, objname)
        self.impls[name] = load
|
||||
|
||||
def verify_directory(dir):
    """create and/or verify a filesystem directory.

    Tries ``os.makedirs`` until the path exists.  An ``OSError`` from
    ``makedirs`` is retried, and re-raised once more than five attempts
    have failed.  Any other exception (including ``KeyboardInterrupt``
    and ``SystemExit``) propagates immediately instead of being retried.
    """

    tries = 0

    while not os.path.exists(dir):
        try:
            tries += 1
            os.makedirs(dir, compat.octal("0775"))
        except OSError:
            # only filesystem errors are worth retrying; the loop
            # condition re-checks whether the directory now exists
            if tries > 5:
                raise
|
||||
|
||||
def to_list(x, default=None):
    """Coerce ``x`` to a sequence.

    ``None`` gives ``default``; lists and tuples are returned unchanged;
    anything else is wrapped in a one-element list.
    """
    if x is None:
        return default
    return x if isinstance(x, (list, tuple)) else [x]
|
||||
|
||||
|
||||
class memoized_property(object):
    """A read-only @property that is only evaluated once."""

    def __init__(self, fget, doc=None):
        self.fget = fget
        self.__doc__ = doc or fget.__doc__
        self.__name__ = fget.__name__

    def __get__(self, obj, cls):
        # class-level access hands back the descriptor itself
        if obj is None:
            return self
        value = self.fget(obj)
        # stored in the instance dict under the same name, so later
        # attribute lookups find the value and never reach __get__ again
        obj.__dict__[self.__name__] = value
        return value
|
||||
|
||||
class memoized_instancemethod(object):
    """Decorate a method to memoize its return value.

    Best applied to no-arg methods: memoization is not sensitive to
    argument values, and will always return the same value even when
    called with different arguments.

    """

    def __init__(self, fget, doc=None):
        self.fget = fget
        self.__doc__ = doc or fget.__doc__
        self.__name__ = fget.__name__

    def __get__(self, obj, cls):
        if obj is None:
            return self

        name, doc = self.__name__, self.__doc__

        def oneshot(*args, **kw):
            result = self.fget(obj, *args, **kw)

            def memo(*a, **kw):
                return result
            memo.__name__ = name
            memo.__doc__ = doc
            # from now on the instance attribute shadows this descriptor
            obj.__dict__[name] = memo
            return result

        oneshot.__name__ = name
        oneshot.__doc__ = doc
        return oneshot
|
||||
|
||||
class SetLikeDict(dict):
    """a dictionary that has some setlike methods on it"""

    def union(self, other):
        """produce a 'union' of this dict and another (at the key level).

        values in the second dict take precedence over that of the first.

        Returns a new :class:`SetLikeDict`; neither operand is modified.
        Keys may be of any hashable type.
        """
        # copy positionally rather than via ``**self``: keyword expansion
        # raises TypeError for keys that are not strings
        merged = SetLikeDict(self)
        merged.update(other)
        return merged
|
||||
|
||||
class FastEncodingBuffer(object):
    """a very rudimentary buffer that is faster than StringIO,
    but doesn't crash on unicode data like cStringIO."""

    def __init__(self, encoding=None, errors='strict', as_unicode=False):
        self.data = collections.deque()
        self.encoding = encoding
        # the join delimiter decides the type of the joined result
        self.delim = compat.u('') if as_unicode else ''
        self.as_unicode = as_unicode
        self.errors = errors
        # writing is a plain append on the deque
        self.write = self.data.append

    def truncate(self):
        """Discard everything written so far."""
        fresh = collections.deque()
        self.data = fresh
        self.write = fresh.append

    def getvalue(self):
        """Join all written chunks, encoding the result when an
        encoding was configured."""
        joined = self.delim.join(self.data)
        if not self.encoding:
            return joined
        return joined.encode(self.encoding, self.errors)
|
||||
|
||||
class LRUCache(dict):
    """A dictionary-like object that stores a limited number of items,
    discarding lesser used items periodically.

    this is a rewrite of LRUCache from Myghty to use a periodic timestamp-based
    paradigm so that synchronization is not really needed.  the size management
    is inexact.
    """

    class _Item(object):
        """Stored wrapper: the value plus its last-access timestamp."""

        def __init__(self, key, value):
            self.key = key
            self.value = value
            self.timestamp = compat.time_func()

        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity, threshold=.5):
        self.capacity = capacity
        self.threshold = threshold

    def __getitem__(self, key):
        entry = dict.__getitem__(self, key)
        # every read refreshes the entry
        entry.timestamp = compat.time_func()
        return entry.value

    def values(self):
        return [entry.value for entry in dict.values(self)]

    def setdefault(self, key, value):
        if key not in self:
            self[key] = value
            return value
        return self[key]

    def __setitem__(self, key, value):
        entry = dict.get(self, key)
        if entry is not None:
            entry.value = value
        else:
            dict.__setitem__(self, key, self._Item(key, value))
        self._manage_size()

    def _manage_size(self):
        """Trim down to ``capacity`` entries, keeping the most recently
        stamped ones, once the size exceeds capacity by ``threshold``."""
        while len(self) > self.capacity + self.capacity * self.threshold:
            newest_first = sorted(
                dict.values(self),
                key=lambda entry: entry.timestamp,
                reverse=True)
            for stale in newest_first[self.capacity:]:
                try:
                    del self[stale.key]
                except KeyError:
                    # if we couldn't find a key, most likely some other thread
                    # broke in on us. loop around and try again
                    break
|
||||
|
||||
# Regexp to match python magic encoding line
_PYTHON_MAGIC_COMMENT_re = re.compile(
    r'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)',
    re.VERBOSE)


def parse_encoding(fp):
    """Deduce the encoding of a Python source file (binary mode) from magic
    comment.

    It does this in the same way as the `Python interpreter`__

    .. __: http://docs.python.org/ref/encodings.html

    The ``fp`` argument should be a seekable file object in binary mode.

    Returns ``'utf_8'`` when the file starts with a UTF-8 byte-order-mark,
    the name declared by a magic comment on the first or second line,
    or ``None`` when neither is present.  Raises ``SyntaxError`` when both
    a byte-order-mark and a magic comment are found.  The position of
    ``fp`` is restored before returning.
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]

        m = _PYTHON_MAGIC_COMMENT_re.match(line1.decode('ascii', 'ignore'))
        if not m:
            try:
                # Syntax-check line 1 with the built-in compile() rather
                # than the ``parser`` module: ``parser`` no longer exists
                # on current Python versions, and a failed import used to
                # skip the line 2 check altogether.
                compile(line1.decode('ascii', 'ignore'),
                        '<parse_encoding>', 'exec', 0, True)
            except (SyntaxError, ValueError):
                # Either it's a real syntax error, in which case the source
                # is not valid python source, or line2 is a continuation of
                # line1, in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = _PYTHON_MAGIC_COMMENT_re.match(
                    line2.decode('ascii', 'ignore'))

        if has_bom:
            if m:
                raise SyntaxError("python refuses to compile code with both a UTF8" \
                                  " byte-order-mark and a magic encoding comment")
            return 'utf_8'
        elif m:
            return m.group(1)
        else:
            return None
    finally:
        fp.seek(pos)
|
||||
|
||||
def sorted_dict_repr(d):
    """repr() a dictionary with the keys in order.

    Used by the lexer unit test to compare parse trees based on strings.

    """
    pairs = ["%r: %r" % (key, d[key]) for key in sorted(d.keys())]
    return "{" + ", ".join(pairs) + "}"
|
||||
|
||||
def restore__ast(_ast):
    """Attempt to restore the required classes to the _ast module if it
    appears to be missing them

    Does nothing when ``_ast`` already has ``AST``.  Otherwise a fixed
    snippet is compiled to an AST and the node classes found at known
    positions in that tree are assigned onto ``_ast``.
    """
    if hasattr(_ast, 'AST'):
        return
    _ast.PyCF_ONLY_AST = 2 << 9
    # each statement below exists only so its node types can be read off
    tree = compile("""\
def foo(): pass
class Bar(object): pass
if False: pass
baz = 'mako'
1 + 2 - 3 * 4 / 5
6 // 7 % 8 << 9 >> 10
11 & 12 ^ 13 | 14
15 and 16 or 17
-baz + (not +18) - ~17
baz and 'foo' or 'bar'
(mako is baz == baz) is not baz != mako
mako > baz < mako >= baz <= mako
mako in baz not in mako""", '<unknown>', 'exec', _ast.PyCF_ONLY_AST)
    _ast.Module = type(tree)

    for cls in _ast.Module.__mro__:
        if cls.__name__ == 'mod':
            _ast.mod = cls
        elif cls.__name__ == 'AST':
            _ast.AST = cls

    stmts = tree.body

    # compound statements
    _ast.FunctionDef = type(stmts[0])
    _ast.ClassDef = type(stmts[1])
    _ast.If = type(stmts[2])

    # baz = 'mako'
    assign = stmts[3]
    _ast.Name = type(assign.targets[0])
    _ast.Store = type(assign.targets[0].ctx)
    _ast.Str = type(assign.value)

    # 1 + 2 - 3 * 4 / 5
    arith = stmts[4].value
    _ast.Sub = type(arith.op)
    _ast.Add = type(arith.left.op)
    _ast.Div = type(arith.right.op)
    _ast.Mult = type(arith.right.left.op)

    # 6 // 7 % 8 << 9 >> 10
    shifts = stmts[5].value
    _ast.RShift = type(shifts.op)
    _ast.LShift = type(shifts.left.op)
    _ast.Mod = type(shifts.left.left.op)
    _ast.FloorDiv = type(shifts.left.left.left.op)

    # 11 & 12 ^ 13 | 14
    bits = stmts[6].value
    _ast.BitOr = type(bits.op)
    _ast.BitXor = type(bits.left.op)
    _ast.BitAnd = type(bits.left.left.op)

    # 15 and 16 or 17
    boolean = stmts[7].value
    _ast.Or = type(boolean.op)
    _ast.And = type(boolean.values[0].op)

    # -baz + (not +18) - ~17
    unary = stmts[8].value
    _ast.Invert = type(unary.right.op)
    _ast.Not = type(unary.left.right.op)
    _ast.UAdd = type(unary.left.right.operand.op)
    _ast.USub = type(unary.left.left.op)

    # baz and 'foo' or 'bar'
    boolean2 = stmts[9].value
    _ast.Or = type(boolean2.op)
    _ast.And = type(boolean2.values[0].op)

    # (mako is baz == baz) is not baz != mako
    identity = stmts[10].value
    _ast.IsNot = type(identity.ops[0])
    _ast.NotEq = type(identity.ops[1])
    _ast.Is = type(identity.left.ops[0])
    _ast.Eq = type(identity.left.ops[1])

    # mako > baz < mako >= baz <= mako
    ordering = stmts[11].value
    _ast.Gt = type(ordering.ops[0])
    _ast.Lt = type(ordering.ops[1])
    _ast.GtE = type(ordering.ops[2])
    _ast.LtE = type(ordering.ops[3])

    # mako in baz not in mako
    membership = stmts[12].value
    _ast.In = type(membership.ops[0])
    _ast.NotIn = type(membership.ops[1])
|
||||
|
||||
|
||||
|
||||
def read_file(path, mode='rb'):
    """Return the whole content of ``path``, opened with ``mode``.

    The file is closed again whether or not reading succeeds.
    """
    with open(path, mode) as handle:
        return handle.read()
|
||||
|
||||
def read_python_file(path):
    """Read a Python source file, decoding it when it declares an encoding.

    The encoding is determined by ``parse_encoding``; when that yields
    nothing the raw bytes are returned undecoded.
    """
    with open(path, "rb") as handle:
        declared = parse_encoding(handle)
        raw = handle.read()
        return raw.decode(declared) if declared else raw
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
<%
    # Column at which knob values start: longest 'KNOB_<name> ' label,
    # rounded up to a multiple of four.
    longest = 0
    for entry in knobs:
        longest = max(longest, len(entry[0]))
    max_len = longest + len('KNOB_ ')
    remainder = max_len % 4
    if remainder:
        max_len += 4 - remainder

    def space_knob(knob):
        # padding that brings 'KNOB_<knob>' up to the max_len column
        used = len('KNOB_' + knob)
        return ' ' * (max_len - used)
%>/******************************************************************************
|
||||
*
|
||||
* Copyright 2015
|
||||
* Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
% if gen_header:
|
||||
* @file ${filename}.h
|
||||
% else:
|
||||
* @file ${filename}.cpp
|
||||
% endif
|
||||
*
|
||||
* @brief Dynamic Knobs for Core.
|
||||
*
|
||||
* ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
|
||||
*
|
||||
******************************************************************************/
|
||||
%if gen_header:
|
||||
#pragma once
|
||||
#include <string>
|
||||
|
||||
/// Holder for a single tunable value of type T.
/// Only derived types can construct it (the constructor is protected).
template <typename T>
struct Knob
{
    /// Current value.
    const T& Value() const
    {
        return m_Value;
    }

    /// Store a new value and return a reference to the stored copy.
    const T& Value(const T& newValue)
    {
        m_Value = newValue;
        return m_Value;
    }

protected:
    Knob(const T& defaultValue)
        : m_Value(defaultValue)
    {
    }

private:
    T m_Value;
};
|
||||
|
||||
## DEFINE_KNOB declares a struct Knob_<name> derived from Knob<type>, whose
## constructor passes the default value on and whose Name() returns the
## string "KNOB_<name>", plus a member of that struct called <name>.
## NOTE(review): each doubled backslash followed by an empty line looks like
## the way this template emits a literal trailing backslash (C line
## continuation) -- confirm against Mako's newline-escape rules before
## reformatting these lines.
#define DEFINE_KNOB(_name, _type, _default) \\
|
||||
|
||||
struct Knob_##_name : Knob<_type> \\
|
||||
|
||||
{ \\
|
||||
|
||||
Knob_##_name() : Knob<_type>(_default) { } \\
|
||||
|
||||
static const char* Name() { return "KNOB_" #_name; } \\
|
||||
|
||||
} _name;
|
||||
|
||||
## GET_KNOB / SET_KNOB read and write a knob through the g_GlobalKnobs object.
#define GET_KNOB(_name) g_GlobalKnobs._name.Value()
|
||||
#define SET_KNOB(_name, _newValue) g_GlobalKnobs._name.Value(_newValue)
|
||||
|
||||
## One DEFINE_KNOB member is generated per entry of `knobs`.  As used here,
## knob[0] is the knob name and knob[1] is a mapping with 'desc' (iterable of
## comment lines), 'type' and 'default'.
struct GlobalKnobs
|
||||
{
|
||||
% for knob in knobs:
|
||||
//-----------------------------------------------------------
|
||||
// KNOB_${knob[0]}
|
||||
//
|
||||
## description lines are copied into the generated header as C++ comments
% for line in knob[1]['desc']:
|
||||
// ${line}
|
||||
% endfor
|
||||
DEFINE_KNOB(${knob[0]}, ${knob[1]['type']}, ${knob[1]['default']});
|
||||
|
||||
% endfor
|
||||
GlobalKnobs();
|
||||
std::string ToString(const char* optPerLinePrefix="");
|
||||
};
|
||||
extern GlobalKnobs g_GlobalKnobs;
|
||||
|
||||
## Emits one `#define KNOB_<name> GET_KNOB(<name>)` per knob; space_knob()
## supplies the padding between the macro name and its expansion.
% for knob in knobs:
|
||||
#define KNOB_${knob[0]}${space_knob(knob[0])}GET_KNOB(${knob[0]})
|
||||
% endfor
|
||||
|
||||
|
||||
% else:
|
||||
% for inc in includes:
|
||||
#include <${inc}>
|
||||
% endfor
|
||||
|
||||
//========================================================
|
||||
// Static Data Members
|
||||
//========================================================
|
||||
GlobalKnobs g_GlobalKnobs;
|
||||
|
||||
//========================================================
|
||||
// Knob Initialization
|
||||
//========================================================
|
||||
## Constructor body: one InitKnob(<name>) call per entry of `knobs`.
## NOTE(review): InitKnob is not defined in this template -- presumably it is
## provided by one of the headers listed in `includes`; confirm.
GlobalKnobs::GlobalKnobs()
|
||||
{
|
||||
% for knob in knobs:
|
||||
InitKnob(${knob[0]});
|
||||
% endfor
|
||||
|
||||
}
|
||||
|
||||
//========================================================
|
||||
// Knob Display (Convert to String)
|
||||
//========================================================
|
||||
## Generates GlobalKnobs::ToString: one output line per knob, each starting
## with optPerLinePrefix and the padded "KNOB_<name>:" label.
##   bool knobs      -> "+" or "-"
##   float knobs     -> the value (fixed notation, one decimal)
##   all other types -> the value in hex (left-aligned, width 11), then decimal
## No std::ends is appended: on a stringstream it would embed a NUL character
## at the end of the returned std::string.
std::string GlobalKnobs::ToString(const char* optPerLinePrefix)
{
    std::basic_stringstream<char> str;
    str << std::showbase << std::setprecision(1) << std::fixed;

    if (optPerLinePrefix == nullptr) { optPerLinePrefix = ""; }

    % for knob in knobs:
    str << optPerLinePrefix << "KNOB_${knob[0]}:${space_knob(knob[0])}";
    % if knob[1]['type'] == 'bool':
    str << (KNOB_${knob[0]} ? "+\n" : "-\n");
    % elif knob[1]['type'] != 'float':
    str << std::hex << std::setw(11) << std::left << KNOB_${knob[0]};
    str << std::dec << KNOB_${knob[0]} << "\n";
    % else:
    str << KNOB_${knob[0]} << "\n";
    % endif
    % endfor

    return str.str();
}
|
||||
|
||||
|
||||
% endif
|
Loading…
Reference in New Issue