mirror of https://github.com/doitsujin/dxvk
123 lines
3.8 KiB
C++
123 lines
3.8 KiB
C++
#include "spirv_compression.h"
|
|
|
|
namespace dxvk {
|
|
|
|
SpirvCompressedBuffer::SpirvCompressedBuffer()
|
|
: m_size(0) {
|
|
|
|
}
|
|
|
|
|
|
SpirvCompressedBuffer::SpirvCompressedBuffer(SpirvCodeBuffer& code)
|
|
: m_size(code.dwords()) {
|
|
// The compression (detailed below) achieves roughly 55% of the
|
|
// original size on average and is very consistent, so an initial
|
|
// estimate of roughly 58% will be accurate most of the time.
|
|
const uint32_t* data = code.data();
|
|
m_code.reserve((m_size * 75) / 128);
|
|
|
|
std::array<uint32_t, 16> block;
|
|
uint32_t blockMask = 0;
|
|
uint32_t blockOffset = 0;
|
|
|
|
// The algorithm used is a simple variable-to-fixed compression that
|
|
// encodes up to two consecutive SPIR-V tokens into one DWORD using
|
|
// a small number of different encodings. While not achieving great
|
|
// compression ratios, the main goal is to allow decompression code
|
|
// to be fast, with short dependency chains.
|
|
// Compressed tokens are stored in blocks of 16 DWORDs, each preceeded
|
|
// by a single DWORD which stores the layout for each DWORD, two bits
|
|
// each. The supported layouts, are as follows:
|
|
// 0x0: 1x 32-bit; 0x1: 1x 20-bit + 1x 12-bit
|
|
// 0x2: 2x 16-bit; 0x3: 1x 12-bit + 1x 20-bit
|
|
// These layouts are chosen to allow reasonably efficient encoding of
|
|
// opcode tokens, which usually fit into 20 bits, followed by type IDs,
|
|
// which tend to be low as well since most types are defined early.
|
|
for (size_t i = 0; i < m_size; ) {
|
|
if (likely(i + 1 < m_size)) {
|
|
uint32_t a = data[i];
|
|
uint32_t b = data[i + 1];
|
|
uint32_t schema;
|
|
uint32_t encode;
|
|
|
|
if (std::max(a, b) < (1u << 16)) {
|
|
schema = 0x2;
|
|
encode = a | (b << 16);
|
|
} else if (a < (1u << 20) && b < (1u << 12)) {
|
|
schema = 0x1;
|
|
encode = a | (b << 20);
|
|
} else if (a < (1u << 12) && b < (1u << 20)) {
|
|
schema = 0x3;
|
|
encode = a | (b << 12);
|
|
} else {
|
|
schema = 0x0;
|
|
encode = a;
|
|
}
|
|
|
|
block[blockOffset] = encode;
|
|
blockMask |= schema << (blockOffset << 1);
|
|
blockOffset += 1;
|
|
|
|
i += schema ? 2 : 1;
|
|
} else {
|
|
block[blockOffset] = data[i++];
|
|
blockOffset += 1;
|
|
}
|
|
|
|
if (unlikely(blockOffset == 16) || unlikely(i == m_size)) {
|
|
m_code.insert(m_code.end(), blockMask);
|
|
m_code.insert(m_code.end(), block.begin(), block.begin() + blockOffset);
|
|
|
|
blockMask = 0;
|
|
blockOffset = 0;
|
|
}
|
|
}
|
|
|
|
// Only shrink the array if we have lots of overhead for some reason.
|
|
// This should only happen on shaders where our initial estimate was
|
|
// too small. In general, we want to avoid reallocation here.
|
|
if (m_code.capacity() > (m_code.size() * 10) / 9)
|
|
m_code.shrink_to_fit();
|
|
}
|
|
|
|
|
|
SpirvCompressedBuffer::~SpirvCompressedBuffer() {
|
|
|
|
}
|
|
|
|
|
|
SpirvCodeBuffer SpirvCompressedBuffer::decompress() const {
|
|
SpirvCodeBuffer code(m_size);
|
|
uint32_t* data = code.data();
|
|
|
|
uint32_t srcOffset = 0;
|
|
uint32_t dstOffset = 0;
|
|
|
|
constexpr uint32_t shiftAmounts = 0x0c101420;
|
|
|
|
while (dstOffset < m_size) {
|
|
uint32_t blockMask = m_code[srcOffset];
|
|
|
|
for (uint32_t i = 0; i < 16 && dstOffset < m_size; i++) {
|
|
// Use 64-bit integers for some of the operands so we can
|
|
// shift by 32 bits and not handle it as a special cases
|
|
uint32_t schema = (blockMask >> (i << 1)) & 0x3;
|
|
uint32_t shift = (shiftAmounts >> (schema << 3)) & 0xff;
|
|
uint64_t mask = ~(~0ull << shift);
|
|
uint64_t encode = m_code[srcOffset + i + 1];
|
|
|
|
data[dstOffset] = encode & mask;
|
|
|
|
if (likely(schema))
|
|
data[dstOffset + 1] = encode >> shift;
|
|
|
|
dstOffset += schema ? 2 : 1;
|
|
}
|
|
|
|
srcOffset += 17;
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
} |