Full-width mantissa for group E registers and FDIV_M

This commit is contained in:
tevador 2019-04-17 16:18:02 +02:00
parent d43c7db416
commit 67046a9f38
11 changed files with 60 additions and 39 deletions

View File

@ -38,7 +38,7 @@ namespace RandomX {
template<bool superscalar> template<bool superscalar>
void CompiledLightVirtualMachine<superscalar>::initialize() { void CompiledLightVirtualMachine<superscalar>::initialize() {
VirtualMachine::initialize(); VirtualMachine::initialize();
compiler.generateProgramLight<superscalar>(program); compiler.generateProgramLight<superscalar>(program, config);
//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); //mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
} }

View File

@ -37,7 +37,7 @@ namespace RandomX {
void CompiledVirtualMachine::initialize() { void CompiledVirtualMachine::initialize() {
VirtualMachine::initialize(); VirtualMachine::initialize();
compiler.generateProgram(program); compiler.generateProgram(program, config);
mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
} }

View File

@ -116,6 +116,16 @@ namespace RandomX {
return scratchpad + addr; return scratchpad + addr;
} }
// Rebuilds both double lanes of `x` for use as a group E value / FDIV_M divisor:
// the sign and exponent bits (63..52) of each lane are cleared, the 52-bit
// mantissa is kept, and the per-program mask in config.eMask is then OR-ed in.
// Returns the masked vector; `x` is taken by value, so the caller's copy is untouched.
template<bool superscalar>
FORCE_INLINE __m128d InterpretedVirtualMachine<superscalar>::maskRegisterExponentMantissa(__m128d x) {
	// Low 52 bits set: selects the mantissa field of an IEEE-754 double.
	constexpr uint64_t fractionBits = (1ULL << 52) - 1;
	const __m128d keepFraction = _mm_castsi128_pd(_mm_set_epi64x(fractionBits, fractionBits));
	// Aligned 128-bit load of the two 64-bit mask words (eMask[0], eMask[1]).
	const __m128d programMask = _mm_load_pd((const double*)&config.eMask);
	return _mm_or_pd(_mm_and_pd(x, keepFraction), programMask);
}
template<bool superscalar> template<bool superscalar>
FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic]; auto& ibc = byteCode[ic];
@ -229,7 +239,7 @@ namespace RandomX {
} break; } break;
case InstructionType::FDIV_M: { case InstructionType::FDIV_M: {
__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc))); __m128d fsrc = maskRegisterExponentMantissa(load_cvt_i32x2(getScratchpadAddress(ibc)));
*ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc); *ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc);
} break; } break;
@ -326,7 +336,7 @@ namespace RandomX {
uint32_t spAddr1 = mem.ma; uint32_t spAddr1 = mem.ma;
if (trace) { if (trace) {
std::cout << "execute (reg: r" << readReg0 << ", r" << readReg1 << ", r" << readReg2 << ", r" << readReg3 << ")" << std::endl; std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
printState(r, f, e, a); printState(r, f, e, a);
@ -334,7 +344,7 @@ namespace RandomX {
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
//std::cout << "Iteration " << iter << std::endl; //std::cout << "Iteration " << iter << std::endl;
uint64_t spMix = r[readReg0] ^ r[readReg1]; uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
spAddr0 ^= spMix; spAddr0 ^= spMix;
spAddr0 &= ScratchpadL3Mask64; spAddr0 &= ScratchpadL3Mask64;
spAddr1 ^= spMix >> 32; spAddr1 ^= spMix >> 32;
@ -353,10 +363,10 @@ namespace RandomX {
f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8); f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16); f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24); f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
e[0] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 32)); e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
e[1] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 40)); e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
e[2] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 48)); e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
e[3] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 56)); e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
if (trace) { if (trace) {
std::cout << "iteration " << std::dec << ic << std::endl; std::cout << "iteration " << std::dec << ic << std::endl;
@ -368,7 +378,7 @@ namespace RandomX {
executeBytecode(r, f, e, a); executeBytecode(r, f, e, a);
mem.mx ^= r[readReg2] ^ r[readReg3]; mem.mx ^= r[config.readReg2] ^ r[config.readReg3];
mem.mx &= CacheLineAlignMask; mem.mx &= CacheLineAlignMask;
if (superscalar) { if (superscalar) {
executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r); executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);

View File

@ -133,5 +133,6 @@ namespace RandomX {
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
void* getScratchpadAddress(InstructionByteCode& ibc); void* getScratchpadAddress(InstructionByteCode& ibc);
__m128d maskRegisterExponentMantissa(__m128d);
}; };
} }

View File

@ -24,6 +24,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "Program.hpp" #include "Program.hpp"
#include "reciprocal.h" #include "reciprocal.h"
#include "virtualMemory.hpp" #include "virtualMemory.hpp"
#include "intrinPortable.h"
#define RANDOMX_JUMP #define RANDOMX_JUMP
@ -230,20 +231,20 @@ namespace RandomX {
freePagedMemory(code, CodeSize); freePagedMemory(code, CodeSize);
} }
void JitCompilerX86::generateProgram(Program& prog) { void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
generateProgramPrologue(prog); generateProgramPrologue(prog, pcfg);
memcpy(code + codePos, codeReadDataset, readDatasetSize); memcpy(code + codePos, codeReadDataset, readDatasetSize);
codePos += readDatasetSize; codePos += readDatasetSize;
generateProgramEpilogue(prog); generateProgramEpilogue(prog);
} }
template<bool superscalar> template<bool superscalar>
void JitCompilerX86::generateProgramLight(Program& prog) { void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) {
if (RANDOMX_CACHE_ACCESSES != 8) if (RANDOMX_CACHE_ACCESSES != 8)
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES");
if (RANDOMX_ARGON_GROWTH != 0) if (RANDOMX_ARGON_GROWTH != 0)
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH");
generateProgramPrologue(prog); generateProgramPrologue(prog, pcfg);
if (superscalar) { if (superscalar) {
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
emitByte(CALL); emitByte(CALL);
@ -259,8 +260,8 @@ namespace RandomX {
generateProgramEpilogue(prog); generateProgramEpilogue(prog);
} }
template void JitCompilerX86::generateProgramLight<true>(Program& prog); template void JitCompilerX86::generateProgramLight<true>(Program& prog, ProgramConfiguration& pcfg);
template void JitCompilerX86::generateProgramLight<false>(Program& prog); template void JitCompilerX86::generateProgramLight<false>(Program& prog, ProgramConfiguration& pcfg);
template<size_t N> template<size_t N>
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) { void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
@ -298,33 +299,26 @@ namespace RandomX {
memcpy(code, codeDatasetInit, datasetInitSize); memcpy(code, codeDatasetInit, datasetInitSize);
} }
void JitCompilerX86::generateProgramPrologue(Program& prog) { void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
#ifdef RANDOMX_JUMP #ifdef RANDOMX_JUMP
instructionOffsets.clear(); instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) { for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1; registerUsage[i] = -1;
} }
#endif #endif
auto addressRegisters = prog.getEntropy(12);
uint32_t readReg0 = 0 + (addressRegisters & 1);
addressRegisters >>= 1;
uint32_t readReg1 = 2 + (addressRegisters & 1);
addressRegisters >>= 1;
uint32_t readReg2 = 4 + (addressRegisters & 1);
addressRegisters >>= 1;
uint32_t readReg3 = 6 + (addressRegisters & 1);
codePos = prologueSize; codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
emit(REX_XOR_RAX_R64); emit(REX_XOR_RAX_R64);
emitByte(0xc0 + readReg0); emitByte(0xc0 + pcfg.readReg0);
emit(REX_XOR_RAX_R64); emit(REX_XOR_RAX_R64);
emitByte(0xc0 + readReg1); emitByte(0xc0 + pcfg.readReg1);
memcpy(code + codePos, codeLoopLoad, loopLoadSize); memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize; codePos += loopLoadSize;
generateCode(prog); generateCode(prog);
emit(REX_MOV_RR); emit(REX_MOV_RR);
emitByte(0xc0 + readReg2); emitByte(0xc0 + pcfg.readReg2);
emit(REX_XOR_EAX); emit(REX_XOR_EAX);
emitByte(0xc0 + readReg3); emitByte(0xc0 + pcfg.readReg3);
} }
void JitCompilerX86::generateProgramEpilogue(Program& prog) { void JitCompilerX86::generateProgramEpilogue(Program& prog) {

View File

@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX { namespace RandomX {
class Program; class Program;
class ProgramConfiguration;
class SuperscalarProgram; class SuperscalarProgram;
class JitCompilerX86; class JitCompilerX86;
@ -38,9 +39,9 @@ namespace RandomX {
public: public:
JitCompilerX86(); JitCompilerX86();
~JitCompilerX86(); ~JitCompilerX86();
void generateProgram(Program&); void generateProgram(Program&, ProgramConfiguration&);
template<bool superscalar> template<bool superscalar>
void generateProgramLight(Program&); void generateProgramLight(Program&, ProgramConfiguration&);
template<size_t N> template<size_t N>
void generateSuperScalarHash(SuperscalarProgram (&programs)[N]); void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
ProgramFunc getProgramFunc() { ProgramFunc getProgramFunc() {
@ -73,7 +74,7 @@ namespace RandomX {
void generateDatasetInitCode(); void generateDatasetInitCode();
void generateProgramPrologue(Program&); void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&); void generateProgramEpilogue(Program&);
int getConditionRegister(); int getConditionRegister();
void genAddressReg(Instruction&, bool); void genAddressReg(Instruction&, bool);

View File

@ -27,6 +27,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX { namespace RandomX {
// Per-program settings filled in by the virtual machine's initialize() and
// consumed by both the interpreter and the x86 JIT compiler.
struct ProgramConfiguration {
	// Two 64-bit words OR-ed into the two lanes of group E values
	// (one word per lane); loaded as a single 128-bit vector.
	uint64_t eMask[2];
	// Indices into the integer register file: readReg0/readReg1 feed the
	// scratchpad address mix, readReg2/readReg3 feed the mem.mx update.
	uint32_t readReg0;
	uint32_t readReg1;
	uint32_t readReg2;
	uint32_t readReg3;
};
class Program { class Program {
public: public:
Instruction& operator()(int pc) { Instruction& operator()(int pc) {

View File

@ -77,14 +77,18 @@ namespace RandomX {
mem.ma = program.getEntropy(8) & CacheLineAlignMask; mem.ma = program.getEntropy(8) & CacheLineAlignMask;
mem.mx = program.getEntropy(10); mem.mx = program.getEntropy(10);
auto addressRegisters = program.getEntropy(12); auto addressRegisters = program.getEntropy(12);
readReg0 = 0 + (addressRegisters & 1); config.readReg0 = 0 + (addressRegisters & 1);
addressRegisters >>= 1; addressRegisters >>= 1;
readReg1 = 2 + (addressRegisters & 1); config.readReg1 = 2 + (addressRegisters & 1);
addressRegisters >>= 1; addressRegisters >>= 1;
readReg2 = 4 + (addressRegisters & 1); config.readReg2 = 4 + (addressRegisters & 1);
addressRegisters >>= 1; addressRegisters >>= 1;
readReg3 = 6 + (addressRegisters & 1); config.readReg3 = 6 + (addressRegisters & 1);
datasetBase = program.getEntropy(14) % datasetRange; datasetBase = program.getEntropy(13) % datasetRange;
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
} }
template<bool softAes> template<bool softAes>

View File

@ -46,9 +46,9 @@ namespace RandomX {
protected: protected:
alignas(64) Program program; alignas(64) Program program;
alignas(64) RegisterFile reg; alignas(64) RegisterFile reg;
alignas(16) ProgramConfiguration config;
MemoryRegisters mem; MemoryRegisters mem;
uint8_t* scratchpad; uint8_t* scratchpad;
uint32_t readReg0, readReg1, readReg2, readReg3;
uint32_t datasetRange; uint32_t datasetRange;
uint32_t datasetBase; uint32_t datasetBase;
}; };

View File

@ -311,6 +311,12 @@ inline __m128d load_cvt_i32x2(const void* addr) {
return _mm_cvtepi32_pd(ix); return _mm_cvtepi32_pd(ix);
} }
// Returns the IEEE-754 binary64 exponent field for the unbiased exponent E,
// i.e. the biased value (E + 1023) shifted into bits 52..62. All other bits
// (sign and mantissa) are zero, so the result can be OR-ed onto a mantissa.
//
// E must denote a normal number: biased 0 (zero/denormals) and biased 2047
// (Inf/NaN) are rejected at compile time, as is anything that would spill
// out of the 11-bit field into the sign bit.
template<int E>
constexpr uint64_t ieee_get_exponent_mask() {
	static_assert(E > -1023, "Invalid exponent value");
	static_assert(E < 1024, "Invalid exponent value");
	// Both asserts guarantee 1 <= E + 1023 <= 2046, so the int sum is positive.
	return static_cast<uint64_t>(E + 1023) << 52;
}
template<int E> template<int E>
__m128d ieee_set_exponent(__m128d x) { __m128d ieee_set_exponent(__m128d x) {
static_assert(E > -1023, "Invalid exponent value"); static_assert(E > -1023, "Invalid exponent value");

View File

@ -396,7 +396,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
result.print(std::cout); result.print(std::cout);
if(!legacy && programCount == 1000) if(!legacy && programCount == 1000)
std::cout << "Reference result: af72d8069bd95ef04b414d3a83772c7bd2df454940bad15ae0b48543aeef8ab2" << std::endl; std::cout << "Reference result: 630ad3bc7f44fe8386462d7b671fa2a1167d3e062bfb9a2967f64832760cfedb" << std::endl;
if (!miningMode) { if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
} }