diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index b9866d6..645fd9d 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -67,10 +67,12 @@ namespace randomx { void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { asmCode.str(std::string()); //clear +#ifdef RANDOMX_ALIGN asmCode << "ALIGN 16" << std::endl; +#endif for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { case SuperscalarInstructionType::ISUB_R: asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; @@ -95,19 +97,27 @@ namespace randomx { break; case SuperscalarInstructionType::IADD_C8: asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "nop" << std::endl; +#endif break; case SuperscalarInstructionType::IXOR_C8: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "nop" << std::endl; +#endif break; case SuperscalarInstructionType::IADD_C9: asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "xchg ax, ax ;nop" << std::endl; +#endif break; case SuperscalarInstructionType::IXOR_C9: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "xchg ax, ax ;nop" << std::endl; +#endif break; case SuperscalarInstructionType::IMULH_R: asmCode << "mov rax, " << regR[instr.dst] << std::endl; @@ -179,7 +189,7 @@ namespace randomx { asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl; for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { case SuperscalarInstructionType::ISUB_R: asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; diff --git a/src/dataset.cpp b/src/dataset.cpp index 193d49a..e382fd0 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -125,7 +125,7 @@ namespace randomx { randomx::generateSuperscalar(cache->programs[i], gen); for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) { auto& instr = cache->programs[i](j); - if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) { + if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) { auto rcp = randomx_reciprocal(instr.getImm32()); instr.setImm32(cache->reciprocalCache.size()); cache->reciprocalCache.push_back(rcp); diff --git a/src/instruction.hpp b/src/instruction.hpp index 44b2c48..b1863b5 100644 --- a/src/instruction.hpp +++ b/src/instruction.hpp @@ -39,38 +39,38 @@ namespace randomx { typedef void(Instruction::*InstructionFormatter)(std::ostream&) const; - namespace InstructionType { - constexpr int IADD_RS = 0; - constexpr int IADD_M = 1; - constexpr int ISUB_R = 2; - constexpr int ISUB_M = 3; - constexpr int IMUL_R = 4; - constexpr int IMUL_M = 5; - constexpr int IMULH_R = 6; - constexpr int IMULH_M = 7; - constexpr int ISMULH_R = 8; - constexpr int ISMULH_M = 9; - constexpr int IMUL_RCP = 10; - constexpr int INEG_R = 11; - constexpr int IXOR_R = 12; - constexpr int IXOR_M = 13; - constexpr int IROR_R = 14; - constexpr int IROL_R = 15; - constexpr int ISWAP_R = 16; - constexpr int FSWAP_R = 17; - constexpr int FADD_R = 18; - constexpr int FADD_M = 19; - constexpr int FSUB_R = 20; - constexpr int FSUB_M = 21; - constexpr int FSCAL_R = 22; - constexpr int FMUL_R = 23; - constexpr int FDIV_M = 24; - constexpr int FSQRT_R = 25; - constexpr int CBRANCH = 26; - constexpr int CFROUND = 27; - constexpr int ISTORE = 28; - constexpr int NOP = 29; - } + enum class InstructionType : uint16_t { + IADD_RS = 0, + IADD_M = 1, + ISUB_R = 2, + ISUB_M = 3, + IMUL_R = 4, + IMUL_M = 5, + IMULH_R = 6, + IMULH_M = 7, + ISMULH_R = 8, + ISMULH_M = 9, + IMUL_RCP = 10, + INEG_R = 11, + IXOR_R = 12, + IXOR_M = 13, + IROR_R = 14, + IROL_R = 15, + ISWAP_R = 16, + FSWAP_R = 17, + FADD_R = 18, + FADD_M = 19, + FSUB_R = 20, + FSUB_M = 21, + FSCAL_R = 22, + FMUL_R = 23, + FDIV_M = 24, + FSQRT_R = 25, + CBRANCH = 26, + CFROUND = 27, + ISTORE = 28, + NOP = 29, + }; class Instruction { public: diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index bb2ae76..054a171 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -306,7 +306,7 @@ namespace randomx { } void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { case randomx::SuperscalarInstructionType::ISUB_R: emit(REX_SUB_RR); diff --git a/src/superscalar.cpp b/src/superscalar.cpp index 73c7571..39d772f 100644 --- a/src/superscalar.cpp +++ b/src/superscalar.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - static bool isMultiplication(int type) { + static bool isMultiplication(SuperscalarInstructionType type) { return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; } @@ -167,7 +167,7 @@ namespace randomx { const MacroOp& getOp(int index) const { return ops_[index]; } - int getType() const { + SuperscalarInstructionType getType() const { return type_; } int getResultOp() const { @@ -196,7 +196,7 @@ namespace randomx { static const SuperscalarInstructionInfo NOP; private: const char* name_; - int type_; + SuperscalarInstructionType type_; std::vector ops_; int latency_; int resultOp_ = 0; @@ -204,13 +204,13 @@ namespace randomx { int srcOp_; SuperscalarInstructionInfo(const char* name) - : name_(name), type_(-1), latency_(0) {} - SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp) + : name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {} + SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp& op, int srcOp) : name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) { ops_.push_back(MacroOp(op)); } template - SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) + SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) : name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) { for (unsigned i = 0; i < N; ++i) { ops_.push_back(MacroOp(arr[i])); @@ -267,7 +267,7 @@ namespace randomx { const char* getName() const { return name_; } - const DecoderBuffer* fetchNext(int instrType, int cycle, int mulCount, Blake2Generator& gen) const { + const DecoderBuffer* fetchNext(SuperscalarInstructionType instrType, int cycle, int mulCount, Blake2Generator& gen) const { //If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10 //because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs. //Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops. @@ -345,9 +345,9 @@ namespace randomx { class RegisterInfo { public: - RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {} + RegisterInfo() : latency(0), lastOpGroup(SuperscalarInstructionType::INVALID), lastOpPar(-1), value(0) {} int latency; - int lastOpGroup; + SuperscalarInstructionType lastOpGroup; int lastOpPar; int value; }; @@ -356,7 +356,7 @@ namespace randomx { class SuperscalarInstruction { public: void toInstr(Instruction& instr) { //translate to a RandomX instruction format - instr.opcode = getType(); + instr.opcode = (int)getType(); instr.dst = dst_; instr.src = src_ >= 0 ? src_ : dst_; instr.setMod(mod_); @@ -534,7 +534,7 @@ namespace randomx { return false; } - int getType() { + SuperscalarInstructionType getType() { return info_->getType(); } int getSource() { @@ -543,7 +543,7 @@ namespace randomx { int getDestination() { return dst_; } - int getGroup() { + SuperscalarInstructionType getGroup() { return opGroup_; } int getGroupPar() { @@ -562,7 +562,7 @@ namespace randomx { int dst_ = -1; int mod_; uint32_t imm32_; - int opGroup_; + SuperscalarInstructionType opGroup_; int opGroupPar_; bool canReuse_ = false; bool groupParIsSource_ = false; @@ -849,40 +849,40 @@ namespace randomx { void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals) { for (unsigned j = 0; j < prog.getSize(); ++j) { Instruction& instr = prog(j); - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { - case randomx::SuperscalarInstructionType::ISUB_R: + case SuperscalarInstructionType::ISUB_R: r[instr.dst] -= r[instr.src]; break; - case randomx::SuperscalarInstructionType::IXOR_R: + case SuperscalarInstructionType::IXOR_R: r[instr.dst] ^= r[instr.src]; break; - case randomx::SuperscalarInstructionType::IADD_RS: + case SuperscalarInstructionType::IADD_RS: r[instr.dst] += r[instr.src] << instr.getModShift(); break; - case randomx::SuperscalarInstructionType::IMUL_R: + case SuperscalarInstructionType::IMUL_R: r[instr.dst] *= r[instr.src]; break; - case randomx::SuperscalarInstructionType::IROR_C: + case SuperscalarInstructionType::IROR_C: r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); break; - case randomx::SuperscalarInstructionType::IADD_C7: - case randomx::SuperscalarInstructionType::IADD_C8: - case randomx::SuperscalarInstructionType::IADD_C9: + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: r[instr.dst] += signExtend2sCompl(instr.getImm32()); break; - case randomx::SuperscalarInstructionType::IXOR_C7: - case randomx::SuperscalarInstructionType::IXOR_C8: - case randomx::SuperscalarInstructionType::IXOR_C9: + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); break; - case randomx::SuperscalarInstructionType::IMULH_R: + case SuperscalarInstructionType::IMULH_R: r[instr.dst] = mulh(r[instr.dst], r[instr.src]); break; - case randomx::SuperscalarInstructionType::ISMULH_R: + case SuperscalarInstructionType::ISMULH_R: r[instr.dst] = smulh(r[instr.dst], r[instr.src]); break; - case randomx::SuperscalarInstructionType::IMUL_RCP: + case SuperscalarInstructionType::IMUL_RCP: if (reciprocals != nullptr) r[instr.dst] *= (*reciprocals)[instr.getImm32()]; else diff --git a/src/superscalar.hpp b/src/superscalar.hpp index 96360aa..2e55533 100644 --- a/src/superscalar.hpp +++ b/src/superscalar.hpp @@ -35,25 +35,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { // Intel Ivy Bridge reference - namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size - constexpr int ISUB_R = 0; //1 p015 1 3 (sub) - constexpr int IXOR_R = 1; //1 p015 1 3 (xor) - constexpr int IADD_RS = 2; //1 p01 1 4 (lea) - constexpr int IMUL_R = 3; //1 p1 3 4 (imul) - constexpr int IROR_C = 4; //1 p05 1 4 (ror) - constexpr int IADD_C7 = 5; //1 p015 1 7 (add) - constexpr int IXOR_C7 = 6; //1 p015 1 7 (xor) - constexpr int IADD_C8 = 7; //1+0 p015 1 7+1 (add+nop) - constexpr int IXOR_C8 = 8; //1+0 p015 1 7+1 (xor+nop) - constexpr int IADD_C9 = 9; //1+0 p015 1 7+2 (add+nop) - constexpr int IXOR_C9 = 10; //1+0 p015 1 7+2 (xor+nop) - constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov) - constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) - constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 (mov+imul) + enum class SuperscalarInstructionType { //uOPs (decode) execution ports latency code size + ISUB_R = 0, //1 p015 1 3 (sub) + IXOR_R = 1, //1 p015 1 3 (xor) + IADD_RS = 2, //1 p01 1 4 (lea) + IMUL_R = 3, //1 p1 3 4 (imul) + IROR_C = 4, //1 p05 1 4 (ror) + IADD_C7 = 5, //1 p015 1 7 (add) + IXOR_C7 = 6, //1 p015 1 7 (xor) + IADD_C8 = 7, //1+0 p015 1 7+1 (add+nop) + IXOR_C8 = 8, //1+0 p015 1 7+1 (xor+nop) + IADD_C9 = 9, //1+0 p015 1 7+2 (add+nop) + IXOR_C9 = 10, //1+0 p015 1 7+2 (xor+nop) + IMULH_R = 11, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov) + ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) + IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul) - constexpr int COUNT = 14; - constexpr int INVALID = -1; - } + INVALID = -1 + }; void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen); void executeSuperscalar(uint64_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals = nullptr); diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index dfa1ba7..25795a6 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -50,7 +50,7 @@ namespace randomx { uint64_t imm; int64_t simm; }; - uint16_t type; + InstructionType type; union { int16_t target; uint16_t shift; @@ -58,6 +58,8 @@ namespace randomx { uint32_t memMask; }; + static_assert(sizeof(InstructionByteCode) == 32, "Invalid packing of struct InstructionByteCode"); + template class InterpretedVm : public VmBase { public: