483 lines
13 KiB
C
483 lines
13 KiB
C
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
|
|
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/mman.h>
|
|
#include <unistd.h>
|
|
|
|
#include "util/u_math.h"
|
|
|
|
#include "freedreno_pm4.h"
|
|
|
|
#include "emu.h"
|
|
#include "util.h"
|
|
|
|
/*
 * Rotate-left helpers.  'val' should be an unsigned value of the stated
 * width and 'amt' must lie in 1..width-1: with amt == 0 the right shift
 * would be by the full width, which C leaves undefined.
 */
#define rotl32(val, amt) (((val) << (amt)) | ((val) >> (32 - (amt))))
#define rotl64(val, amt) (((val) << (amt)) | ((val) >> (64 - (amt))))
|
|
|
|
/**
 * AFUC emulator.  Currently only supports a6xx.
 *
 * TODO: to add a5xx it might be easier to compile this file multiple times,
 * using conditional compilation to deal with differences between generations.
 */
|
|
|
|
static uint32_t
|
|
emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2)
|
|
{
|
|
uint64_t tmp;
|
|
switch (opc) {
|
|
case OPC_ADD:
|
|
tmp = (uint64_t)src1 + (uint64_t)src2;
|
|
emu->carry = tmp >> 32;
|
|
return (uint32_t)tmp;
|
|
case OPC_ADDHI:
|
|
return src1 + src2 + emu->carry;
|
|
case OPC_SUB:
|
|
tmp = (uint64_t)src1 - (uint64_t)src2;
|
|
emu->carry = tmp >> 32;
|
|
return (uint32_t)tmp;
|
|
case OPC_SUBHI:
|
|
return src1 - src2 + emu->carry;
|
|
case OPC_AND:
|
|
return src1 & src2;
|
|
case OPC_OR:
|
|
return src1 | src2;
|
|
case OPC_XOR:
|
|
return src1 ^ src2;
|
|
case OPC_NOT:
|
|
return ~src1;
|
|
case OPC_SHL:
|
|
return src1 << src2;
|
|
case OPC_USHR:
|
|
return src1 >> src2;
|
|
case OPC_ISHR:
|
|
return (int32_t)src1 >> src2;
|
|
case OPC_ROT:
|
|
if (src2 & 0x80000000)
|
|
return rotl64(src1, -*(int32_t *)&src2);
|
|
else
|
|
return rotl32(src1, src2);
|
|
case OPC_MUL8:
|
|
return (src1 & 0xff) * (src2 & 0xff);
|
|
case OPC_MIN:
|
|
return MIN2(src1, src2);
|
|
case OPC_MAX:
|
|
return MAX2(src1, src2);
|
|
case OPC_CMP:
|
|
if (src1 > src2)
|
|
return 0x00;
|
|
else if (src1 == src2)
|
|
return 0x2b;
|
|
return 0x1e;
|
|
case OPC_MSB:
|
|
if (!src2)
|
|
return 0;
|
|
return util_last_bit(src2) - 1;
|
|
default:
|
|
printf("unhandled alu opc: 0x%02x\n", opc);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper to calculate load/store address based on LOAD_STORE_HI
|
|
*/
|
|
static uintptr_t
|
|
load_store_addr(struct emu *emu, unsigned gpr)
|
|
{
|
|
EMU_CONTROL_REG(LOAD_STORE_HI);
|
|
|
|
uintptr_t addr = emu_get_reg32(emu, &LOAD_STORE_HI);
|
|
addr <<= 32;
|
|
|
|
return addr + emu_get_gpr_reg(emu, gpr);
|
|
}
|
|
|
|
static void
|
|
emu_instr(struct emu *emu, afuc_instr *instr)
|
|
{
|
|
uint32_t rem = emu_get_gpr_reg(emu, REG_REM);
|
|
afuc_opc opc;
|
|
bool rep;
|
|
|
|
afuc_get_opc(instr, &opc, &rep);
|
|
|
|
switch (opc) {
|
|
case OPC_NOP:
|
|
break;
|
|
case OPC_ADD ... OPC_CMP: {
|
|
uint32_t val = emu_alu(emu, opc,
|
|
emu_get_gpr_reg(emu, instr->alui.src),
|
|
instr->alui.uimm);
|
|
emu_set_gpr_reg(emu, instr->alui.dst, val);
|
|
break;
|
|
}
|
|
case OPC_MOVI: {
|
|
uint32_t val = instr->movi.uimm << instr->movi.shift;
|
|
emu_set_gpr_reg(emu, instr->movi.dst, val);
|
|
break;
|
|
}
|
|
case OPC_ALU: {
|
|
uint32_t val = emu_alu(emu, instr->alu.alu,
|
|
emu_get_gpr_reg(emu, instr->alu.src1),
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
emu_set_gpr_reg(emu, instr->alu.dst, val);
|
|
|
|
if (instr->alu.xmov) {
|
|
unsigned m = MIN2(instr->alu.xmov, rem);
|
|
|
|
assert(m <= 3);
|
|
|
|
if (m == 1) {
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
emu_dump_state_change(emu);
|
|
emu_set_gpr_reg(emu, REG_DATA,
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
} else if (m == 2) {
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
emu_dump_state_change(emu);
|
|
emu_set_gpr_reg(emu, REG_DATA,
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
emu_dump_state_change(emu);
|
|
emu_set_gpr_reg(emu, REG_DATA,
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
} else if (m == 3) {
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
emu_dump_state_change(emu);
|
|
emu_set_gpr_reg(emu, REG_DATA,
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
emu_dump_state_change(emu);
|
|
emu_set_gpr_reg(emu, instr->alu.dst,
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
emu_dump_state_change(emu);
|
|
emu_set_gpr_reg(emu, REG_DATA,
|
|
emu_get_gpr_reg(emu, instr->alu.src2));
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case OPC_CWRITE6: {
|
|
uint32_t src1 = emu_get_gpr_reg(emu, instr->control.src1);
|
|
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
|
|
|
|
if (instr->control.flags == 0x4) {
|
|
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
|
|
} else if (instr->control.flags && !emu->quiet) {
|
|
printf("unhandled flags: %x\n", instr->control.flags);
|
|
}
|
|
|
|
emu_set_control_reg(emu, src2 + instr->control.uimm, src1);
|
|
break;
|
|
}
|
|
case OPC_CREAD6: {
|
|
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
|
|
|
|
if (instr->control.flags == 0x4) {
|
|
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
|
|
} else if (instr->control.flags && !emu->quiet) {
|
|
printf("unhandled flags: %x\n", instr->control.flags);
|
|
}
|
|
|
|
emu_set_gpr_reg(emu, instr->control.src1,
|
|
emu_get_control_reg(emu, src2 + instr->control.uimm));
|
|
break;
|
|
}
|
|
case OPC_LOAD6: {
|
|
uintptr_t addr = load_store_addr(emu, instr->control.src2) +
|
|
instr->control.uimm;
|
|
|
|
if (instr->control.flags == 0x4) {
|
|
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
|
|
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
|
|
} else if (instr->control.flags && !emu->quiet) {
|
|
printf("unhandled flags: %x\n", instr->control.flags);
|
|
}
|
|
|
|
uint32_t val = emu_mem_read_dword(emu, addr);
|
|
|
|
emu_set_gpr_reg(emu, instr->control.src1, val);
|
|
|
|
break;
|
|
}
|
|
case OPC_STORE6: {
|
|
uintptr_t addr = load_store_addr(emu, instr->control.src2) +
|
|
instr->control.uimm;
|
|
|
|
if (instr->control.flags == 0x4) {
|
|
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
|
|
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
|
|
} else if (instr->control.flags && !emu->quiet) {
|
|
printf("unhandled flags: %x\n", instr->control.flags);
|
|
}
|
|
|
|
uint32_t val = emu_get_gpr_reg(emu, instr->control.src1);
|
|
|
|
emu_mem_write_dword(emu, addr, val);
|
|
|
|
break;
|
|
}
|
|
case OPC_BRNEI ... OPC_BREQB: {
|
|
uint32_t off = emu->gpr_regs.pc + instr->br.ioff;
|
|
uint32_t src = emu_get_gpr_reg(emu, instr->br.src);
|
|
|
|
if (opc == OPC_BRNEI) {
|
|
if (src != instr->br.bit_or_imm)
|
|
emu->branch_target = off;
|
|
} else if (opc == OPC_BREQI) {
|
|
if (src == instr->br.bit_or_imm)
|
|
emu->branch_target = off;
|
|
} else if (opc == OPC_BRNEB) {
|
|
if (!(src & (1 << instr->br.bit_or_imm)))
|
|
emu->branch_target = off;
|
|
} else if (opc == OPC_BREQB) {
|
|
if (src & (1 << instr->br.bit_or_imm))
|
|
emu->branch_target = off;
|
|
} else {
|
|
assert(0);
|
|
}
|
|
break;
|
|
}
|
|
case OPC_RET: {
|
|
assert(emu->call_stack_idx > 0);
|
|
|
|
/* counter-part to 'call' instruction, also has a delay slot: */
|
|
emu->branch_target = emu->call_stack[--emu->call_stack_idx];
|
|
|
|
break;
|
|
}
|
|
case OPC_CALL: {
|
|
assert(emu->call_stack_idx < ARRAY_SIZE(emu->call_stack));
|
|
|
|
/* call looks to have same delay-slot behavior as branch/etc, so
|
|
* presumably the return PC is two instructions later:
|
|
*/
|
|
emu->call_stack[emu->call_stack_idx++] = emu->gpr_regs.pc + 2;
|
|
emu->branch_target = instr->call.uoff;
|
|
|
|
break;
|
|
}
|
|
case OPC_WIN: {
|
|
assert(!emu->branch_target);
|
|
emu->run_mode = false;
|
|
emu->waitin = true;
|
|
break;
|
|
}
|
|
/* OPC_PREEMPTLEAVE6 */
|
|
case OPC_SETSECURE: {
|
|
// TODO this acts like a conditional branch, but in which case
|
|
// does it branch?
|
|
break;
|
|
}
|
|
default:
|
|
printf("unhandled opc: 0x%02x\n", opc);
|
|
exit(1);
|
|
}
|
|
|
|
if (rep) {
|
|
assert(rem > 0);
|
|
emu_set_gpr_reg(emu, REG_REM, --rem);
|
|
}
|
|
}
|
|
|
|
void
|
|
emu_step(struct emu *emu)
|
|
{
|
|
afuc_instr *instr = (void *)&emu->instrs[emu->gpr_regs.pc];
|
|
afuc_opc opc;
|
|
bool rep;
|
|
|
|
emu_main_prompt(emu);
|
|
|
|
uint32_t branch_target = emu->branch_target;
|
|
emu->branch_target = 0;
|
|
|
|
bool waitin = emu->waitin;
|
|
emu->waitin = false;
|
|
|
|
afuc_get_opc(instr, &opc, &rep);
|
|
|
|
if (rep) {
|
|
do {
|
|
if (!emu_get_gpr_reg(emu, REG_REM))
|
|
break;
|
|
|
|
emu_clear_state_change(emu);
|
|
emu_instr(emu, instr);
|
|
|
|
/* defer last state-change dump until after any
|
|
* post-delay-slot handling below:
|
|
*/
|
|
if (emu_get_gpr_reg(emu, REG_REM))
|
|
emu_dump_state_change(emu);
|
|
} while (true);
|
|
} else {
|
|
emu_clear_state_change(emu);
|
|
emu_instr(emu, instr);
|
|
}
|
|
|
|
emu->gpr_regs.pc++;
|
|
|
|
if (branch_target) {
|
|
emu->gpr_regs.pc = branch_target;
|
|
}
|
|
|
|
if (waitin) {
|
|
uint32_t hdr = emu_get_gpr_reg(emu, 1);
|
|
uint32_t id, count;
|
|
|
|
if (pkt_is_type4(hdr)) {
|
|
id = afuc_pm4_id("PKT4");
|
|
count = type4_pkt_size(hdr);
|
|
|
|
/* Possibly a hack, not sure what the hw actually
|
|
* does here, but we want to mask out the pkt
|
|
* type field from the hdr, so that PKT4 handler
|
|
* doesn't see it and interpret it as part as the
|
|
* register offset:
|
|
*/
|
|
emu->gpr_regs.val[1] &= 0x0fffffff;
|
|
} else if (pkt_is_type7(hdr)) {
|
|
id = cp_type7_opcode(hdr);
|
|
count = type7_pkt_size(hdr);
|
|
} else {
|
|
printf("Invalid opcode: 0x%08x\n", hdr);
|
|
exit(1); /* GPU goes *boom* */
|
|
}
|
|
|
|
assert(id < ARRAY_SIZE(emu->jmptbl));
|
|
|
|
emu_set_gpr_reg(emu, REG_REM, count);
|
|
emu->gpr_regs.pc = emu->jmptbl[id];
|
|
}
|
|
|
|
emu_dump_state_change(emu);
|
|
}
|
|
|
|
void
|
|
emu_run_bootstrap(struct emu *emu)
|
|
{
|
|
EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR);
|
|
|
|
emu->quiet = true;
|
|
emu->run_mode = true;
|
|
|
|
while (emu_get_reg32(emu, &PACKET_TABLE_WRITE_ADDR) < 0x80) {
|
|
emu_step(emu);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
check_access(struct emu *emu, uintptr_t gpuaddr, unsigned sz)
|
|
{
|
|
if ((gpuaddr % sz) != 0) {
|
|
printf("unaligned access fault: %p\n", (void *)gpuaddr);
|
|
exit(1);
|
|
}
|
|
|
|
if ((gpuaddr + sz) >= EMU_MEMORY_SIZE) {
|
|
printf("iova fault: %p\n", (void *)gpuaddr);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
uint32_t
|
|
emu_mem_read_dword(struct emu *emu, uintptr_t gpuaddr)
|
|
{
|
|
check_access(emu, gpuaddr, 4);
|
|
return *(uint32_t *)(emu->gpumem + gpuaddr);
|
|
}
|
|
|
|
static void
|
|
mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
|
|
{
|
|
check_access(emu, gpuaddr, 4);
|
|
*(uint32_t *)(emu->gpumem + gpuaddr) = val;
|
|
}
|
|
|
|
void
|
|
emu_mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
|
|
{
|
|
mem_write_dword(emu, gpuaddr, val);
|
|
assert(emu->gpumem_written == ~0);
|
|
emu->gpumem_written = gpuaddr;
|
|
}
|
|
|
|
void
|
|
emu_init(struct emu *emu)
|
|
{
|
|
emu->gpumem = mmap(NULL, EMU_MEMORY_SIZE,
|
|
PROT_READ | PROT_WRITE,
|
|
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
|
|
0, 0);
|
|
if (emu->gpumem == MAP_FAILED) {
|
|
printf("Could not allocate GPU memory: %s\n", strerror(errno));
|
|
exit(1);
|
|
}
|
|
|
|
/* Copy the instructions into GPU memory: */
|
|
for (unsigned i = 0; i < emu->sizedwords; i++) {
|
|
mem_write_dword(emu, EMU_INSTR_BASE + (4 * i), emu->instrs[i]);
|
|
}
|
|
|
|
EMU_GPU_REG(CP_SQE_INSTR_BASE);
|
|
EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
|
|
|
|
/* Setup the address of the SQE fw, just use the normal CPU ptr address: */
|
|
if (emu->lpac) {
|
|
emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
|
|
} else {
|
|
emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE);
|
|
}
|
|
|
|
if (emu->gpu_id == 660) {
|
|
emu_set_control_reg(emu, 0, 3 << 28);
|
|
} else if (emu->gpu_id == 650) {
|
|
emu_set_control_reg(emu, 0, 1 << 28);
|
|
}
|
|
}
|
|
|
|
void
|
|
emu_fini(struct emu *emu)
|
|
{
|
|
uint32_t *instrs = emu->instrs;
|
|
unsigned sizedwords = emu->sizedwords;
|
|
unsigned gpu_id = emu->gpu_id;
|
|
|
|
munmap(emu->gpumem, EMU_MEMORY_SIZE);
|
|
memset(emu, 0, sizeof(*emu));
|
|
|
|
emu->instrs = instrs;
|
|
emu->sizedwords = sizedwords;
|
|
emu->gpu_id = gpu_id;
|
|
}
|