553 lines
18 KiB
C
553 lines
18 KiB
C
/*
|
|
* Copyright © 2014-2017 Broadcom
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
/**
|
|
* @file v3dx_simulator.c
|
|
*
|
|
* Implements the actual HW interaction betweeh the GL driver's V3D simulator and the simulator.
|
|
*
|
|
* The register headers between V3D versions will have conflicting defines, so
|
|
* all register interactions appear in this file and are compiled per V3D version
|
|
* we support.
|
|
*/
|
|
|
|
#ifdef USE_V3D_SIMULATOR
|
|
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
|
|
#include "v3d_simulator.h"
|
|
#include "v3d_simulator_wrapper.h"
|
|
|
|
#include "util/macros.h"
|
|
#include "util/bitscan.h"
|
|
#include "drm-uapi/v3d_drm.h"
|
|
|
|
#define HW_REGISTER_RO(x) (x)
|
|
#define HW_REGISTER_RW(x) (x)
|
|
#if V3D_VERSION >= 41
|
|
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
|
|
#else
|
|
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
|
|
#endif
|
|
|
|
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
|
|
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
|
|
|
|
static void
|
|
v3d_invalidate_l3(struct v3d_hw *v3d)
|
|
{
|
|
#if V3D_VERSION < 40
|
|
uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
|
|
|
|
V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
|
|
V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
|
|
#endif
|
|
}
|
|
|
|
/* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. */
|
|
static void
|
|
v3d_invalidate_l2c(struct v3d_hw *v3d)
|
|
{
|
|
if (V3D_VERSION >= 33)
|
|
return;
|
|
|
|
V3D_WRITE(V3D_CTL_0_L2CACTL,
|
|
V3D_CTL_0_L2CACTL_L2CCLR_SET |
|
|
V3D_CTL_0_L2CACTL_L2CENA_SET);
|
|
}
|
|
|
|
enum v3d_l2t_cache_flush_mode {
|
|
V3D_CACHE_FLUSH_MODE_FLUSH,
|
|
V3D_CACHE_FLUSH_MODE_CLEAR,
|
|
V3D_CACHE_FLUSH_MODE_CLEAN,
|
|
};
|
|
|
|
/* Invalidates texture L2 cachelines */
|
|
static void
|
|
v3d_invalidate_l2t(struct v3d_hw *v3d)
|
|
{
|
|
V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
|
|
V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
|
|
V3D_WRITE(V3D_CTL_0_L2TCACTL,
|
|
V3D_CTL_0_L2TCACTL_L2TFLS_SET |
|
|
(V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
|
|
}
|
|
|
|
/*
|
|
* Wait for l2tcactl, used for flushes.
|
|
*
|
|
* FIXME: for a multicore scenario we should pass here the core. All wrapper
|
|
* assumes just one core, so would be better to handle that on that case.
|
|
*/
|
|
static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
|
|
uint32_t ctrl)
|
|
{
|
|
assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET)));
|
|
|
|
while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
|
|
v3d_hw_tick(v3d);
|
|
}
|
|
}
|
|
|
|
/* Flushes dirty texture cachelines from the L1 write combiner */
|
|
static void
|
|
v3d_flush_l1td(struct v3d_hw *v3d)
|
|
{
|
|
V3D_WRITE(V3D_CTL_0_L2TCACTL,
|
|
V3D_CTL_0_L2TCACTL_TMUWCF_SET);
|
|
|
|
/* Note: here the kernel (and previous versions of the simulator
|
|
* wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We
|
|
* understand that it makes more sense to do like this. We need to
|
|
* confirm which one is doing it correctly. So far things work fine on
|
|
* the simulator this way.
|
|
*/
|
|
v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
|
|
}
|
|
|
|
/* Flushes dirty texture L2 cachelines */
|
|
static void
|
|
v3d_flush_l2t(struct v3d_hw *v3d)
|
|
{
|
|
V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
|
|
V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
|
|
V3D_WRITE(V3D_CTL_0_L2TCACTL,
|
|
V3D_CTL_0_L2TCACTL_L2TFLS_SET |
|
|
(V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
|
|
|
|
v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
|
|
}
|
|
|
|
/* Invalidates the slice caches. These are read-only caches. */
|
|
static void
|
|
v3d_invalidate_slices(struct v3d_hw *v3d)
|
|
{
|
|
V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
|
|
}
|
|
|
|
static void
|
|
v3d_invalidate_caches(struct v3d_hw *v3d)
|
|
{
|
|
v3d_invalidate_l3(v3d);
|
|
v3d_invalidate_l2c(v3d);
|
|
v3d_invalidate_l2t(v3d);
|
|
v3d_invalidate_slices(v3d);
|
|
}
|
|
|
|
static uint32_t g_gmp_ofs;
|
|
static void
|
|
v3d_reload_gmp(struct v3d_hw *v3d)
|
|
{
|
|
/* Completely reset the GMP. */
|
|
V3D_WRITE(V3D_GMP_CFG,
|
|
V3D_GMP_CFG_PROTENABLE_SET);
|
|
V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
|
|
V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
|
|
while (V3D_READ(V3D_GMP_STATUS) &
|
|
V3D_GMP_STATUS_CFG_BUSY_SET) {
|
|
;
|
|
}
|
|
}
|
|
|
|
static UNUSED void
|
|
v3d_flush_caches(struct v3d_hw *v3d)
|
|
{
|
|
v3d_flush_l1td(v3d);
|
|
v3d_flush_l2t(v3d);
|
|
}
|
|
|
|
int
|
|
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
|
|
struct drm_v3d_submit_tfu *args)
|
|
{
|
|
int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;
|
|
|
|
V3D_WRITE(V3D_TFU_IIA, args->iia);
|
|
V3D_WRITE(V3D_TFU_IIS, args->iis);
|
|
V3D_WRITE(V3D_TFU_ICA, args->ica);
|
|
V3D_WRITE(V3D_TFU_IUA, args->iua);
|
|
V3D_WRITE(V3D_TFU_IOA, args->ioa);
|
|
V3D_WRITE(V3D_TFU_IOS, args->ios);
|
|
V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
|
|
V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
|
|
V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
|
|
V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);
|
|
|
|
V3D_WRITE(V3D_TFU_ICFG, args->icfg);
|
|
|
|
while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
|
|
v3d_hw_tick(v3d);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if V3D_VERSION >= 41
|
|
int
|
|
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
|
|
struct drm_v3d_submit_csd *args,
|
|
uint32_t gmp_ofs)
|
|
{
|
|
int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
|
|
V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
|
|
g_gmp_ofs = gmp_ofs;
|
|
v3d_reload_gmp(v3d);
|
|
|
|
v3d_invalidate_caches(v3d);
|
|
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
|
|
/* CFG0 kicks off the job */
|
|
V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
|
|
|
|
/* Now we wait for the dispatch to finish. The safest way is to check
|
|
* if NUM_COMPLETED_JOBS has increased. Note that in spite of that
|
|
* name that register field is about the number of completed
|
|
* dispatches.
|
|
*/
|
|
while ((V3D_READ(V3D_CSD_0_STATUS) &
|
|
V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
|
|
v3d_hw_tick(v3d);
|
|
}
|
|
|
|
v3d_flush_caches(v3d);
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
int
|
|
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
|
|
struct drm_v3d_get_param *args)
|
|
{
|
|
static const uint32_t reg_map[] = {
|
|
[DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
|
|
[DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
|
|
[DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
|
|
[DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
|
|
[DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
|
|
[DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
|
|
[DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
|
|
};
|
|
|
|
switch (args->param) {
|
|
case DRM_V3D_PARAM_SUPPORTS_TFU:
|
|
args->value = 1;
|
|
return 0;
|
|
case DRM_V3D_PARAM_SUPPORTS_CSD:
|
|
args->value = V3D_VERSION >= 41;
|
|
return 0;
|
|
case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
|
|
args->value = 1;
|
|
return 0;
|
|
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
|
|
args->value = V3D_VERSION >= 41;
|
|
return 0;
|
|
case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
|
|
args->value = 1;
|
|
return 0;
|
|
}
|
|
|
|
if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
|
|
args->value = V3D_READ(reg_map[args->param]);
|
|
return 0;
|
|
}
|
|
|
|
fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
|
|
(long long)args->value);
|
|
abort();
|
|
}
|
|
|
|
static struct v3d_hw *v3d_isr_hw;
|
|
|
|
|
|
static void
|
|
v3d_isr_core(struct v3d_hw *v3d,
|
|
unsigned core)
|
|
{
|
|
/* FIXME: so far we are assuming just one core, and using only the _0_
|
|
* registers. If we add multiple-core on the simulator, we would need
|
|
* to pass core as a parameter, and chose the proper registers.
|
|
*/
|
|
assert(core == 0);
|
|
uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
|
|
V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);
|
|
|
|
if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
|
|
uint32_t size = 256 * 1024;
|
|
uint32_t offset = v3d_simulator_get_spill(size);
|
|
|
|
v3d_reload_gmp(v3d);
|
|
|
|
V3D_WRITE(V3D_PTB_0_BPOA, offset);
|
|
V3D_WRITE(V3D_PTB_0_BPOS, size);
|
|
return;
|
|
}
|
|
|
|
if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
|
|
fprintf(stderr, "GMP violation at 0x%08x\n",
|
|
V3D_READ(V3D_GMP_VIO_ADDR));
|
|
abort();
|
|
} else {
|
|
fprintf(stderr,
|
|
"Unexpected ISR with core status 0x%08x\n",
|
|
core_status);
|
|
}
|
|
abort();
|
|
}
|
|
|
|
static void
|
|
handle_mmu_interruptions(struct v3d_hw *v3d,
|
|
uint32_t hub_status)
|
|
{
|
|
bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
|
|
bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
|
|
bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;
|
|
|
|
if (!(pti || cap || wrv))
|
|
return;
|
|
|
|
const char *client = "?";
|
|
uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
|
|
uint32_t va_width = 30;
|
|
|
|
#if V3D_VERSION >= 41
|
|
static const char *const v3d41_axi_ids[] = {
|
|
"L2T",
|
|
"PTB",
|
|
"PSE",
|
|
"TLB",
|
|
"CLE",
|
|
"TFU",
|
|
"MMU",
|
|
"GMP",
|
|
};
|
|
|
|
axi_id = axi_id >> 5;
|
|
if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
|
|
client = v3d41_axi_ids[axi_id];
|
|
|
|
uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
|
|
|
|
va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
|
|
>> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
|
|
#endif
|
|
/* Only the top bits (final number depends on the gen) of the virtual
|
|
* address are reported in the MMU VIO_ADDR register.
|
|
*/
|
|
uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) <<
|
|
(va_width - 32));
|
|
|
|
/* Difference with the kernal: here were are going to abort after
|
|
* logging, so we don't bother with some stuff that the kernel does,
|
|
* like restoring the MMU ctrl bits
|
|
*/
|
|
|
|
fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
|
|
client, axi_id, (long long) vio_addr,
|
|
wrv ? ", write violation" : "",
|
|
pti ? ", pte invalid" : "",
|
|
cap ? ", cap exceeded" : "");
|
|
|
|
abort();
|
|
}
|
|
|
|
static void
|
|
v3d_isr_hub(struct v3d_hw *v3d)
|
|
{
|
|
uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);
|
|
|
|
/* Acknowledge the interrupts we're handling here */
|
|
V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);
|
|
|
|
if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
|
|
/* FIXME: we were not able to raise this exception. We let the
|
|
* unreachable here, so we could get one if it is raised on
|
|
* the future. In any case, note that for this case we would
|
|
* only be doing debugging log.
|
|
*/
|
|
unreachable("TFU Conversion Complete interrupt not handled");
|
|
}
|
|
|
|
handle_mmu_interruptions(v3d, hub_status);
|
|
}
|
|
|
|
static void
|
|
v3d_isr(uint32_t hub_status)
|
|
{
|
|
struct v3d_hw *v3d = v3d_isr_hw;
|
|
uint32_t mask = hub_status;
|
|
|
|
/* Check the hub_status bits */
|
|
while (mask) {
|
|
unsigned core = u_bit_scan(&mask);
|
|
|
|
if (core == v3d_hw_get_hub_core())
|
|
v3d_isr_hub(v3d);
|
|
else
|
|
v3d_isr_core(v3d, core);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
void
|
|
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
|
|
{
|
|
#if V3D_VERSION == 33
|
|
/* Set OVRTMUOUT to match kernel behavior.
|
|
*
|
|
* This means that the texture sampler uniform configuration's tmu
|
|
* output type field is used, instead of using the hardware default
|
|
* behavior based on the texture type. If you want the default
|
|
* behavior, you can still put "2" in the indirect texture state's
|
|
* output_type field.
|
|
*/
|
|
V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
|
|
#endif
|
|
|
|
/* FIXME: the kernel captures some additional core interrupts here,
|
|
* for tracing. Perhaps we should evaluate to do the same here and add
|
|
* some debug options.
|
|
*/
|
|
uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
|
|
V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
|
|
V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
|
|
V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
|
|
|
|
uint32_t hub_interrupts =
|
|
(V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET | /* write violation */
|
|
V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET | /* page table invalid */
|
|
V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */
|
|
V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
|
|
|
|
V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
|
|
V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
|
|
|
|
v3d_isr_hw = v3d;
|
|
v3d_hw_set_isr(v3d, v3d_isr);
|
|
}
|
|
|
|
void
|
|
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
|
|
struct drm_v3d_submit_cl *submit,
|
|
uint32_t gmp_ofs)
|
|
{
|
|
int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
|
|
V3D_CLE_0_BFC_BMFCT_SET);
|
|
|
|
int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
|
|
V3D_CLE_0_RFC_RMFCT_SET);
|
|
|
|
g_gmp_ofs = gmp_ofs;
|
|
v3d_reload_gmp(v3d);
|
|
|
|
v3d_invalidate_caches(v3d);
|
|
|
|
if (submit->qma) {
|
|
V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
|
|
V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
|
|
}
|
|
#if V3D_VERSION >= 41
|
|
if (submit->qts) {
|
|
V3D_WRITE(V3D_CLE_0_CT0QTS,
|
|
V3D_CLE_0_CT0QTS_CTQTSEN_SET |
|
|
submit->qts);
|
|
}
|
|
#endif
|
|
V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
|
|
V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
|
|
|
|
/* Wait for bin to complete before firing render. The kernel's
|
|
* scheduler implements this using the GPU scheduler blocking on the
|
|
* bin fence completing. (We don't use HW semaphores).
|
|
*/
|
|
while ((V3D_READ(V3D_CLE_0_BFC) &
|
|
V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
|
|
v3d_hw_tick(v3d);
|
|
}
|
|
|
|
v3d_invalidate_caches(v3d);
|
|
|
|
V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
|
|
V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);
|
|
|
|
while ((V3D_READ(V3D_CLE_0_RFC) &
|
|
V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
|
|
v3d_hw_tick(v3d);
|
|
}
|
|
}
|
|
|
|
#if V3D_VERSION >= 41
|
|
#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
|
|
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
|
|
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
|
|
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
|
|
V3D_PCTR_0_SRC_N_SHIFT(x) + 6))
|
|
#endif
|
|
|
|
void
|
|
v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
|
|
uint32_t ncounters,
|
|
uint8_t *events)
|
|
{
|
|
#if V3D_VERSION >= 41
|
|
int i, j;
|
|
uint32_t source;
|
|
uint32_t mask = BITFIELD_RANGE(0, ncounters);
|
|
|
|
for (i = 0; i < ncounters; i+=4) {
|
|
source = i / 4;
|
|
uint32_t channels = 0;
|
|
for (j = 0; j < 4 && (i + j) < ncounters; j++)
|
|
channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
|
|
V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
|
|
}
|
|
V3D_WRITE(V3D_PCTR_0_CLR, mask);
|
|
V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
|
|
V3D_WRITE(V3D_PCTR_0_EN, mask);
|
|
#endif
|
|
}
|
|
|
|
void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
|
|
uint32_t ncounters,
|
|
uint64_t *values)
|
|
{
|
|
#if V3D_VERSION >= 41
|
|
int i;
|
|
|
|
for (i = 0; i < ncounters; i++)
|
|
values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));
|
|
|
|
V3D_WRITE(V3D_PCTR_0_EN, 0);
|
|
#endif
|
|
}
|
|
|
|
#endif /* USE_V3D_SIMULATOR */
|