freedreno: Generate device-info tables at build time
This way we can make the tables const. At the same time, for a6xx, this introduces a "sub-generation template" to reduce the copy/paste for parameters which are keyed to the sub-generation. It also explicitly lists every supported GPU, to get rid of duplicate lists of supported gpus between the device-info and drivers. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11790>
This commit is contained in:
parent
0eda0188aa
commit
78c8a8af80
|
@ -25,106 +25,16 @@
|
|||
#include "freedreno_dev_info.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
static inline unsigned
|
||||
max_bitfield_val(unsigned high, unsigned low, unsigned shift)
|
||||
{
|
||||
return BITFIELD_MASK(high - low) << shift;
|
||||
}
|
||||
extern const struct fd_dev_id fd_dev_ids[];
|
||||
extern const unsigned fd_dev_ids_count;
|
||||
|
||||
void
|
||||
fd_dev_info_init(struct fd_dev_info *info, uint32_t gpu_id)
|
||||
const struct fd_dev_info *
|
||||
fd_dev_info(uint32_t gpu_id)
|
||||
{
|
||||
if (gpu_id >= 600) {
|
||||
info->gmem_align_w = 16;
|
||||
info->gmem_align_h = 4;
|
||||
info->tile_align_w = gpu_id == 650 ? 96 : 32;
|
||||
info->tile_align_h = 32;
|
||||
/* based on GRAS_BIN_CONTROL: */
|
||||
info->tile_max_w = 1024; /* max_bitfield_val(5, 0, 5) */
|
||||
info->tile_max_h = max_bitfield_val(14, 8, 4);
|
||||
info->num_vsc_pipes = 32;
|
||||
|
||||
switch (gpu_id) {
|
||||
case 615:
|
||||
case 618:
|
||||
info->num_sp_cores = 1;
|
||||
info->fibers_per_sp = 128 * 16;
|
||||
info->a6xx.ccu_cntl_gmem_unk2 = true;
|
||||
info->a6xx.supports_multiview_mask = false;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
|
||||
info->a6xx.magic.PC_UNKNOWN_9805 = 0;
|
||||
info->a6xx.magic.SP_UNKNOWN_A0F8 = 0;
|
||||
break;
|
||||
case 630:
|
||||
info->num_sp_cores = 2;
|
||||
info->fibers_per_sp = 128 * 16;
|
||||
info->a6xx.ccu_cntl_gmem_unk2 = true;
|
||||
info->a6xx.supports_multiview_mask = false;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000;
|
||||
info->a6xx.magic.PC_UNKNOWN_9805 = 1;
|
||||
info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
|
||||
break;
|
||||
case 640:
|
||||
info->num_sp_cores = 2;
|
||||
/* The wavefront ID returned by the getwid instruction has a
|
||||
* maximum of 3 * 10 - 1, or so it seems. However the swizzled
|
||||
* index used in the mem offset calculation is
|
||||
* "(wid / 3) | ((wid % 3) << 4)", so that the actual max is
|
||||
* around 3 * 16. Furthermore, with the per-fiber layout, the HW
|
||||
* swizzles the wavefront index and fiber index itself, and it
|
||||
* pads the number of wavefronts to 4 * 16 to make the swizzling
|
||||
* simpler, so we have to bump the number of wavefronts to 4 * 16
|
||||
* for the per-fiber layout. We could theoretically reduce it for
|
||||
* the per-wave layout though.
|
||||
*/
|
||||
info->fibers_per_sp = 128 * 4 * 16;
|
||||
info->a6xx.supports_multiview_mask = true;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
|
||||
info->a6xx.magic.PC_UNKNOWN_9805 = 1;
|
||||
info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
|
||||
info->a6xx.has_z24uint_s8uint = true;
|
||||
break;
|
||||
case 650:
|
||||
info->num_sp_cores = 3;
|
||||
info->fibers_per_sp = 128 * 2 * 16;
|
||||
info->a6xx.supports_multiview_mask = true;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000;
|
||||
info->a6xx.magic.PC_UNKNOWN_9805 = 2;
|
||||
info->a6xx.magic.SP_UNKNOWN_A0F8 = 2;
|
||||
info->a6xx.has_z24uint_s8uint = true;
|
||||
break;
|
||||
default:
|
||||
/* Drivers should be doing their own version filtering, so we
|
||||
* should never get here.
|
||||
*/
|
||||
unreachable("missing a6xx config");
|
||||
for (int i = 0; i < fd_dev_ids_count; i++) {
|
||||
if (gpu_id == fd_dev_ids[i].gpu_id) {
|
||||
return fd_dev_ids[i].info;
|
||||
}
|
||||
} else if (gpu_id >= 500) {
|
||||
info->gmem_align_w = info->tile_align_w = 64;
|
||||
info->gmem_align_h = info->tile_align_h = 32;
|
||||
/* based on VSC_BIN_SIZE: */
|
||||
info->tile_max_w = 1024; /* max_bitfield_val(7, 0, 5) */
|
||||
info->tile_max_h = max_bitfield_val(16, 9, 5);
|
||||
info->num_vsc_pipes = 16;
|
||||
} else if (gpu_id >= 400) {
|
||||
info->gmem_align_w = info->tile_align_w = 32;
|
||||
info->gmem_align_h = info->tile_align_h = 32;
|
||||
/* based on VSC_BIN_SIZE: */
|
||||
info->tile_max_w = 1024; /* max_bitfield_val(4, 0, 5) */
|
||||
info->tile_max_h = max_bitfield_val(9, 5, 5);
|
||||
info->num_vsc_pipes = 8;
|
||||
} else if (gpu_id >= 300) {
|
||||
info->gmem_align_w = info->tile_align_w = 32;
|
||||
info->gmem_align_h = info->tile_align_h = 32;
|
||||
/* based on VSC_BIN_SIZE: */
|
||||
info->tile_max_w = 992; /* max_bitfield_val(4, 0, 5) */
|
||||
info->tile_max_h = max_bitfield_val(9, 5, 5);
|
||||
info->num_vsc_pipes = 8;
|
||||
} else {
|
||||
info->gmem_align_w = info->tile_align_w = 32;
|
||||
info->gmem_align_h = info->tile_align_h = 32;
|
||||
info->tile_max_w = 512;
|
||||
info->tile_max_h = ~0; /* TODO */
|
||||
info->num_vsc_pipes = 8;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -51,11 +51,12 @@ struct fd_dev_info {
|
|||
uint32_t num_sp_cores;
|
||||
uint32_t num_ccu;
|
||||
};
|
||||
/* Information for private memory calculations */
|
||||
uint32_t fibers_per_sp;
|
||||
|
||||
union {
|
||||
struct {
|
||||
/* Information for private memory calculations */
|
||||
uint32_t fibers_per_sp;
|
||||
|
||||
/* Whether the PC_MULTIVIEW_MASK register exists. */
|
||||
bool supports_multiview_mask;
|
||||
|
||||
|
@ -72,6 +73,11 @@ struct fd_dev_info {
|
|||
};
|
||||
};
|
||||
|
||||
struct fd_dev_id {
|
||||
uint32_t gpu_id;
|
||||
const struct fd_dev_info *info;
|
||||
};
|
||||
|
||||
/* per CCU GMEM amount reserved for depth cache for direct rendering */
|
||||
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)
|
||||
/* per CCU GMEM amount reserved for color cache used by GMEM resolves
|
||||
|
@ -83,7 +89,7 @@ struct fd_dev_info {
|
|||
*/
|
||||
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
|
||||
|
||||
void fd_dev_info_init(struct fd_dev_info *info, uint32_t gpu_id);
|
||||
const struct fd_dev_info * fd_dev_info(uint32_t gpu_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of extern "C" */
|
||||
|
|
|
@ -0,0 +1,281 @@
|
|||
#
|
||||
# Copyright © 2021 Google, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
from mako.template import Template
|
||||
import sys
|
||||
|
||||
def max_bitfield_val(high, low, shift):
    """Return a run of (high - low) one-bits, shifted left by `shift`.

    Used by the tables below to express a tile-size limit in terms of
    the register bitfield that holds it.
    """
    width = high - low
    all_ones = (1 << width) - 1
    return all_ones << shift
|
||||
|
||||
class State(object):
    """Collects everything the output template needs to render."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of `gpu_info` within self.gpu_infos.

        The index is what the template uses to name the generated
        per-info struct and to reference it from the id table.

        Raises:
            ValueError: if `gpu_info` was never added to self.gpu_infos.
        """
        for i, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return i
        # A real exception type: the bare name `Error` is not defined in
        # this script, so raising it would surface as a NameError instead.
        raise ValueError("invalid info")
|
||||
|
||||
s = State()
|
||||
|
||||
def add_gpus(ids, info):
    """Register every GPU id in `ids` as using the device-info `info`.

    Entries go into the module-level table `s.gpus`; an id that is
    already present has its info replaced.
    """
    s.gpus.update((gpu_id, info) for gpu_id in ids)
|
||||
|
||||
class Struct(object):
    """Helper whose str() is a C designated-initializer list.

    Every instance attribute becomes one `.name=value,` entry, in the
    order the attributes were set; nested Struct values render
    recursively through their own str().
    """
    def __str__(self):
        fields = ("." + name + "=" + str(value) + ","
                  for name, value in vars(self).items())
        return "{" + "".join(fields) + "}"
|
||||
|
||||
class GPUInfo(Struct):
    """Device info common to every adreno generation: the GMEM layout
       related parameters.

       In the tables below tile_max_h is normally computed from the
       corresponding bitfield size/shift (ie. VSC_BIN_SIZE, or similar)
       via max_bitfield_val(), while tile_max_w is given as a literal
       with a comment naming the bitfield size/shift.
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        # Attribute order is the order in which the fields are emitted
        # by Struct.__str__, so keep it stable.
        fields = (
            ("gmem_align_w", gmem_align_w),
            ("gmem_align_h", gmem_align_h),
            ("tile_align_w", tile_align_w),
            ("tile_align_h", tile_align_h),
            ("tile_max_w", tile_max_w),
            ("tile_max_h", tile_max_h),
            ("num_vsc_pipes", num_vsc_pipes),
        )
        for field, value in fields:
            setattr(self, field, value)

        # Every constructed info registers itself in the global list of
        # unique info structs.
        s.gpu_infos.append(self)
|
||||
|
||||
|
||||
class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       template is a dict of a6xx-specific field names to values; each
       entry is copied onto the nested `a6xx` struct.  num_sp_cores and
       num_ccu must be equal (asserted); only num_sp_cores is stored.
       The remaining arguments are "magic" register values stored under
       `a6xx.magic`.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_UNKNOWN_9805,
                 SP_UNKNOWN_A0F8):
        super().__init__(gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = 32, tile_align_h = 32,
                         tile_max_w   = 1024, # max_bitfield_val(5, 0, 5)
                         tile_max_h   = max_bitfield_val(14, 8, 4),
                         num_vsc_pipes = 32)
        assert(num_sp_cores == num_ccu)

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU.  It is the *width*
        # alignment that is bumped; the height alignment stays at 32.
        if num_ccu == 3:
            self.tile_align_w = 96

        self.a6xx = Struct()
        self.a6xx.magic = Struct()

        # Various "magic" register values:
        self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        self.a6xx.magic.PC_UNKNOWN_9805 = PC_UNKNOWN_9805
        self.a6xx.magic.SP_UNKNOWN_A0F8 = SP_UNKNOWN_A0F8

        # Sub-generation parameters; fields a template leaves out are
        # simply not emitted in the generated initializer.
        for name, val in template.items():
            setattr(self.a6xx, name, val)
|
||||
|
||||
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
|
||||
# capture that in the device-info tables
|
||||
add_gpus([
|
||||
200,
|
||||
201,
|
||||
205,
|
||||
220,
|
||||
], GPUInfo(
|
||||
gmem_align_w = 32, gmem_align_h = 32,
|
||||
tile_align_w = 32, tile_align_h = 32,
|
||||
tile_max_w = 512,
|
||||
tile_max_h = ~0, # TODO
|
||||
num_vsc_pipes = 8,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
305,
|
||||
307,
|
||||
320,
|
||||
330,
|
||||
], GPUInfo(
|
||||
gmem_align_w = 32, gmem_align_h = 32,
|
||||
tile_align_w = 32, tile_align_h = 32,
|
||||
tile_max_w = 992, # max_bitfield_val(4, 0, 5)
|
||||
tile_max_h = max_bitfield_val(9, 5, 5),
|
||||
num_vsc_pipes = 8,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
405,
|
||||
420,
|
||||
430,
|
||||
], GPUInfo(
|
||||
gmem_align_w = 32, gmem_align_h = 32,
|
||||
tile_align_w = 32, tile_align_h = 32,
|
||||
tile_max_w = 1024, # max_bitfield_val(4, 0, 5)
|
||||
tile_max_h = max_bitfield_val(9, 5, 5),
|
||||
num_vsc_pipes = 8,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
510,
|
||||
530,
|
||||
540,
|
||||
], GPUInfo(
|
||||
gmem_align_w = 64, gmem_align_h = 32,
|
||||
tile_align_w = 64, tile_align_h = 32,
|
||||
tile_max_w = 1024, # max_bitfield_val(7, 0, 5)
|
||||
tile_max_h = max_bitfield_val(16, 9, 5),
|
||||
num_vsc_pipes = 16,
|
||||
))
|
||||
|
||||
# a6xx can be divided into distinct sub-generations, where certain device-
|
||||
# info parameters are keyed to the sub-generation. These templates reduce
|
||||
# the copypaste
|
||||
|
||||
# a615, a618, a630:
|
||||
a6xx_gen1 = dict(
|
||||
fibers_per_sp = 128 * 16,
|
||||
ccu_cntl_gmem_unk2 = True,
|
||||
)
|
||||
|
||||
# a640, a680:
|
||||
a6xx_gen2 = dict(
|
||||
fibers_per_sp = 128 * 4 * 16,
|
||||
supports_multiview_mask = True,
|
||||
has_z24uint_s8uint = True,
|
||||
)
|
||||
|
||||
# a650:
|
||||
a6xx_gen3 = dict(
|
||||
fibers_per_sp = 128 * 2 * 16,
|
||||
supports_multiview_mask = True,
|
||||
has_z24uint_s8uint = True,
|
||||
)
|
||||
|
||||
add_gpus([
|
||||
615,
|
||||
618,
|
||||
], A6xxGPUInfo(
|
||||
a6xx_gen1,
|
||||
num_sp_cores = 1,
|
||||
num_ccu = 1,
|
||||
RB_UNKNOWN_8E04_blit = 0x00100000,
|
||||
PC_UNKNOWN_9805 = 0,
|
||||
SP_UNKNOWN_A0F8 = 0,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
630,
|
||||
], A6xxGPUInfo(
|
||||
a6xx_gen1,
|
||||
num_sp_cores = 2,
|
||||
num_ccu = 2,
|
||||
RB_UNKNOWN_8E04_blit = 0x01000000,
|
||||
PC_UNKNOWN_9805 = 1,
|
||||
SP_UNKNOWN_A0F8 = 1,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
640,
|
||||
], A6xxGPUInfo(
|
||||
a6xx_gen2,
|
||||
num_sp_cores = 2,
|
||||
num_ccu = 2,
|
||||
RB_UNKNOWN_8E04_blit = 0x00100000,
|
||||
PC_UNKNOWN_9805 = 1,
|
||||
SP_UNKNOWN_A0F8 = 1,
|
||||
))
|
||||
|
||||
add_gpus([
|
||||
650,
|
||||
], A6xxGPUInfo(
|
||||
a6xx_gen3,
|
||||
num_sp_cores = 3,
|
||||
num_ccu = 3,
|
||||
RB_UNKNOWN_8E04_blit = 0x04100000,
|
||||
PC_UNKNOWN_9805 = 2,
|
||||
SP_UNKNOWN_A0F8 = 2,
|
||||
))
|
||||
|
||||
template = """\
|
||||
/* Copyright (C) 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "freedreno_dev_info.h"
|
||||
|
||||
/* Map python to C: */
|
||||
#define True true
|
||||
#define False false
|
||||
|
||||
%for info in s.gpu_infos:
|
||||
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
|
||||
%endfor
|
||||
|
||||
const struct fd_dev_id fd_dev_ids[] = {
|
||||
%for id, info in s.gpus.items():
|
||||
{ ${id}, &__info${s.info_index(info)} },
|
||||
%endfor
|
||||
};
|
||||
const unsigned fd_dev_ids_count = ${len(s.gpus)};
|
||||
"""
|
||||
|
||||
print(Template(template).render(s=s))
|
||||
|
|
@ -18,6 +18,14 @@
|
|||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
freedreno_devices_c = custom_target(
|
||||
'freedreno_devices.c',
|
||||
input: 'freedreno_devices.py',
|
||||
output: 'freedreno_devices.c',
|
||||
command: [prog_python, '@INPUT@'],
|
||||
capture: true,
|
||||
)
|
||||
|
||||
libfreedreno_common = static_library(
|
||||
'freedreno_common',
|
||||
[
|
||||
|
@ -28,6 +36,7 @@ libfreedreno_common = static_library(
|
|||
'freedreno_uuid.c',
|
||||
'freedreno_uuid.h',
|
||||
'freedreno_guardband.h',
|
||||
freedreno_devices_c,
|
||||
sha1_h,
|
||||
],
|
||||
include_directories : [inc_freedreno, inc_include, inc_src, inc_gallium],
|
||||
|
|
|
@ -78,7 +78,7 @@ FreedrenoDriver::setup_a6xx_counters()
|
|||
);
|
||||
|
||||
counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
|
||||
return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores);
|
||||
return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -177,7 +177,7 @@ FreedrenoDriver::init_perfcnt()
|
|||
for (auto countable : countables)
|
||||
countable.resolve();
|
||||
|
||||
fd_dev_info_init(&info, gpu_id);
|
||||
info = fd_dev_info(gpu_id);
|
||||
|
||||
io = fd_dt_find_io();
|
||||
if (!io) {
|
||||
|
|
|
@ -42,7 +42,7 @@ private:
|
|||
bool has_suspend_count;
|
||||
uint32_t suspend_count;
|
||||
|
||||
struct fd_dev_info info;
|
||||
const struct fd_dev_info *info;
|
||||
|
||||
/**
|
||||
* The memory mapped i/o space for counter readback:
|
||||
|
|
|
@ -195,13 +195,16 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
memset(device->name, 0, sizeof(device->name));
|
||||
sprintf(device->name, "FD%d", device->gpu_id);
|
||||
|
||||
switch (device->gpu_id) {
|
||||
case 615:
|
||||
case 618:
|
||||
case 630:
|
||||
case 640:
|
||||
case 650:
|
||||
fd_dev_info_init(&device->info, device->gpu_id);
|
||||
const struct fd_dev_info *info = fd_dev_info(device->gpu_id);
|
||||
if (!info) {
|
||||
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"device %s is unsupported", device->name);
|
||||
return result;
|
||||
}
|
||||
switch (device->gpu_id / 100) {
|
||||
case 6:
|
||||
// TODO convert to pointer:
|
||||
device->info = *info;
|
||||
device->ccu_offset_bypass = device->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
|
||||
device->ccu_offset_gmem = (device->gmem_size -
|
||||
device->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
|
||||
|
|
|
@ -2064,7 +2064,7 @@ calc_pvtmem_size(struct tu_device *dev, struct tu_pvtmem_config *config,
|
|||
{
|
||||
uint32_t per_fiber_size = ALIGN(pvtmem_bytes, 512);
|
||||
uint32_t per_sp_size =
|
||||
ALIGN(per_fiber_size * dev->physical_device->info.fibers_per_sp, 1 << 12);
|
||||
ALIGN(per_fiber_size * dev->physical_device->info.a6xx.fibers_per_sp, 1 << 12);
|
||||
|
||||
if (config) {
|
||||
config->per_fiber_size = per_fiber_size;
|
||||
|
|
|
@ -103,7 +103,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
fd_emit_string5(ring, name, strlen(name));
|
||||
#endif
|
||||
|
||||
uint32_t fibers_per_sp = ctx->screen->info.fibers_per_sp;
|
||||
uint32_t fibers_per_sp = ctx->screen->info.a6xx.fibers_per_sp;
|
||||
uint32_t num_sp_cores = ctx->screen->info.num_sp_cores;
|
||||
|
||||
uint32_t per_fiber_size = ALIGN(so->pvtmem_size, 512);
|
||||
|
|
|
@ -1020,6 +1020,12 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
|
|||
DBG(" Chip-id: 0x%08x", screen->chip_id);
|
||||
DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes);
|
||||
|
||||
const struct fd_dev_info *info = fd_dev_info(screen->gpu_id);
|
||||
if (!info) {
|
||||
mesa_loge("unsupported GPU: a%03d", screen->gpu_id);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* explicitly checking for GPU revisions that are known to work. This
|
||||
* may be overly conservative for a3xx, where spoofing the gpu_id with
|
||||
* the blob driver seems to generate identical cmdstream dumps. But
|
||||
|
@ -1031,33 +1037,20 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
|
|||
* of the cases below and see what happens. And if it works, please
|
||||
* send a patch ;-)
|
||||
*/
|
||||
switch (screen->gpu_id) {
|
||||
case 200:
|
||||
case 201:
|
||||
case 205:
|
||||
case 220:
|
||||
switch (screen->gpu_id / 100) {
|
||||
case 2:
|
||||
fd2_screen_init(pscreen);
|
||||
break;
|
||||
case 305:
|
||||
case 307:
|
||||
case 320:
|
||||
case 330:
|
||||
case 3:
|
||||
fd3_screen_init(pscreen);
|
||||
break;
|
||||
case 405:
|
||||
case 420:
|
||||
case 430:
|
||||
case 4:
|
||||
fd4_screen_init(pscreen);
|
||||
break;
|
||||
case 510:
|
||||
case 530:
|
||||
case 540:
|
||||
case 5:
|
||||
fd5_screen_init(pscreen);
|
||||
break;
|
||||
case 618:
|
||||
case 630:
|
||||
case 640:
|
||||
case 650:
|
||||
case 6:
|
||||
fd6_screen_init(pscreen);
|
||||
break;
|
||||
default:
|
||||
|
@ -1065,7 +1058,8 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
|
|||
goto fail;
|
||||
}
|
||||
|
||||
fd_dev_info_init(&screen->info, screen->gpu_id);
|
||||
// TODO change to pointer:
|
||||
screen->info = *info;
|
||||
|
||||
if (is_a6xx(screen)) {
|
||||
screen->ccu_offset_bypass = screen->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
|
||||
|
|
|
@ -167,7 +167,8 @@ main(int argc, char **argv)
|
|||
.gmemsize_bytes = gpu_info->gmemsize_bytes,
|
||||
};
|
||||
|
||||
fd_dev_info_init(&screen.info, gpu_info->gpu_id);
|
||||
// TODO change to pointer:
|
||||
screen.info = *fd_dev_info(gpu_info->gpu_id);
|
||||
|
||||
/* And finally run thru all the GMEM keys: */
|
||||
for (int i = 0; i < ARRAY_SIZE(keys); i++) {
|
||||
|
|
Loading…
Reference in New Issue