freedreno: Generate device-info tables at build time

This way we can make the tables const.  At the same time, for a6xx, this
introduces a "sub-generation template" to reduce the copy/paste for
parameters which are keyed to the sub-generation.  It also explicitly
lists every supported GPU, to get rid of duplicate lists of supported
gpus between the device-info and drivers.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11790>
This commit is contained in:
Rob Clark 2021-07-07 14:47:02 -07:00 committed by Marge Bot
parent 0eda0188aa
commit 78c8a8af80
11 changed files with 338 additions and 134 deletions

View File

@ -25,106 +25,16 @@
#include "freedreno_dev_info.h"
#include "util/macros.h"
static inline unsigned
max_bitfield_val(unsigned high, unsigned low, unsigned shift)
{
return BITFIELD_MASK(high - low) << shift;
}
extern const struct fd_dev_id fd_dev_ids[];
extern const unsigned fd_dev_ids_count;
void
fd_dev_info_init(struct fd_dev_info *info, uint32_t gpu_id)
const struct fd_dev_info *
fd_dev_info(uint32_t gpu_id)
{
if (gpu_id >= 600) {
info->gmem_align_w = 16;
info->gmem_align_h = 4;
info->tile_align_w = gpu_id == 650 ? 96 : 32;
info->tile_align_h = 32;
/* based on GRAS_BIN_CONTROL: */
info->tile_max_w = 1024; /* max_bitfield_val(5, 0, 5) */
info->tile_max_h = max_bitfield_val(14, 8, 4);
info->num_vsc_pipes = 32;
switch (gpu_id) {
case 615:
case 618:
info->num_sp_cores = 1;
info->fibers_per_sp = 128 * 16;
info->a6xx.ccu_cntl_gmem_unk2 = true;
info->a6xx.supports_multiview_mask = false;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
info->a6xx.magic.PC_UNKNOWN_9805 = 0;
info->a6xx.magic.SP_UNKNOWN_A0F8 = 0;
break;
case 630:
info->num_sp_cores = 2;
info->fibers_per_sp = 128 * 16;
info->a6xx.ccu_cntl_gmem_unk2 = true;
info->a6xx.supports_multiview_mask = false;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000;
info->a6xx.magic.PC_UNKNOWN_9805 = 1;
info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
break;
case 640:
info->num_sp_cores = 2;
/* The wavefront ID returned by the getwid instruction has a
* maximum of 3 * 10 - 1, or so it seems. However the swizzled
* index used in the mem offset calculation is
* "(wid / 3) | ((wid % 3) << 4)", so that the actual max is
* around 3 * 16. Furthermore, with the per-fiber layout, the HW
* swizzles the wavefront index and fiber index itself, and it
* pads the number of wavefronts to 4 * 16 to make the swizzling
* simpler, so we have to bump the number of wavefronts to 4 * 16
* for the per-fiber layout. We could theoretically reduce it for
* the per-wave layout though.
*/
info->fibers_per_sp = 128 * 4 * 16;
info->a6xx.supports_multiview_mask = true;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
info->a6xx.magic.PC_UNKNOWN_9805 = 1;
info->a6xx.magic.SP_UNKNOWN_A0F8 = 1;
info->a6xx.has_z24uint_s8uint = true;
break;
case 650:
info->num_sp_cores = 3;
info->fibers_per_sp = 128 * 2 * 16;
info->a6xx.supports_multiview_mask = true;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000;
info->a6xx.magic.PC_UNKNOWN_9805 = 2;
info->a6xx.magic.SP_UNKNOWN_A0F8 = 2;
info->a6xx.has_z24uint_s8uint = true;
break;
default:
/* Drivers should be doing their own version filtering, so we
* should never get here.
*/
unreachable("missing a6xx config");
for (int i = 0; i < fd_dev_ids_count; i++) {
if (gpu_id == fd_dev_ids[i].gpu_id) {
return fd_dev_ids[i].info;
}
} else if (gpu_id >= 500) {
info->gmem_align_w = info->tile_align_w = 64;
info->gmem_align_h = info->tile_align_h = 32;
/* based on VSC_BIN_SIZE: */
info->tile_max_w = 1024; /* max_bitfield_val(7, 0, 5) */
info->tile_max_h = max_bitfield_val(16, 9, 5);
info->num_vsc_pipes = 16;
} else if (gpu_id >= 400) {
info->gmem_align_w = info->tile_align_w = 32;
info->gmem_align_h = info->tile_align_h = 32;
/* based on VSC_BIN_SIZE: */
info->tile_max_w = 1024; /* max_bitfield_val(4, 0, 5) */
info->tile_max_h = max_bitfield_val(9, 5, 5);
info->num_vsc_pipes = 8;
} else if (gpu_id >= 300) {
info->gmem_align_w = info->tile_align_w = 32;
info->gmem_align_h = info->tile_align_h = 32;
/* based on VSC_BIN_SIZE: */
info->tile_max_w = 992; /* max_bitfield_val(4, 0, 5) */
info->tile_max_h = max_bitfield_val(9, 5, 5);
info->num_vsc_pipes = 8;
} else {
info->gmem_align_w = info->tile_align_w = 32;
info->gmem_align_h = info->tile_align_h = 32;
info->tile_max_w = 512;
info->tile_max_h = ~0; /* TODO */
info->num_vsc_pipes = 8;
}
return NULL;
}

View File

@ -51,11 +51,12 @@ struct fd_dev_info {
uint32_t num_sp_cores;
uint32_t num_ccu;
};
/* Information for private memory calculations */
uint32_t fibers_per_sp;
union {
struct {
/* Information for private memory calculations */
uint32_t fibers_per_sp;
/* Whether the PC_MULTIVIEW_MASK register exists. */
bool supports_multiview_mask;
@ -72,6 +73,11 @@ struct fd_dev_info {
};
};
/* One row of the GPU-id lookup table: pairs a numeric GPU id with a pointer
 * to the (const) device-info struct describing that GPU.
 */
struct fd_dev_id {
/* numeric GPU id used as the lookup key */
uint32_t gpu_id;
/* device-info for this id; never written through this pointer */
const struct fd_dev_info *info;
};
/* per CCU GMEM amount reserved for depth cache for direct rendering */
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)
/* per CCU GMEM amount reserved for color cache used by GMEM resolves
@ -83,7 +89,7 @@ struct fd_dev_info {
*/
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
void fd_dev_info_init(struct fd_dev_info *info, uint32_t gpu_id);
const struct fd_dev_info * fd_dev_info(uint32_t gpu_id);
#ifdef __cplusplus
} /* end of extern "C" */

View File

@ -0,0 +1,281 @@
#
# Copyright © 2021 Google, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from mako.template import Template
import sys
def max_bitfield_val(high, low, shift):
    """Return a run of (high - low) one-bits, moved left by `shift` bits."""
    width = high - low
    all_ones = (1 << width) - 1
    return all_ones << shift
class State(object):
    """Accumulates the device-info structs and the GPU-id table that the
    generated C file is rendered from.
    """

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of `gpu_info` within `self.gpu_infos`.

        Entries are compared with `==`.

        Raises:
            ValueError: if `gpu_info` is not present in `self.gpu_infos`.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        # `Error` is not a Python builtin, so raising it would itself fail
        # with NameError and hide the intended message.
        raise ValueError("invalid info")
# Module-wide registry: GPUInfo.__init__ appends to s.gpu_infos, add_gpus()
# fills s.gpus, and the template at the bottom of the file is rendered from it.
s = State()
def add_gpus(ids, info):
    """Point every GPU id in `ids` at the device-info object `info`.

    The mapping is stored in the module-level registry `s.gpus`; an id that
    is already present is overwritten.
    """
    s.gpus.update((gpu_id, info) for gpu_id in ids)
class Struct(object):
    """Helper whose str() is a C designated-initializer list built from the
    instance attributes, e.g. ``{.a=1,.b=2,}``.
    """

    def __str__(self):
        # One ".name=value," fragment per attribute, in vars() order; nested
        # Struct values stringify themselves recursively through str().
        fields = [
            "." + name + "=" + str(value) + ","
            for name, value in vars(self).items()
        ]
        return "{" + "".join(fields) + "}"
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by the corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), whereas tile_max_w
       tends to have lower limits; where tile_max_w is given as a literal, a
       comment next to it describes the bitfield size/shift.

       Every instance registers itself in the module-level registry
       (s.gpu_infos) when it is constructed.
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        # Attributes are created in this order, which is also the order in
        # which Struct.__str__ walks them.
        fields = (
            ("gmem_align_w", gmem_align_w),
            ("gmem_align_h", gmem_align_h),
            ("tile_align_w", tile_align_w),
            ("tile_align_h", tile_align_h),
            ("tile_max_w", tile_max_w),
            ("tile_max_h", tile_max_h),
            ("num_vsc_pipes", num_vsc_pipes),
        )
        for field, value in fields:
            setattr(self, field, value)

        s.gpu_infos.append(self)
class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       Parameters:
         template      -- dict of a6xx-specific fields shared by a
                          sub-generation; each item becomes an attribute of
                          self.a6xx
         num_sp_cores  -- stored as the num_sp_cores field
         num_ccu       -- must equal num_sp_cores; it is only checked and
                          used to pick the tile alignment, not stored
                          (NOTE(review): presumably num_ccu aliases
                          num_sp_cores on the C side -- confirm against
                          struct fd_dev_info)
         RB_UNKNOWN_8E04_blit, PC_UNKNOWN_9805, SP_UNKNOWN_A0F8
                       -- "magic" register values, stored under
                          self.a6xx.magic
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_UNKNOWN_9805,
                 SP_UNKNOWN_A0F8):
        super().__init__(gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = 32, tile_align_h = 32,
                         tile_max_w   = 1024, # max_bitfield_val(5, 0, 5)
                         tile_max_h   = max_bitfield_val(14, 8, 4),
                         num_vsc_pipes = 32)
        assert(num_sp_cores == num_ccu)

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU.  It is the tile
        # *width* that needs it: the hand-written table this generator
        # replaces used tile_align_w = 96 with tile_align_h = 32 for the
        # 3-CCU part, so the height must stay at 32.
        if num_ccu == 3:
            self.tile_align_w = 96

        self.a6xx = Struct()
        self.a6xx.magic = Struct()

        # Various "magic" register values:
        self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        self.a6xx.magic.PC_UNKNOWN_9805 = PC_UNKNOWN_9805
        self.a6xx.magic.SP_UNKNOWN_A0F8 = SP_UNKNOWN_A0F8

        # Copy the sub-generation parameters into the a6xx sub-struct
        for name, val in template.items():
            setattr(self.a6xx, name, val)
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
#
# Each add_gpus() call below constructs one GPUInfo and maps every listed GPU
# id to it, so all ids in a list share a single generated info struct.  The
# order of these calls determines the order of the generated tables.
add_gpus([
200,
201,
205,
220,
], GPUInfo(
gmem_align_w = 32, gmem_align_h = 32,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 512,
# ~0 evaluates to -1 in Python, so "-1" is what gets written into the
# generated initializer.
# NOTE(review): presumably relies on the C field being unsigned so that
# it becomes all-ones -- confirm against struct fd_dev_info.
tile_max_h = ~0, # TODO
num_vsc_pipes = 8,
))
# a3xx
add_gpus([
305,
307,
320,
330,
], GPUInfo(
gmem_align_w = 32, gmem_align_h = 32,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 992, # max_bitfield_val(4, 0, 5)
tile_max_h = max_bitfield_val(9, 5, 5),
num_vsc_pipes = 8,
))
# a4xx
add_gpus([
405,
420,
430,
], GPUInfo(
gmem_align_w = 32, gmem_align_h = 32,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 1024, # max_bitfield_val(4, 0, 5)
tile_max_h = max_bitfield_val(9, 5, 5),
num_vsc_pipes = 8,
))
# a5xx
add_gpus([
510,
530,
540,
], GPUInfo(
gmem_align_w = 64, gmem_align_h = 32,
tile_align_w = 64, tile_align_h = 32,
tile_max_w = 1024, # max_bitfield_val(7, 0, 5)
tile_max_h = max_bitfield_val(16, 9, 5),
num_vsc_pipes = 16,
))
# The a6xx family splits into sub-generations, and some device-info fields
# depend only on which sub-generation a GPU belongs to.  Each dict below holds
# those shared fields once; A6xxGPUInfo copies the items onto its a6xx
# sub-struct, which saves repeating them for every GPU.

# a615, a618, a630:
a6xx_gen1 = {
    "fibers_per_sp": 128 * 16,
    "ccu_cntl_gmem_unk2": True,
}

# a640, a680:
a6xx_gen2 = {
    "fibers_per_sp": 128 * 4 * 16,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
}

# a650:
a6xx_gen3 = {
    "fibers_per_sp": 128 * 2 * 16,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
}
# a6xx GPUs: each entry picks a sub-generation template and supplies the
# per-GPU core count and "magic" register values.  A6xxGPUInfo asserts that
# num_sp_cores and num_ccu are equal.

# a615, a618: gen1 template, 1 SP core / CCU
add_gpus([
615,
618,
], A6xxGPUInfo(
a6xx_gen1,
num_sp_cores = 1,
num_ccu = 1,
RB_UNKNOWN_8E04_blit = 0x00100000,
PC_UNKNOWN_9805 = 0,
SP_UNKNOWN_A0F8 = 0,
))
# a630: gen1 template, 2 SP cores / CCUs
add_gpus([
630,
], A6xxGPUInfo(
a6xx_gen1,
num_sp_cores = 2,
num_ccu = 2,
RB_UNKNOWN_8E04_blit = 0x01000000,
PC_UNKNOWN_9805 = 1,
SP_UNKNOWN_A0F8 = 1,
))
# a640: gen2 template, 2 SP cores / CCUs
add_gpus([
640,
], A6xxGPUInfo(
a6xx_gen2,
num_sp_cores = 2,
num_ccu = 2,
RB_UNKNOWN_8E04_blit = 0x00100000,
PC_UNKNOWN_9805 = 1,
SP_UNKNOWN_A0F8 = 1,
))
# a650: gen3 template, 3 SP cores / CCUs (the case A6xxGPUInfo special-cases
# for tile alignment)
add_gpus([
650,
], A6xxGPUInfo(
a6xx_gen3,
num_sp_cores = 3,
num_ccu = 3,
RB_UNKNOWN_8E04_blit = 0x04100000,
PC_UNKNOWN_9805 = 2,
SP_UNKNOWN_A0F8 = 2,
))
# Mako template for the generated C source.  It emits one static const
# fd_dev_info per entry of s.gpu_infos (named __info<index>), then the
# fd_dev_ids[] table mapping each GPU id in s.gpus to the address of its info
# struct, and finally the number of table entries.  Python booleans are
# written out verbatim as True/False, hence the #defines in the template.
template = """\
/* Copyright (C) 2021 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "freedreno_dev_info.h"
/* Map python to C: */
#define True true
#define False false
%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor
const struct fd_dev_id fd_dev_ids[] = {
%for id, info in s.gpus.items():
{ ${id}, &__info${s.info_index(info)} },
%endfor
};
const unsigned fd_dev_ids_count = ${len(s.gpus)};
"""
# Render the template against the populated registry and write the resulting
# C source to stdout.
print(Template(template).render(s=s))

View File

@ -18,6 +18,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Generate freedreno_devices.c at build time: run freedreno_devices.py with
# the build's python and capture its stdout as the output file.
freedreno_devices_c = custom_target(
'freedreno_devices.c',
input: 'freedreno_devices.py',
output: 'freedreno_devices.c',
command: [prog_python, '@INPUT@'],
capture: true,
)
libfreedreno_common = static_library(
'freedreno_common',
[
@ -28,6 +36,7 @@ libfreedreno_common = static_library(
'freedreno_uuid.c',
'freedreno_uuid.h',
'freedreno_guardband.h',
freedreno_devices_c,
sha1_h,
],
include_directories : [inc_freedreno, inc_include, inc_src, inc_gallium],

View File

@ -78,7 +78,7 @@ FreedrenoDriver::setup_a6xx_counters()
);
counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info.num_sp_cores);
return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);
}
);
@ -177,7 +177,7 @@ FreedrenoDriver::init_perfcnt()
for (auto countable : countables)
countable.resolve();
fd_dev_info_init(&info, gpu_id);
info = fd_dev_info(gpu_id);
io = fd_dt_find_io();
if (!io) {

View File

@ -42,7 +42,7 @@ private:
bool has_suspend_count;
uint32_t suspend_count;
struct fd_dev_info info;
const struct fd_dev_info *info;
/**
* The memory mapped i/o space for counter readback:

View File

@ -195,13 +195,16 @@ tu_physical_device_init(struct tu_physical_device *device,
memset(device->name, 0, sizeof(device->name));
sprintf(device->name, "FD%d", device->gpu_id);
switch (device->gpu_id) {
case 615:
case 618:
case 630:
case 640:
case 650:
fd_dev_info_init(&device->info, device->gpu_id);
const struct fd_dev_info *info = fd_dev_info(device->gpu_id);
if (!info) {
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"device %s is unsupported", device->name);
return result;
}
switch (device->gpu_id / 100) {
case 6:
// TODO convert to pointer:
device->info = *info;
device->ccu_offset_bypass = device->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
device->ccu_offset_gmem = (device->gmem_size -
device->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);

View File

@ -2064,7 +2064,7 @@ calc_pvtmem_size(struct tu_device *dev, struct tu_pvtmem_config *config,
{
uint32_t per_fiber_size = ALIGN(pvtmem_bytes, 512);
uint32_t per_sp_size =
ALIGN(per_fiber_size * dev->physical_device->info.fibers_per_sp, 1 << 12);
ALIGN(per_fiber_size * dev->physical_device->info.a6xx.fibers_per_sp, 1 << 12);
if (config) {
config->per_fiber_size = per_fiber_size;

View File

@ -103,7 +103,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_emit_string5(ring, name, strlen(name));
#endif
uint32_t fibers_per_sp = ctx->screen->info.fibers_per_sp;
uint32_t fibers_per_sp = ctx->screen->info.a6xx.fibers_per_sp;
uint32_t num_sp_cores = ctx->screen->info.num_sp_cores;
uint32_t per_fiber_size = ALIGN(so->pvtmem_size, 512);

View File

@ -1020,6 +1020,12 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
DBG(" Chip-id: 0x%08x", screen->chip_id);
DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes);
const struct fd_dev_info *info = fd_dev_info(screen->gpu_id);
if (!info) {
mesa_loge("unsupported GPU: a%03d", screen->gpu_id);
goto fail;
}
/* explicitly checking for GPU revisions that are known to work. This
* may be overly conservative for a3xx, where spoofing the gpu_id with
* the blob driver seems to generate identical cmdstream dumps. But
@ -1031,33 +1037,20 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
* of the cases below and see what happens. And if it works, please
* send a patch ;-)
*/
switch (screen->gpu_id) {
case 200:
case 201:
case 205:
case 220:
switch (screen->gpu_id / 100) {
case 2:
fd2_screen_init(pscreen);
break;
case 305:
case 307:
case 320:
case 330:
case 3:
fd3_screen_init(pscreen);
break;
case 405:
case 420:
case 430:
case 4:
fd4_screen_init(pscreen);
break;
case 510:
case 530:
case 540:
case 5:
fd5_screen_init(pscreen);
break;
case 618:
case 630:
case 640:
case 650:
case 6:
fd6_screen_init(pscreen);
break;
default:
@ -1065,7 +1058,8 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
goto fail;
}
fd_dev_info_init(&screen->info, screen->gpu_id);
// TODO change to pointer:
screen->info = *info;
if (is_a6xx(screen)) {
screen->ccu_offset_bypass = screen->info.num_ccu * A6XX_CCU_DEPTH_SIZE;

View File

@ -167,7 +167,8 @@ main(int argc, char **argv)
.gmemsize_bytes = gpu_info->gmemsize_bytes,
};
fd_dev_info_init(&screen.info, gpu_info->gpu_id);
// TODO change to pointer:
screen.info = *fd_dev_info(gpu_info->gpu_id);
/* And finally run thru all the GMEM keys: */
for (int i = 0; i < ARRAY_SIZE(keys); i++) {