mesa/src/freedreno/common/freedreno_devices.py

404 lines
12 KiB
Python

#
# Copyright © 2021 Google, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from mako.template import Template
import sys
def max_bitfield_val(high, low, shift):
return ((1 << (high - low)) - 1) << shift
class State(object):
def __init__(self):
# List of unique device-info structs, multiple different GPU ids
# can map to a single info struct in cases where the differences
# are not sw visible, or the only differences are parameters
# queried from the kernel (like GMEM size)
self.gpu_infos = []
# Table mapping GPU id to device-info struct
self.gpus = {}
def info_index(self, gpu_info):
i = 0
for info in self.gpu_infos:
if gpu_info == info:
return i
i += 1
raise Error("invalid info")
s = State()
def add_gpus(ids, info):
for id in ids:
s.gpus[id] = info
class GPUId(object):
def __init__(self, gpu_id = None, chip_id = None, name=None):
if chip_id == None:
assert(gpu_id != None)
val = gpu_id
core = int(val / 100)
val -= (core * 100);
major = int(val / 10);
val -= (major * 10)
minor = val
chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
self.chip_id = chip_id
if gpu_id == None:
gpu_id = 0
self.gpu_id = gpu_id
if name == None:
assert(gpu_id != 0)
name = "FD%d" % gpu_id
self.name = name
class Struct(object):
"""A helper class that stringifies itself to a 'C' struct initializer
"""
def __str__(self):
s = "{"
for name, value in vars(self).items():
s += "." + name + "=" + str(value) + ","
return s + "}"
class GPUInfo(Struct):
"""Base class for any generation of adreno, consists of GMEM layout
related parameters
Note that tile_max_h is normally only constrained by corresponding
bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_h
tends to have lower limits, in which case a comment will describe
the bitfield size/shift
"""
def __init__(self, gmem_align_w, gmem_align_h,
tile_align_w, tile_align_h,
tile_max_w, tile_max_h, num_vsc_pipes):
self.gmem_align_w = gmem_align_w
self.gmem_align_h = gmem_align_h
self.tile_align_w = tile_align_w
self.tile_align_h = tile_align_h
self.tile_max_w = tile_max_w
self.tile_max_h = tile_max_h
self.num_vsc_pipes = num_vsc_pipes
s.gpu_infos.append(self)
class A6xxGPUInfo(GPUInfo):
"""The a6xx generation has a lot more parameters, and is broken down
into distinct sub-generations. The template parameter avoids
duplication of parameters that are unique to the sub-generation.
"""
def __init__(self, template, num_sp_cores, num_ccu,
RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
super().__init__(gmem_align_w = 16, gmem_align_h = 4,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 1024, # max_bitfield_val(5, 0, 5)
tile_max_h = max_bitfield_val(14, 8, 4),
num_vsc_pipes = 32)
assert(num_sp_cores == num_ccu)
self.num_sp_cores = num_sp_cores
# 96 tile alignment seems correlated to 3 CCU
if num_ccu == 3:
self.tile_align_w = 96
self.a6xx = Struct()
self.a6xx.magic = Struct()
for name, val in template["magic"].items():
setattr(self.a6xx.magic, name, val)
# Various "magic" register values:
self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
self.a6xx.magic.PC_POWER_CNTL = PC_POWER_CNTL
# Things that earlier gens have and later gens remove, provide
# defaults here and let them be overridden by sub-gen template:
self.a6xx.has_cp_reg_write = True
self.a6xx.has_8bpp_ubwc = True
for name, val in template.items():
if name == "magic": # handled above
continue
setattr(self.a6xx, name, val)
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
GPUId(200),
GPUId(201),
GPUId(205),
GPUId(220),
], GPUInfo(
gmem_align_w = 32, gmem_align_h = 32,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 512,
tile_max_h = ~0, # TODO
num_vsc_pipes = 8,
))
add_gpus([
GPUId(305),
GPUId(307),
GPUId(320),
GPUId(330),
], GPUInfo(
gmem_align_w = 32, gmem_align_h = 32,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 992, # max_bitfield_val(4, 0, 5)
tile_max_h = max_bitfield_val(9, 5, 5),
num_vsc_pipes = 8,
))
add_gpus([
GPUId(405),
GPUId(420),
GPUId(430),
], GPUInfo(
gmem_align_w = 32, gmem_align_h = 32,
tile_align_w = 32, tile_align_h = 32,
tile_max_w = 1024, # max_bitfield_val(4, 0, 5)
tile_max_h = max_bitfield_val(9, 5, 5),
num_vsc_pipes = 8,
))
add_gpus([
GPUId(508),
GPUId(509),
GPUId(510),
GPUId(512),
GPUId(530),
GPUId(540),
], GPUInfo(
gmem_align_w = 64, gmem_align_h = 32,
tile_align_w = 64, tile_align_h = 32,
tile_max_w = 1024, # max_bitfield_val(7, 0, 5)
tile_max_h = max_bitfield_val(16, 9, 5),
num_vsc_pipes = 16,
))
# a6xx can be divided into distinct sub-generations, where certain device-
# info parameters are keyed to the sub-generation. These templates reduce
# the copypaste
# a615, a616, a618, a619, a620 and a630:
a6xx_gen1 = dict(
fibers_per_sp = 128 * 16,
reg_size_vec4 = 96,
instr_cache_size = 64,
concurrent_resolve = True,
indirect_draw_wfm_quirk = True,
depth_bounds_require_depth_test_quirk = True,
magic = dict(
TPL1_DBG_ECO_CNTL = 0x100000,
)
)
# a640, a680:
a6xx_gen2 = dict(
fibers_per_sp = 128 * 4 * 16,
reg_size_vec4 = 96,
instr_cache_size = 64, # TODO
supports_multiview_mask = True,
has_z24uint_s8uint = True,
indirect_draw_wfm_quirk = True,
depth_bounds_require_depth_test_quirk = True, # TODO: check if true
has_dp2acc = False, # TODO: check if true
magic = dict(
TPL1_DBG_ECO_CNTL = 0,
),
)
# a650:
a6xx_gen3 = dict(
fibers_per_sp = 128 * 2 * 16,
reg_size_vec4 = 64,
# Blob limits it to 128 but we hang with 128
instr_cache_size = 127,
supports_multiview_mask = True,
has_z24uint_s8uint = True,
tess_use_shared = True,
storage_16bit = True,
has_tex_filter_cubic = True,
has_sample_locations = True,
has_ccu_flush_bug = True,
has_8bpp_ubwc = False,
has_dp2acc = True,
has_lrz_dir_tracking = True,
enable_lrz_fast_clear = True,
lrz_track_quirk = True,
magic = dict(
# this seems to be a chicken bit that fixes cubic filtering:
TPL1_DBG_ECO_CNTL = 0x1000000,
),
)
# a635, a660:
a6xx_gen4 = dict(
fibers_per_sp = 128 * 2 * 16,
reg_size_vec4 = 64,
# Blob limits it to 128 but we hang with 128
instr_cache_size = 127,
supports_multiview_mask = True,
has_z24uint_s8uint = True,
tess_use_shared = True,
storage_16bit = True,
has_tex_filter_cubic = True,
has_sample_locations = True,
has_ccu_flush_bug = True,
has_cp_reg_write = False,
has_8bpp_ubwc = False,
has_lpac = True,
has_shading_rate = True,
has_getfiberid = True,
has_dp2acc = True,
has_dp4acc = True,
enable_lrz_fast_clear = True,
has_lrz_dir_tracking = True,
magic = dict(
TPL1_DBG_ECO_CNTL = 0x5008000,
),
)
add_gpus([
GPUId(615),
GPUId(616),
GPUId(618),
GPUId(619),
GPUId(620),
], A6xxGPUInfo(
a6xx_gen1,
num_sp_cores = 1,
num_ccu = 1,
RB_UNKNOWN_8E04_blit = 0x00100000,
PC_POWER_CNTL = 0,
))
add_gpus([
GPUId(630),
], A6xxGPUInfo(
a6xx_gen1,
num_sp_cores = 2,
num_ccu = 2,
RB_UNKNOWN_8E04_blit = 0x01000000,
PC_POWER_CNTL = 1,
))
add_gpus([
GPUId(640),
], A6xxGPUInfo(
a6xx_gen2,
num_sp_cores = 2,
num_ccu = 2,
RB_UNKNOWN_8E04_blit = 0x00100000,
PC_POWER_CNTL = 1,
))
add_gpus([
GPUId(680),
], A6xxGPUInfo(
a6xx_gen2,
num_sp_cores = 4,
num_ccu = 4,
RB_UNKNOWN_8E04_blit = 0x04100000,
PC_POWER_CNTL = 3,
))
add_gpus([
GPUId(650),
], A6xxGPUInfo(
a6xx_gen3,
num_sp_cores = 3,
num_ccu = 3,
RB_UNKNOWN_8E04_blit = 0x04100000,
PC_POWER_CNTL = 2,
))
add_gpus([
GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"),
# fallback wildcard entry should be last:
GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
], A6xxGPUInfo(
a6xx_gen4,
num_sp_cores = 2,
num_ccu = 2,
RB_UNKNOWN_8E04_blit = 0x00100000,
PC_POWER_CNTL = 1,
))
add_gpus([
GPUId(660),
], A6xxGPUInfo(
a6xx_gen4,
num_sp_cores = 3,
num_ccu = 3,
RB_UNKNOWN_8E04_blit = 0x04100000,
PC_POWER_CNTL = 2,
))
template = """\
/* Copyright (C) 2021 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "freedreno_dev_info.h"
/* Map python to C: */
#define True true
#define False false
%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor
static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
{ {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""
print(Template(template).render(s=s))